diff --git a/BUILDING.txt b/BUILDING.txt
index c34946aa993b7..9bbb6dbf891a9 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -8,10 +8,10 @@ Requirements:
* Maven 3.3 or later
* Boost 1.72 (if compiling native code)
* Protocol Buffers 3.7.1 (if compiling native code)
-* CMake 3.1 or newer (if compiling native code)
+* CMake 3.19 or newer (if compiling native code)
* Zlib devel (if compiling native code)
* Cyrus SASL devel (if compiling native code)
-* One of the compilers that support thread_local storage: GCC 4.8.1 or later, Visual Studio,
+* One of the compilers that support thread_local storage: GCC 9.3.0 or later, Visual Studio,
Clang (community version), Clang (version for iOS 9 and later) (if compiling native code)
* openssl devel (if compiling native hadoop-pipes and to get the best HDFS encryption performance)
* Linux FUSE (Filesystem in Userspace) version 2.6 or above (if compiling fuse_dfs)
@@ -63,22 +63,30 @@ Installing required packages for clean install of Ubuntu 14.04 LTS Desktop:
$ sudo apt-get -y install maven
* Native libraries
$ sudo apt-get -y install build-essential autoconf automake libtool cmake zlib1g-dev pkg-config libssl-dev libsasl2-dev
+* GCC 9.3.0
+ $ sudo apt-get -y install software-properties-common
+ $ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
+ $ sudo apt-get update
+ $ sudo apt-get -y install g++-9 gcc-9
+ $ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9
+* CMake 3.19
+ $ curl -L https://cmake.org/files/v3.19/cmake-3.19.0.tar.gz > cmake-3.19.0.tar.gz
+ $ tar -zxvf cmake-3.19.0.tar.gz && cd cmake-3.19.0
+ $ ./bootstrap
+ $ make -j$(nproc)
+ $ sudo make install
* Protocol Buffers 3.7.1 (required to build native code)
- $ mkdir -p /opt/protobuf-3.7-src \
- && curl -L -s -S \
- https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz \
- -o /opt/protobuf-3.7.1.tar.gz \
- && tar xzf /opt/protobuf-3.7.1.tar.gz --strip-components 1 -C /opt/protobuf-3.7-src \
- && cd /opt/protobuf-3.7-src \
- && ./configure\
- && make install \
- && rm -rf /opt/protobuf-3.7-src
+ $ curl -L -s -S https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz -o protobuf-3.7.1.tar.gz
+ $ mkdir protobuf-3.7-src
+ $ tar xzf protobuf-3.7.1.tar.gz --strip-components 1 -C protobuf-3.7-src && cd protobuf-3.7-src
+ $ ./configure
+ $ make -j$(nproc)
+ $ sudo make install
* Boost
- $ curl -L https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download > boost_1_72_0.tar.bz2 \
- && tar --bzip2 -xf boost_1_72_0.tar.bz2 \
- && cd boost_1_72_0 \
- && ./bootstrap.sh --prefix=/usr/ \
- && ./b2 --without-python install
+ $ curl -L https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download > boost_1_72_0.tar.bz2
+ $ tar --bzip2 -xf boost_1_72_0.tar.bz2 && cd boost_1_72_0
+ $ ./bootstrap.sh --prefix=/usr/
+ $ ./b2 --without-python install
Optional packages:
@@ -103,7 +111,7 @@ Maven main modules:
- hadoop-project (Parent POM for all Hadoop Maven modules. )
(All plugins & dependencies versions are defined here.)
- hadoop-project-dist (Parent POM for modules that generate distributions.)
- - hadoop-annotations (Generates the Hadoop doclet used to generated the Javadocs)
+ - hadoop-annotations (Generates the Hadoop doclet used to generate the Javadocs)
- hadoop-assemblies (Maven assemblies used by the different modules)
- hadoop-maven-plugins (Maven plugins used in project)
- hadoop-build-tools (Build tools like checkstyle, etc.)
@@ -120,7 +128,7 @@ Maven main modules:
----------------------------------------------------------------------------------
Where to run Maven from?
- It can be run from any module. The only catch is that if not run from utrunk
+ It can be run from any module. The only catch is that if not run from trunk
all modules that are not part of the build run must be installed in the local
Maven cache or available in a Maven repository.
@@ -131,7 +139,7 @@ Maven build goals:
* Compile : mvn compile [-Pnative]
* Run tests : mvn test [-Pnative] [-Pshelltest]
* Create JAR : mvn package
- * Run findbugs : mvn compile findbugs:findbugs
+ * Run spotbugs : mvn compile spotbugs:spotbugs
* Run checkstyle : mvn compile checkstyle:checkstyle
* Install JAR in M2 cache : mvn install
* Deploy JAR to Maven repo : mvn deploy
@@ -176,7 +184,6 @@ Maven build goals:
we silently build a version of libhadoop.so that cannot make use of snappy.
This option is recommended if you plan on making use of snappy and want
to get more repeatable builds.
-
* Use -Dsnappy.prefix to specify a nonstandard location for the libsnappy
header files and library files. You do not need this option if you have
installed snappy using a package manager.
@@ -453,6 +460,17 @@ Building on CentOS 8
* Install libraries provided by CentOS 8.
$ sudo dnf install libtirpc-devel zlib-devel lz4-devel bzip2-devel openssl-devel cyrus-sasl-devel libpmem-devel
+* Install GCC 9.3.0
+ $ sudo dnf -y install gcc-toolset-9-gcc gcc-toolset-9-gcc-c++
+ $ source /opt/rh/gcc-toolset-9/enable
+
+* Install CMake 3.19
+ $ curl -L https://cmake.org/files/v3.19/cmake-3.19.0.tar.gz > cmake-3.19.0.tar.gz
+ $ tar -zxvf cmake-3.19.0.tar.gz && cd cmake-3.19.0
+ $ ./bootstrap
+ $ make -j$(nproc)
+ $ sudo make install
+
* Install boost.
$ curl -L -o boost_1_72_0.tar.bz2 https://sourceforge.net/projects/boost/files/boost/1.72.0/boost_1_72_0.tar.bz2/download
$ tar xjf boost_1_72_0.tar.bz2
@@ -489,7 +507,7 @@ Requirements:
* Maven 3.0 or later
* Boost 1.72
* Protocol Buffers 3.7.1
-* CMake 3.1 or newer
+* CMake 3.19 or newer
* Visual Studio 2010 Professional or Higher
* Windows SDK 8.1 (if building CPU rate control for the container executor)
* zlib headers (if building native code bindings for zlib)
diff --git a/LICENSE-binary b/LICENSE-binary
index 4a4b953913c8f..198f97c584393 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -225,7 +225,7 @@ com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.9.9
com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.9.9
com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.9.9
com.fasterxml.uuid:java-uuid-generator:3.1.4
-com.fasterxml.woodstox:woodstox-core:5.0.3
+com.fasterxml.woodstox:woodstox-core:5.3.0
com.github.davidmoten:rxjava-extras:0.8.0.17
com.github.stephenc.jcip:jcip-annotations:1.0-1
com.google:guice:4.0
@@ -366,7 +366,7 @@ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanage
com.github.luben:zstd-jni:1.4.3-1
dnsjava:dnsjava:2.1.7
-org.codehaus.woodstox:stax2-api:3.1.4
+org.codehaus.woodstox:stax2-api:4.2.1
BSD 3-Clause
@@ -468,8 +468,8 @@ com.microsoft.azure:azure-cosmosdb-gateway:2.4.5
com.microsoft.azure:azure-data-lake-store-sdk:2.3.3
com.microsoft.azure:azure-keyvault-core:1.0.0
com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7
-org.bouncycastle:bcpkix-jdk15on:1.60
-org.bouncycastle:bcprov-jdk15on:1.60
+org.bouncycastle:bcpkix-jdk15on:1.68
+org.bouncycastle:bcprov-jdk15on:1.68
org.checkerframework:checker-qual:2.5.2
org.codehaus.mojo:animal-sniffer-annotations:1.17
org.jruby.jcodings:jcodings:1.0.13
diff --git a/Jenkinsfile b/dev-support/Jenkinsfile
similarity index 88%
rename from Jenkinsfile
rename to dev-support/Jenkinsfile
index 944a35b868b3a..d2266c5530bc2 100644
--- a/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -35,7 +35,7 @@ pipeline {
DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
YETUS='yetus'
// Branch or tag name. Yetus release tags are 'rel/X.Y.Z'
- YETUS_VERSION='6ab19e71eaf3234863424c6f684b34c1d3dcc0ce'
+ YETUS_VERSION='11eb9b09786e401fbdeaa3be83a19a4066fd7813'
}
parameters {
@@ -118,13 +118,13 @@ pipeline {
# changing these to higher values may cause problems
# with other jobs on systemd-enabled machines
YETUS_ARGS+=("--proclimit=5500")
- YETUS_ARGS+=("--dockermemlimit=20g")
+ YETUS_ARGS+=("--dockermemlimit=22g")
- # -1 findbugs issues that show up prior to the patch being applied
- YETUS_ARGS+=("--findbugs-strict-precheck")
+ # -1 spotbugs issues that show up prior to the patch being applied
+ YETUS_ARGS+=("--spotbugs-strict-precheck")
# rsync these files back into the archive dir
- YETUS_ARGS+=("--archive-list=checkstyle-errors.xml,findbugsXml.xml")
+ YETUS_ARGS+=("--archive-list=checkstyle-errors.xml,spotbugsXml.xml")
# URL for user-side presentation in reports and such to our artifacts
# (needs to match the archive bits below)
@@ -133,9 +133,6 @@ pipeline {
# plugins to enable
YETUS_ARGS+=("--plugins=all")
- # use Hadoop's bundled shelldocs
- YETUS_ARGS+=("--shelldocs=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/shelldocs")
-
# don't let these tests cause -1s because we aren't really paying that
# much attention to them
YETUS_ARGS+=("--tests-filter=checkstyle")
@@ -152,9 +149,6 @@ pipeline {
# help keep the ASF boxes clean
YETUS_ARGS+=("--sentinel")
- # use emoji vote so it is easier to find the broken line
- YETUS_ARGS+=("--github-use-emoji-vote")
-
# test with Java 8 and 11
YETUS_ARGS+=("--java-home=/usr/lib/jvm/java-8-openjdk-amd64")
YETUS_ARGS+=("--multijdkdirs=/usr/lib/jvm/java-11-openjdk-amd64")
@@ -163,6 +157,10 @@ pipeline {
# custom javadoc goals
YETUS_ARGS+=("--mvn-javadoc-goals=process-sources,javadoc:javadoc-no-fork")
+ # write Yetus report as GitHub comment (YETUS-1102)
+ YETUS_ARGS+=("--github-write-comment")
+ YETUS_ARGS+=("--github-use-emoji-vote")
+
"${TESTPATCHBIN}" "${YETUS_ARGS[@]}"
'''
}
@@ -174,6 +172,19 @@ pipeline {
post {
always {
script {
+ // Publish status if it was missed (YETUS-1059)
+ withCredentials(
+ [usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
+ passwordVariable: 'GITHUB_TOKEN',
+ usernameVariable: 'GITHUB_USER')]) {
+ sh '''#!/usr/bin/env bash
+ YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}")
+ YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
+ TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh"
+ /usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true
+ '''
+ }
+
// Yetus output
archiveArtifacts "${env.PATCHDIR}/**"
// Publish the HTML report so that it can be looked at
diff --git a/dev-support/bin/checkcompatibility.py b/dev-support/bin/checkcompatibility.py
index ad1e9cbe47ff2..3db36154ef9c5 100755
--- a/dev-support/bin/checkcompatibility.py
+++ b/dev-support/bin/checkcompatibility.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@@ -30,33 +30,16 @@
import shutil
import subprocess
import sys
-import urllib2
-try:
- import argparse
-except ImportError:
- sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.")
- sys.exit(2)
+import urllib.request
+import argparse
# Various relative paths
REPO_DIR = os.getcwd()
def check_output(*popenargs, **kwargs):
- r"""Run command with arguments and return its output as a byte string.
- Backported from Python 2.7 as it's implemented as pure python on stdlib.
- >>> check_output(['/usr/bin/python', '--version'])
- Python 2.6.2
- """
- process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
- output, _ = process.communicate()
- retcode = process.poll()
- if retcode:
- cmd = kwargs.get("args")
- if cmd is None:
- cmd = popenargs[0]
- error = subprocess.CalledProcessError(retcode, cmd)
- error.output = output
- raise error
- return output
+ """ Run command with arguments and return its output as a string. """
+ return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8')
+
def get_repo_dir():
""" Return the path to the top of the repo. """
@@ -139,7 +122,7 @@ def checkout_java_acc(force):
url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
scratch_dir = get_scratch_dir()
path = os.path.join(scratch_dir, os.path.basename(url))
- jacc = urllib2.urlopen(url)
+ jacc = urllib.request.urlopen(url)
with open(path, 'wb') as w:
w.write(jacc.read())
@@ -194,7 +177,7 @@ def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
- with file(annotations_path, "w") as f:
+ with open(annotations_path, "w") as f:
for ann in annotations:
- print >>f, ann
+ print(ann, file=f)
args += ["-annotations-list", annotations_path]
subprocess.check_call(args)
@@ -264,8 +247,8 @@ def main():
parser.add_argument("--skip-build",
action="store_true",
help="Skip building the projects.")
- parser.add_argument("src_rev", nargs=1, help="Source revision.")
- parser.add_argument("dst_rev", nargs="?", default="HEAD",
+ parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.")
+ parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD",
help="Destination revision. " +
"If not specified, will use HEAD.")
diff --git a/dev-support/bin/dist-copynativelibs b/dev-support/bin/dist-copynativelibs
index 7f2b6ad1f5649..95de186e7e729 100755
--- a/dev-support/bin/dist-copynativelibs
+++ b/dev-support/bin/dist-copynativelibs
@@ -164,7 +164,7 @@ fi
# Windows doesn't have a LIB_DIR, everything goes into bin
-if [[ -d "${BIN_DIR}" ]] ; then
+if [[ -d "${BIN_DIR}" && $(ls -A "${BIN_DIR}") ]] ; then
mkdir -p "${TARGET_BIN_DIR}"
cd "${BIN_DIR}" || exit 1
${TAR} ./* | (cd "${TARGET_BIN_DIR}"/ || exit 1; ${UNTAR})
diff --git a/dev-support/bin/hadoop.sh b/dev-support/bin/hadoop.sh
index 3343014aae8bb..beebea8c97f6b 100755
--- a/dev-support/bin/hadoop.sh
+++ b/dev-support/bin/hadoop.sh
@@ -482,7 +482,7 @@ function personality_file_tests
fi
if [[ ${filename} =~ \.java$ ]]; then
- add_test findbugs
+ add_test spotbugs
fi
}
@@ -550,7 +550,7 @@ function shadedclient_rebuild
echo_and_redirect "${logfile}" \
"${MAVEN}" "${MAVEN_ARGS[@]}" verify -fae --batch-mode -am \
"${modules[@]}" \
- -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dfindbugs.skip=true
+ -Dtest=NoUnitTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true
count=$("${GREP}" -c '\[ERROR\]' "${logfile}")
if [[ ${count} -gt 0 ]]; then
diff --git a/dev-support/bin/test-patch b/dev-support/bin/test-patch
index 8ff8119b3e086..5faf472d325e8 100755
--- a/dev-support/bin/test-patch
+++ b/dev-support/bin/test-patch
@@ -15,4 +15,4 @@
# limitations under the License.
BINDIR=$(cd -P -- "$(dirname -- "${BASH_SOURCE-0}")" >/dev/null && pwd -P)
-exec "${BINDIR}/yetus-wrapper" test-patch --project=hadoop --skip-dir=dev-support "$@"
+exec "${BINDIR}/yetus-wrapper" test-patch --project=hadoop --skip-dirs=dev-support "$@"
diff --git a/dev-support/bin/yetus-wrapper b/dev-support/bin/yetus-wrapper
index bca2316ae6784..8532d1749701b 100755
--- a/dev-support/bin/yetus-wrapper
+++ b/dev-support/bin/yetus-wrapper
@@ -77,7 +77,7 @@ WANTED="$1"
shift
ARGV=("$@")
-HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.10.0}
+HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.13.0}
BIN=$(yetus_abs "${BASH_SOURCE-$0}")
BINDIR=$(dirname "${BIN}")
diff --git a/dev-support/determine-flaky-tests-hadoop.py b/dev-support/determine-flaky-tests-hadoop.py
deleted file mode 100755
index 8644299bba4a2..0000000000000
--- a/dev-support/determine-flaky-tests-hadoop.py
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# Given a jenkins test job, this script examines all runs of the job done
-# within specified period of time (number of days prior to the execution
-# time of this script), and reports all failed tests.
-#
-# The output of this script includes a section for each run that has failed
-# tests, with each failed test name listed.
-#
-# More importantly, at the end, it outputs a summary section to list all failed
-# tests within all examined runs, and indicate how many runs a same test
-# failed, and sorted all failed tests by how many runs each test failed.
-#
-# This way, when we see failed tests in PreCommit build, we can quickly tell
-# whether a failed test is a new failure, or it failed before and how often it
-# failed, so to have idea whether it may just be a flaky test.
-#
-# Of course, to be 100% sure about the reason of a test failure, closer look
-# at the failed test for the specific run is necessary.
-#
-import sys
-import platform
-sysversion = sys.hexversion
-onward30 = False
-if sysversion < 0x020600F0:
- sys.exit("Minimum supported python version is 2.6, the current version is " +
- "Python" + platform.python_version())
-
-if sysversion == 0x030000F0:
- sys.exit("There is a known bug with Python" + platform.python_version() +
- ", please try a different version");
-
-if sysversion < 0x03000000:
- import urllib2
-else:
- onward30 = True
- import urllib.request
-
-import datetime
-import json as simplejson
-import logging
-from optparse import OptionParser
-import time
-
-# Configuration
-DEFAULT_JENKINS_URL = "https://builds.apache.org"
-DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
-DEFAULT_NUM_PREVIOUS_DAYS = 14
-DEFAULT_TOP_NUM_FAILED_TEST = -1
-
-SECONDS_PER_DAY = 86400
-
-# total number of runs to examine
-numRunsToExamine = 0
-
-#summary mode
-summary_mode = False
-
-#total number of errors
-error_count = 0
-
-""" Parse arguments """
-def parse_args():
- parser = OptionParser()
- parser.add_option("-J", "--jenkins-url", type="string",
- dest="jenkins_url", help="Jenkins URL",
- default=DEFAULT_JENKINS_URL)
- parser.add_option("-j", "--job-name", type="string",
- dest="job_name", help="Job name to look at",
- default=DEFAULT_JOB_NAME)
- parser.add_option("-n", "--num-days", type="int",
- dest="num_prev_days", help="Number of days to examine",
- default=DEFAULT_NUM_PREVIOUS_DAYS)
- parser.add_option("-t", "--top", type="int",
- dest="num_failed_tests",
- help="Summary Mode, only show top number of failed tests",
- default=DEFAULT_TOP_NUM_FAILED_TEST)
-
- (options, args) = parser.parse_args()
- if args:
- parser.error("unexpected arguments: " + repr(args))
- return options
-
-""" Load data from specified url """
-def load_url_data(url):
- if onward30:
- ourl = urllib.request.urlopen(url)
- codec = ourl.info().get_param('charset')
- content = ourl.read().decode(codec)
- data = simplejson.loads(content, strict=False)
- else:
- ourl = urllib2.urlopen(url)
- data = simplejson.load(ourl, strict=False)
- return data
-
-""" List all builds of the target project. """
-def list_builds(jenkins_url, job_name):
- global summary_mode
- url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
- jenkins=jenkins_url,
- job_name=job_name)
-
- try:
- data = load_url_data(url)
-
- except:
- if not summary_mode:
- logging.error("Could not fetch: %s" % url)
- error_count += 1
- raise
- return data['builds']
-
-""" Find the names of any tests which failed in the given build output URL. """
-def find_failing_tests(testReportApiJson, jobConsoleOutput):
- global summary_mode
- global error_count
- ret = set()
- try:
- data = load_url_data(testReportApiJson)
-
- except:
- if not summary_mode:
- logging.error(" Could not open testReport, check " +
- jobConsoleOutput + " for why it was reported failed")
- error_count += 1
- return ret
-
- for suite in data['suites']:
- for cs in suite['cases']:
- status = cs['status']
- errDetails = cs['errorDetails']
- if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):
- ret.add(cs['className'] + "." + cs['name'])
-
- if len(ret) == 0 and (not summary_mode):
- logging.info(" No failed tests in testReport, check " +
- jobConsoleOutput + " for why it was reported failed.")
- return ret
-
-""" Iterate runs of specfied job within num_prev_days and collect results """
-def find_flaky_tests(jenkins_url, job_name, num_prev_days):
- global numRunsToExamine
- global summary_mode
- all_failing = dict()
- # First list all builds
- builds = list_builds(jenkins_url, job_name)
-
- # Select only those in the last N days
- min_time = int(time.time()) - SECONDS_PER_DAY * num_prev_days
- builds = [b for b in builds if (int(b['timestamp']) / 1000) > min_time]
-
- # Filter out only those that failed
- failing_build_urls = [(b['url'] , b['timestamp']) for b in builds
- if (b['result'] in ('UNSTABLE', 'FAILURE'))]
-
- tnum = len(builds)
- num = len(failing_build_urls)
- numRunsToExamine = tnum
- if not summary_mode:
- logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)
- + ") that have failed tests in the past " + str(num_prev_days) + " days"
- + ((".", ", as listed below:\n")[num > 0]))
-
- for failed_build_with_time in failing_build_urls:
- failed_build = failed_build_with_time[0];
- jobConsoleOutput = failed_build + "Console";
- testReport = failed_build + "testReport";
- testReportApiJson = testReport + "/api/json";
-
- ts = float(failed_build_with_time[1]) / 1000.
- st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
- if not summary_mode:
- logging.info("===>%s" % str(testReport) + " (" + st + ")")
- failing = find_failing_tests(testReportApiJson, jobConsoleOutput)
- if failing:
- for ftest in failing:
- if not summary_mode:
- logging.info(" Failed test: %s" % ftest)
- all_failing[ftest] = all_failing.get(ftest,0)+1
-
- return all_failing
-
-def main():
- global numRunsToExamine
- global summary_mode
- logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
-
- # set up logger to write to stdout
- soh = logging.StreamHandler(sys.stdout)
- soh.setLevel(logging.INFO)
- logger = logging.getLogger()
- logger.removeHandler(logger.handlers[0])
- logger.addHandler(soh)
-
- opts = parse_args()
- logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
- + "/job/" + opts.job_name + "")
-
- if opts.num_failed_tests != -1:
- summary_mode = True
-
- all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
- opts.num_prev_days)
- if len(all_failing) == 0:
- raise SystemExit(0)
-
- if summary_mode and opts.num_failed_tests < len(all_failing):
- logging.info("\nAmong " + str(numRunsToExamine) +
- " runs examined, top " + str(opts.num_failed_tests) +
- " failed tests <#failedRuns: testName>:")
- else:
- logging.info("\nAmong " + str(numRunsToExamine) +
- " runs examined, all failed tests <#failedRuns: testName>:")
-
- # print summary section: all failed tests sorted by how many times they failed
- line_count = 0
- for tn in sorted(all_failing, key=all_failing.get, reverse=True):
- logging.info(" " + str(all_failing[tn])+ ": " + tn)
- if summary_mode:
- line_count += 1
- if line_count == opts.num_failed_tests:
- break
-
- if summary_mode and error_count > 0:
- logging.info("\n" + str(error_count) + " errors found, you may "
- + "re-run in non summary mode to see error details.");
-
-if __name__ == "__main__":
- main()
diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile
index 4bce9cf71d729..11f3c6f739b86 100644
--- a/dev-support/docker/Dockerfile
+++ b/dev-support/docker/Dockerfile
@@ -1,4 +1,3 @@
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -18,7 +17,7 @@
# Dockerfile for installing the necessary dependencies for building Hadoop.
# See BUILDING.txt.
-FROM ubuntu:bionic
+FROM ubuntu:focal
WORKDIR /root
@@ -45,12 +44,12 @@ RUN apt-get -q update \
cmake \
curl \
doxygen \
- findbugs \
fuse \
g++ \
gcc \
git \
gnupg-agent \
+ hugo \
libbcprov-java \
libbz2-dev \
libcurl4-openssl-dev \
@@ -65,16 +64,18 @@ RUN apt-get -q update \
locales \
make \
maven \
+ nodejs \
+ node-yarn \
+ npm \
openjdk-11-jdk \
openjdk-8-jdk \
pinentry-curses \
pkg-config \
- python \
- python2.7 \
- python-pip \
- python-pkg-resources \
- python-setuptools \
- python-wheel \
+ python3 \
+ python3-pip \
+ python3-pkg-resources \
+ python3-setuptools \
+ python3-wheel \
rsync \
shellcheck \
software-properties-common \
@@ -90,10 +91,19 @@ RUN apt-get -q update \
ENV MAVEN_HOME /usr
# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003)
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64
-ENV FINDBUGS_HOME /usr
#######
-# Install Boost 1.72 (1.65 ships with Bionic)
+# Install SpotBugs 4.2.2
+#######
+RUN mkdir -p /opt/spotbugs \
+ && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.2.2/spotbugs-4.2.2.tgz \
+ -o /opt/spotbugs.tgz \
+ && tar xzf /opt/spotbugs.tgz --strip-components 1 -C /opt/spotbugs \
+ && chmod +x /opt/spotbugs/bin/*
+ENV SPOTBUGS_HOME /opt/spotbugs
+
+#######
+# Install Boost 1.72 (1.71 ships with Focal)
#######
# hadolint ignore=DL3003
RUN mkdir -p /opt/boost-library \
@@ -108,7 +118,7 @@ RUN mkdir -p /opt/boost-library \
&& rm -rf /opt/boost-library
######
-# Install Google Protobuf 3.7.1 (3.0.0 ships with Bionic)
+# Install Google Protobuf 3.7.1 (3.6.1 ships with Focal)
######
# hadolint ignore=DL3003
RUN mkdir -p /opt/protobuf-src \
@@ -118,6 +128,7 @@ RUN mkdir -p /opt/protobuf-src \
&& tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src \
&& cd /opt/protobuf-src \
&& ./configure --prefix=/opt/protobuf \
+ && make "-j$(nproc)" \
&& make install \
&& cd /root \
&& rm -rf /opt/protobuf-src
@@ -125,39 +136,15 @@ ENV PROTOBUF_HOME /opt/protobuf
ENV PATH "${PATH}:/opt/protobuf/bin"
####
-# Install pylint at fixed version (2.0.0 removed python2 support)
-# https://github.com/PyCQA/pylint/issues/2294
+# Install pylint and python-dateutil
####
-RUN pip2 install \
- astroid==1.6.6 \
- isort==4.3.21 \
- configparser==4.0.2 \
- pylint==1.9.2
+RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1
####
-# Install dateutil.parser
+# Install bower
####
-RUN pip2 install python-dateutil==2.7.3
-
-###
-# Install node.js 10.x for web UI framework (4.2.6 ships with Xenial)
-###
# hadolint ignore=DL3008
-RUN curl -L -s -S https://deb.nodesource.com/setup_10.x | bash - \
- && apt-get install -y --no-install-recommends nodejs \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/* \
- && npm install -g bower@1.8.8
-
-###
-## Install Yarn 1.12.1 for web UI framework
-####
-RUN curl -s -S https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - \
- && echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list \
- && apt-get -q update \
- && apt-get install -y --no-install-recommends yarn=1.21.1-1 \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
+RUN npm install -g bower@1.8.8
###
# Install hadolint
@@ -169,6 +156,26 @@ RUN curl -L -s -S \
&& shasum -a 512 /bin/hadolint | \
awk '$1!="734e37c1f6619cbbd86b9b249e69c9af8ee1ea87a2b1ff71dccda412e9dac35e63425225a95d71572091a3f0a11e9a04c2fc25d9e91b840530c26af32b9891ca" {exit(1)}'
+######
+# Intel ISA-L 2.29.0
+######
+# hadolint ignore=DL3003,DL3008
+RUN mkdir -p /opt/isa-l-src \
+ && apt-get -q update \
+ && apt-get install -y --no-install-recommends automake yasm \
+ && apt-get clean \
+ && curl -L -s -S \
+ https://github.com/intel/isa-l/archive/v2.29.0.tar.gz \
+ -o /opt/isa-l.tar.gz \
+ && tar xzf /opt/isa-l.tar.gz --strip-components 1 -C /opt/isa-l-src \
+ && cd /opt/isa-l-src \
+ && ./autogen.sh \
+ && ./configure \
+ && make "-j$(nproc)" \
+ && make install \
+ && cd /root \
+ && rm -rf /opt/isa-l-src
+
###
# Avoid out of memory errors in builds
###
@@ -183,12 +190,6 @@ ENV HADOOP_SKIP_YETUS_VERIFICATION true
# YETUS CUT HERE
###
-# Hugo static website generator for new hadoop site
-RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.58.3/hugo_0.58.3_Linux-64bit.deb \
- && dpkg --install hugo.deb \
- && rm hugo.deb
-
-
# Add a welcome message and environment checks.
COPY hadoop_env_checks.sh /root/hadoop_env_checks.sh
RUN chmod 755 /root/hadoop_env_checks.sh
diff --git a/dev-support/docker/Dockerfile_aarch64 b/dev-support/docker/Dockerfile_aarch64
index 19cfd13b5c763..362c5466a4a14 100644
--- a/dev-support/docker/Dockerfile_aarch64
+++ b/dev-support/docker/Dockerfile_aarch64
@@ -17,7 +17,7 @@
# Dockerfile for installing the necessary dependencies for building Hadoop.
# See BUILDING.txt.
-FROM ubuntu:bionic
+FROM ubuntu:focal
WORKDIR /root
@@ -48,12 +48,12 @@ RUN apt-get -q update \
cmake \
curl \
doxygen \
- findbugs \
fuse \
g++ \
gcc \
git \
gnupg-agent \
+ hugo \
libbcprov-java \
libbz2-dev \
libcurl4-openssl-dev \
@@ -68,16 +68,18 @@ RUN apt-get -q update \
locales \
make \
maven \
+ nodejs \
+ node-yarn \
+ npm \
openjdk-11-jdk \
openjdk-8-jdk \
pinentry-curses \
pkg-config \
- python \
- python2.7 \
- python-pip \
- python-pkg-resources \
- python-setuptools \
- python-wheel \
+ python3 \
+ python3-pip \
+ python3-pkg-resources \
+ python3-setuptools \
+ python3-wheel \
rsync \
shellcheck \
software-properties-common \
@@ -93,10 +95,19 @@ RUN apt-get -q update \
ENV MAVEN_HOME /usr
# JAVA_HOME must be set in Maven >= 3.5.0 (MNG-6003)
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-arm64
-ENV FINDBUGS_HOME /usr
#######
-# Install Boost 1.72 (1.65 ships with Bionic)
+# Install SpotBugs 4.2.2
+#######
+RUN mkdir -p /opt/spotbugs \
+ && curl -L -s -S https://github.com/spotbugs/spotbugs/releases/download/4.2.2/spotbugs-4.2.2.tgz \
+ -o /opt/spotbugs.tgz \
+ && tar xzf /opt/spotbugs.tgz --strip-components 1 -C /opt/spotbugs \
+ && chmod +x /opt/spotbugs/bin/*
+ENV SPOTBUGS_HOME /opt/spotbugs
+
+#######
+# Install Boost 1.72 (1.71 ships with Focal)
#######
# hadolint ignore=DL3003
RUN mkdir -p /opt/boost-library \
@@ -111,7 +122,7 @@ RUN mkdir -p /opt/boost-library \
&& rm -rf /opt/boost-library
######
-# Install Google Protobuf 3.7.1 (3.0.0 ships with Bionic)
+# Install Google Protobuf 3.7.1 (3.6.1 ships with Focal)
######
# hadolint ignore=DL3003
RUN mkdir -p /opt/protobuf-src \
@@ -121,6 +132,7 @@ RUN mkdir -p /opt/protobuf-src \
&& tar xzf /opt/protobuf.tar.gz --strip-components 1 -C /opt/protobuf-src \
&& cd /opt/protobuf-src \
&& ./configure --prefix=/opt/protobuf \
+ && make "-j$(nproc)" \
&& make install \
&& cd /root \
&& rm -rf /opt/protobuf-src
@@ -128,39 +140,15 @@ ENV PROTOBUF_HOME /opt/protobuf
ENV PATH "${PATH}:/opt/protobuf/bin"
####
-# Install pylint at fixed version (2.0.0 removed python2 support)
-# https://github.com/PyCQA/pylint/issues/2294
+# Install pylint and python-dateutil
####
-RUN pip2 install \
- astroid==1.6.6 \
- isort==4.3.21 \
- configparser==4.0.2 \
- pylint==1.9.2
+RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1
####
-# Install dateutil.parser
+# Install bower
####
-RUN pip2 install python-dateutil==2.7.3
-
-###
-# Install node.js 10.x for web UI framework (4.2.6 ships with Xenial)
-###
# hadolint ignore=DL3008
-RUN curl -L -s -S https://deb.nodesource.com/setup_10.x | bash - \
- && apt-get install -y --no-install-recommends nodejs \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/* \
- && npm install -g bower@1.8.8
-
-###
-## Install Yarn 1.12.1 for web UI framework
-####
-RUN curl -s -S https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - \
- && echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list \
- && apt-get -q update \
- && apt-get install -y --no-install-recommends yarn=1.21.1-1 \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
+RUN npm install -g bower@1.8.8
###
# Install phantomjs built for aarch64
@@ -187,12 +175,6 @@ ENV HADOOP_SKIP_YETUS_VERIFICATION true
# YETUS CUT HERE
###
-# Hugo static website generator (for new hadoop site docs)
-RUN curl -L -o hugo.deb https://github.com/gohugoio/hugo/releases/download/v0.58.3/hugo_0.58.3_Linux-ARM64.deb \
- && dpkg --install hugo.deb \
- && rm hugo.deb
-
-
# Add a welcome message and environment checks.
COPY hadoop_env_checks.sh /root/hadoop_env_checks.sh
RUN chmod 755 /root/hadoop_env_checks.sh
diff --git a/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml b/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml
index 51f9acc4015ce..c09eb953d6396 100644
--- a/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml
+++ b/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml
@@ -121,8 +121,8 @@
+ * Upload files/parts directly via different buffering mechanisms: including
+ * memory and disk.
+ *
+ * If the stream is closed and no update has started, then the upload is
+ * instead done as a single PUT operation.
+ *
+ * Unstable: statistics and error handling might evolve.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+class OBSBlockOutputStream extends OutputStream implements Syncable {
+
+ /**
+ * Class logger.
+ */
+ private static final Logger LOG = LoggerFactory.getLogger(
+ OBSBlockOutputStream.class);
+
+ /**
+ * Owner FileSystem.
+ */
+ private final OBSFileSystem fs;
+
+ /**
+ * Key of the object being uploaded.
+ */
+ private final String key;
+
+ /**
+ * Length of object.
+ */
+ private long objectLen;
+
+ /**
+ * Size of all blocks.
+ */
+ private final int blockSize;
+
+ /**
+ * Executor service used to schedule work.
+ */
+ private final ListeningExecutorService executorService;
+
+ /**
+ * Factory for creating blocks.
+ */
+ private final OBSDataBlocks.BlockFactory blockFactory;
+
+ /**
+ * Preallocated byte buffer for writing single characters.
+ */
+ private final byte[] singleCharWrite = new byte[1];
+
+ /**
+ * Closed flag.
+ */
+ private final AtomicBoolean closed = new AtomicBoolean(false);
+
+ /**
+ * Has exception flag.
+ */
+ private final AtomicBoolean hasException = new AtomicBoolean(false);
+
+ /**
+ * Has flushed flag.
+ */
+ private final AtomicBoolean appendAble;
+
+ /**
+ * Multipart upload details; null means none started.
+ */
+ private MultiPartUpload multiPartUpload;
+
+ /**
+ * Current data block. Null means none currently active.
+ */
+ private OBSDataBlocks.DataBlock activeBlock;
+
+ /**
+ * Count of blocks uploaded.
+ */
+ private long blockCount = 0;
+
+ /**
+ * Write operation helper; encapsulation of the filesystem operations.
+ */
+ private OBSWriteOperationHelper writeOperationHelper;
+
+ /**
+ * Flag for mocking upload part error.
+ */
+ private boolean mockUploadPartError = false;
+
+ /**
+ * An OBS output stream which uploads partitions in a separate pool of
+ * threads; different {@link OBSDataBlocks.BlockFactory} instances can control
+ * where data is buffered.
+ *
+ * @param owner OBSFilesystem
+ * @param obsObjectKey OBS object to work on
+ * @param objLen object length
+ * @param execService the executor service to use to schedule work
+ * @param isAppendable if append is supported
+ * @throws IOException on any problem
+ * @throws IllegalArgumentException if the part size is below the minimum
+ */
+ OBSBlockOutputStream(
+ final OBSFileSystem owner,
+ final String obsObjectKey,
+ final long objLen,
+ final ExecutorService execService,
+ final boolean isAppendable)
+ throws IOException {
+ this.appendAble = new AtomicBoolean(isAppendable);
+ this.fs = owner;
+ this.key = obsObjectKey;
+ this.objectLen = objLen;
+ this.blockFactory = owner.getBlockFactory();
+ this.blockSize = (int) owner.getPartSize();
+ this.writeOperationHelper = owner.getWriteHelper();
+ // Guava's Preconditions only expands %s placeholders, never %d.
+ Preconditions.checkArgument(
+ owner.getPartSize() >= OBSConstants.MULTIPART_MIN_SIZE,
+ "Block size is too small: %s", owner.getPartSize());
+ this.executorService = MoreExecutors.listeningDecorator(
+ execService);
+ this.multiPartUpload = null;
+ // create that first block. This guarantees that an open + close
+ // sequence writes a 0-byte entry.
+ createBlockIfNeeded();
+ LOG.debug("Initialized OBSBlockOutputStream for {} output to {}",
+ owner.getWriteHelper(), activeBlock);
+ }
+
+ /**
+ * Demand create a destination block.
+ *
+ * @return the active block; null if there isn't one.
+ * @throws IOException on any failure to create
+ */
+ private synchronized OBSDataBlocks.DataBlock createBlockIfNeeded()
+ throws IOException {
+ if (activeBlock == null) {
+ blockCount++;
+ if (blockCount >= OBSConstants.MAX_MULTIPART_COUNT) {
+ LOG.warn(
+ "Number of partitions in stream exceeds limit for OBS: "
+ + OBSConstants.MAX_MULTIPART_COUNT
+ + " write may fail.");
+ }
+ activeBlock = blockFactory.create(blockCount, this.blockSize);
+ }
+ return activeBlock;
+ }
+
+ /**
+ * Synchronized accessor to the active block.
+ *
+ * @return the active block; null if there isn't one.
+ */
+ synchronized OBSDataBlocks.DataBlock getActiveBlock() {
+ return activeBlock;
+ }
+
+ /**
+ * Set mock error.
+ *
+ * @param isException mock error
+ */
+ @VisibleForTesting
+ public void mockPutPartError(final boolean isException) {
+ this.mockUploadPartError = isException;
+ }
+
+ /**
+ * Predicate to query whether or not there is an active block.
+ *
+ * @return true if there is an active block.
+ */
+ private synchronized boolean hasActiveBlock() {
+ return activeBlock != null;
+ }
+
+ /**
+ * Clear the active block.
+ */
+ private synchronized void clearActiveBlock() {
+ if (activeBlock != null) {
+ LOG.debug("Clearing active block");
+ }
+ activeBlock = null;
+ }
+
+ /**
+ * Check for the stream being open.
+ *
+ * @throws IOException if the stream is closed.
+ */
+ private void checkOpen() throws IOException {
+ if (closed.get()) {
+ throw new IOException(
+ "Filesystem " + writeOperationHelper.toString(key) + " closed");
+ }
+ }
+
+ /**
+ * The flush operation does not trigger an upload; that awaits the next block
+ * being full. What it does do is call {@code flush() } on the current block,
+ * leaving it to choose how to react.
+ *
+ * @throws IOException Any IO problem.
+ */
+ @Override
+ public synchronized void flush() throws IOException {
+ checkOpen();
+ OBSDataBlocks.DataBlock dataBlock = getActiveBlock();
+ if (dataBlock != null) {
+ dataBlock.flush();
+ }
+ }
+
+ /**
+ * Writes a byte to the destination. If this causes the buffer to reach its
+ * limit, the actual upload is submitted to the threadpool.
+ *
+ * @param b the int of which the lowest byte is written
+ * @throws IOException on any problem
+ */
+ @Override
+ public synchronized void write(final int b) throws IOException {
+ singleCharWrite[0] = (byte) b;
+ write(singleCharWrite, 0, 1);
+ }
+
+ /**
+ * Writes a range of bytes to the memory buffer. If this causes the
+ * buffer to reach its limit, the actual upload is submitted to the threadpool
+ * and the remainder of the array is written to memory (recursively).
+ *
+ * @param source byte array containing the data to write
+ * @param offset offset in array where to start
+ * @param len number of bytes to be written
+ * @throws IOException on any problem
+ */
+ @Override
+ public synchronized void write(@NotNull final byte[] source,
+ final int offset, final int len)
+ throws IOException {
+ if (hasException.get()) {
+ String closeWarning = String.format(
+ "write has error. bs : pre upload obs[%s] has error.", key);
+ LOG.warn(closeWarning);
+ throw new IOException(closeWarning);
+ }
+ OBSDataBlocks.validateWriteArgs(source, offset, len);
+ checkOpen();
+ if (len == 0) {
+ return;
+ }
+
+ OBSDataBlocks.DataBlock block = createBlockIfNeeded();
+ int written = block.write(source, offset, len);
+ int remainingCapacity = block.remainingCapacity();
+ try {
+ innerWrite(source, offset, len, written, remainingCapacity);
+ } catch (IOException e) {
+ LOG.error(
+ "Write data for key {} of bucket {} error, error message {}",
+ key, fs.getBucket(),
+ e.getMessage());
+ throw e;
+ }
+ }
+
+ private synchronized void innerWrite(final byte[] source, final int offset,
+ final int len,
+ final int written, final int remainingCapacity)
+ throws IOException {
+
+ if (written < len) {
+ // not everything was written the block has run out
+ // of capacity
+ // Trigger an upload then process the remainder.
+ LOG.debug(
+ "writing more data than block has capacity -triggering upload");
+ if (appendAble.get()) {
+ // to write a buffer then append to obs
+ LOG.debug("[Append] open stream and single write size {} "
+ + "greater than buffer size {}, append buffer to obs.",
+ len, blockSize);
+ flushCurrentBlock();
+ } else {
+ // block output stream logic, multi-part upload
+ uploadCurrentBlock();
+ }
+ // tail recursion is mildly expensive, but given buffer sizes
+ // must be MB. it's unlikely to recurse very deeply.
+ this.write(source, offset + written, len - written);
+ } else {
+ if (remainingCapacity == 0) {
+ // the whole buffer is done, trigger an upload
+ if (appendAble.get()) {
+ // to write a buffer then append to obs
+ LOG.debug("[Append] open stream and already write size "
+ + "equal to buffer size {}, append buffer to obs.",
+ blockSize);
+ flushCurrentBlock();
+ } else {
+ // block output stream logic, multi-part upload
+ uploadCurrentBlock();
+ }
+ }
+ }
+ }
+
+ /**
+ * Start an asynchronous upload of the current block.
+ *
+ * @throws IOException Problems opening the destination for upload or
+ * initializing the upload.
+ */
+ private synchronized void uploadCurrentBlock() throws IOException {
+ Preconditions.checkState(hasActiveBlock(), "No active block");
+ LOG.debug("Writing block # {}", blockCount);
+
+ try {
+ if (multiPartUpload == null) {
+ LOG.debug("Initiating Multipart upload");
+ multiPartUpload = new MultiPartUpload();
+ }
+ multiPartUpload.uploadBlockAsync(getActiveBlock());
+ } catch (IOException e) {
+ hasException.set(true);
+ LOG.error("Upload current block on ({}/{}) failed.", fs.getBucket(),
+ key, e);
+ throw e;
+ } finally {
+ // set the block to null, so the next write will create a new block.
+ clearActiveBlock();
+ }
+ }
+
+ /**
+ * Close the stream.
+ *
+ * This will not return until the upload is complete or the attempt to
+ * perform the upload has failed. Exceptions raised in this method are
+ * indicative that the write has failed and data is at risk of being lost.
+ *
+ * @throws IOException on any failure.
+ */
+ @Override
+ public synchronized void close() throws IOException {
+ if (closed.getAndSet(true)) {
+ // already closed
+ LOG.debug("Ignoring close() as stream is already closed");
+ return;
+ }
+ if (hasException.get()) {
+ String closeWarning = String.format(
+ "closed has error. bs : pre write obs[%s] has error.", key);
+ LOG.warn(closeWarning);
+ throw new IOException(closeWarning);
+ }
+ // do upload
+ completeCurrentBlock();
+
+ // clear
+ clearHFlushOrSync();
+
+ // All end of write operations, including deleting fake parent
+ // directories
+ writeOperationHelper.writeSuccessful(key);
+ }
+
+ /**
+ * Append to the object if appendable and it already exists; otherwise put it.
+ *
+ * @throws IOException any problem in append or put object
+ */
+ private synchronized void putObjectIfNeedAppend() throws IOException {
+ if (appendAble.get() && fs.exists(
+ OBSCommonUtils.keyToQualifiedPath(fs, key))) {
+ appendFsFile();
+ } else {
+ putObject();
+ }
+ }
+
+ /**
+ * Append posix file.
+ *
+ * @throws IOException any problem
+ */
+ private synchronized void appendFsFile() throws IOException {
+ LOG.debug("bucket is posix, to append file. key is {}", key);
+ final OBSDataBlocks.DataBlock block = getActiveBlock();
+ WriteFileRequest writeFileReq;
+ if (block instanceof OBSDataBlocks.DiskBlock) {
+ writeFileReq = OBSCommonUtils.newAppendFileRequest(fs, key,
+ objectLen, (File) block.startUpload());
+ } else {
+ writeFileReq = OBSCommonUtils.newAppendFileRequest(fs, key,
+ objectLen, (InputStream) block.startUpload());
+ }
+ OBSCommonUtils.appendFile(fs, writeFileReq);
+ objectLen += block.dataSize();
+ }
+
+ /**
+ * Upload the current block as a single PUT request; if the buffer is empty a
+ * 0-byte PUT will be invoked, as it is needed to create an entry at the far
+ * end.
+ *
+ * @throws IOException any problem.
+ */
+ private synchronized void putObject() throws IOException {
+ LOG.debug("Executing regular upload for {}",
+ writeOperationHelper.toString(key));
+
+ final OBSDataBlocks.DataBlock block = getActiveBlock();
+ clearActiveBlock();
+ final int size = block.dataSize();
+ final PutObjectRequest putObjectRequest;
+ if (block instanceof OBSDataBlocks.DiskBlock) {
+ putObjectRequest = writeOperationHelper.newPutRequest(key,
+ (File) block.startUpload());
+
+ } else {
+ putObjectRequest =
+ writeOperationHelper.newPutRequest(key,
+ (InputStream) block.startUpload(), size);
+
+ }
+ putObjectRequest.setAcl(fs.getCannedACL());
+ fs.getSchemeStatistics().incrementWriteOps(1);
+ try {
+ // the putObject call automatically closes the input
+ // stream afterwards.
+ writeOperationHelper.putObject(putObjectRequest);
+ } finally {
+ OBSCommonUtils.closeAll(block);
+ }
+ }
+
+ @Override
+ public synchronized String toString() {
+ final StringBuilder sb = new StringBuilder("OBSBlockOutputStream{");
+ sb.append(writeOperationHelper.toString());
+ sb.append(", blockSize=").append(blockSize);
+ OBSDataBlocks.DataBlock block = activeBlock;
+ if (block != null) {
+ sb.append(", activeBlock=").append(block);
+ }
+ sb.append('}');
+ return sb.toString();
+ }
+
+ public synchronized void sync() {
+ // need to do
+ }
+
+ @Override
+ public synchronized void hflush() throws IOException {
+ // hflush and hsync share the same implementation
+ flushOrSync();
+ }
+
+ /**
+ * Flush local file or multipart to obs. Note: buckets that are not posix
+ * buckets do not support this; for them only flush() is called.
+ *
+ * @throws IOException io exception
+ */
+ private synchronized void flushOrSync() throws IOException {
+
+ checkOpen();
+ if (hasException.get()) {
+ String flushWarning = String.format(
+ "flushOrSync has error. bs : pre write obs[%s] has error.",
+ key);
+ LOG.warn(flushWarning);
+ throw new IOException(flushWarning);
+ }
+ if (fs.isFsBucket()) {
+ // upload
+ flushCurrentBlock();
+
+ // clear
+ clearHFlushOrSync();
+ } else {
+ LOG.warn("not posix bucket, not support hflush or hsync.");
+ flush();
+ }
+ }
+
+ /**
+ * Clear for hflush or hsync.
+ */
+ private synchronized void clearHFlushOrSync() {
+ appendAble.set(true);
+ multiPartUpload = null;
+ }
+
+ /**
+ * Upload block to obs.
+ *
+ * @param block block
+ * @param hasBlock whether there is a block to upload
+ * @throws IOException io exception
+ */
+ private synchronized void uploadWriteBlocks(
+ final OBSDataBlocks.DataBlock block,
+ final boolean hasBlock)
+ throws IOException {
+ if (multiPartUpload == null) {
+ if (hasBlock) {
+ // no uploads of data have taken place, put the single block
+ // up. This must happen even if there is no data, so that 0 byte
+ // files are created.
+ putObjectIfNeedAppend();
+ }
+ } else {
+ // there has already been at least one block scheduled for upload;
+ // put up the current then wait
+ if (hasBlock && block.hasData()) {
+ // send last part
+ uploadCurrentBlock();
+ }
+ // wait for the partial uploads to finish
+ final List The source of the updated property is set to the key name of the
+ * bucket property, to aid in diagnostics of where things came from.
+ *
+ * Returns a new configuration. Why the clone? You can use the same conf
+ * for different filesystems, and the original values are not updated.
+ *
+ * The {@code fs.obs.impl} property cannot be set, nor can any with the
+ * prefix {@code fs.obs.bucket}.
+ *
+ * This method does not propagate security provider path information
+ * from the OBS property into the Hadoop common provider: callers must call
+ * {@link #patchSecurityCredentialProviders(Configuration)} explicitly.
+ *
+ * @param source Source Configuration object.
+ * @param bucket bucket name. Must not be empty.
+ * @return a (potentially) patched clone of the original.
+ */
+ static Configuration propagateBucketOptions(final Configuration source,
+ final String bucket) {
+
+ Preconditions.checkArgument(StringUtils.isNotEmpty(bucket), "bucket");
+ final String bucketPrefix = OBSConstants.FS_OBS_BUCKET_PREFIX + bucket
+ + '.';
+ LOG.debug("Propagating entries under {}", bucketPrefix);
+ final Configuration dest = new Configuration(source);
+ for (Map.Entry This allows different buckets to use different credential files.
+ *
+ * @param conf configuration to patch
+ */
+ static void patchSecurityCredentialProviders(final Configuration conf) {
+ Collection Some of the strings are marked as {@code Unstable}. This means that they
+ * may be unsupported in future; at which point they will be marked as
+ * deprecated and simply ignored.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+final class OBSConstants {
+ /**
+ * Minimum multipart size which OBS supports.
+ */
+ static final int MULTIPART_MIN_SIZE = 5 * 1024 * 1024;
+
+ /**
+ * OBS access key.
+ */
+ static final String ACCESS_KEY = "fs.obs.access.key";
+
+ /**
+ * OBS secret key.
+ */
+ static final String SECRET_KEY = "fs.obs.secret.key";
+
+ /**
+ * OBS credentials provider.
+ */
+ static final String OBS_CREDENTIALS_PROVIDER
+ = "fs.obs.credentials.provider";
+
+ /**
+ * OBS client security provider.
+ */
+ static final String OBS_SECURITY_PROVIDER = "fs.obs.security.provider";
+
+ /**
+ * Extra set of security credentials which will be prepended to that set in
+ * {@code "hadoop.security.credential.provider.path"}. This extra option
+ * allows for per-bucket overrides.
+ */
+ static final String OBS_SECURITY_CREDENTIAL_PROVIDER_PATH =
+ "fs.obs.security.credential.provider.path";
+
+ /**
+ * Session token for when using TemporaryOBSCredentialsProvider.
+ */
+ static final String SESSION_TOKEN = "fs.obs.session.token";
+
+ /**
+ * Maximum number of simultaneous connections to obs.
+ */
+ static final String MAXIMUM_CONNECTIONS = "fs.obs.connection.maximum";
+
+ /**
+ * Default value of {@link #MAXIMUM_CONNECTIONS}.
+ */
+ static final int DEFAULT_MAXIMUM_CONNECTIONS = 1000;
+
+ /**
+ * Connect to obs over ssl.
+ */
+ static final String SECURE_CONNECTIONS = "fs.obs.connection.ssl.enabled";
+
+ /**
+ * Default value of {@link #SECURE_CONNECTIONS}.
+ */
+ static final boolean DEFAULT_SECURE_CONNECTIONS = false;
+
+ /**
+ * Use a custom endpoint.
+ */
+ static final String ENDPOINT = "fs.obs.endpoint";
+
+ /**
+ * Host for connecting to OBS through proxy server.
+ */
+ static final String PROXY_HOST = "fs.obs.proxy.host";
+
+ /**
+ * Port for connecting to OBS through proxy server.
+ */
+ static final String PROXY_PORT = "fs.obs.proxy.port";
+
+ /**
+ * User name for connecting to OBS through proxy server.
+ */
+ static final String PROXY_USERNAME = "fs.obs.proxy.username";
+
+ /**
+ * Password for connecting to OBS through proxy server.
+ */
+ static final String PROXY_PASSWORD = "fs.obs.proxy.password";
+
+ /**
+ * Default port for HTTPS.
+ */
+ static final int DEFAULT_HTTPS_PORT = 443;
+
+ /**
+ * Default port for HTTP.
+ */
+ static final int DEFAULT_HTTP_PORT = 80;
+
+ /**
+ * Number of times we should retry errors.
+ */
+ static final String MAX_ERROR_RETRIES = "fs.obs.attempts.maximum";
+
+ /**
+ * Default value of {@link #MAX_ERROR_RETRIES}.
+ */
+ static final int DEFAULT_MAX_ERROR_RETRIES = 3;
+
+ /**
+ * Seconds until we give up trying to establish a connection to obs.
+ */
+ static final String ESTABLISH_TIMEOUT
+ = "fs.obs.connection.establish.timeout";
+
+ /**
+ * Default value of {@link #ESTABLISH_TIMEOUT}.
+ */
+ static final int DEFAULT_ESTABLISH_TIMEOUT = 120000;
+
+ /**
+ * Seconds until we give up on a connection to obs.
+ */
+ static final String SOCKET_TIMEOUT = "fs.obs.connection.timeout";
+
+ /**
+ * Default value of {@link #SOCKET_TIMEOUT}.
+ */
+ static final int DEFAULT_SOCKET_TIMEOUT = 120000;
+
+ /**
+ * Socket send buffer to be used in OBS SDK.
+ */
+ static final String SOCKET_SEND_BUFFER = "fs.obs.socket.send.buffer";
+
+ /**
+ * Default value of {@link #SOCKET_SEND_BUFFER}.
+ */
+ static final int DEFAULT_SOCKET_SEND_BUFFER = 256 * 1024;
+
+ /**
+ * Socket receive buffer to be used in OBS SDK.
+ */
+ static final String SOCKET_RECV_BUFFER = "fs.obs.socket.recv.buffer";
+
+ /**
+ * Default value of {@link #SOCKET_RECV_BUFFER}.
+ */
+ static final int DEFAULT_SOCKET_RECV_BUFFER = 256 * 1024;
+
+ /**
+ * Number of records to get while paging through a directory listing.
+ */
+ static final String MAX_PAGING_KEYS = "fs.obs.paging.maximum";
+
+ /**
+ * Default value of {@link #MAX_PAGING_KEYS}.
+ */
+ static final int DEFAULT_MAX_PAGING_KEYS = 1000;
+
+ /**
+ * Maximum number of threads to allow in the pool used by TransferManager.
+ */
+ static final String MAX_THREADS = "fs.obs.threads.max";
+
+ /**
+ * Default value of {@link #MAX_THREADS}.
+ */
+ static final int DEFAULT_MAX_THREADS = 20;
+
+ /**
+ * Maximum number of tasks cached if all threads are already uploading.
+ */
+ static final String MAX_TOTAL_TASKS = "fs.obs.max.total.tasks";
+
+ /**
+ * Default value of {@link #MAX_TOTAL_TASKS}.
+ */
+ static final int DEFAULT_MAX_TOTAL_TASKS = 20;
+
+ /**
+ * Max number of copy threads.
+ */
+ static final String MAX_COPY_THREADS = "fs.obs.copy.threads.max";
+
+ /**
+ * Default value of {@link #MAX_COPY_THREADS}.
+ */
+ static final int DEFAULT_MAX_COPY_THREADS = 40;
+
+ /**
+ * Max number of delete threads.
+ */
+ static final String MAX_DELETE_THREADS = "fs.obs.delete.threads.max";
+
+ /**
+ * Default value of {@link #MAX_DELETE_THREADS}.
+ */
+ static final int DEFAULT_MAX_DELETE_THREADS = 20;
+
+ /**
+ * Unused option: maintained for compile-time compatibility. If set, a warning
+ * is logged in OBS during init.
+ */
+ @Deprecated
+ static final String CORE_THREADS = "fs.obs.threads.core";
+
+ /**
+ * The time that an idle thread waits before terminating.
+ */
+ static final String KEEPALIVE_TIME = "fs.obs.threads.keepalivetime";
+
+ /**
+ * Default value of {@link #KEEPALIVE_TIME}.
+ */
+ static final int DEFAULT_KEEPALIVE_TIME = 60;
+
+ /**
+ * Size of each of our multipart pieces in bytes.
+ */
+ static final String MULTIPART_SIZE = "fs.obs.multipart.size";
+
+ /**
+ * Default value of {@link #MULTIPART_SIZE}.
+ */
+ static final long DEFAULT_MULTIPART_SIZE = 104857600; // 100 MB
+
+ /**
+ * Enable multi-object delete calls.
+ */
+ static final String ENABLE_MULTI_DELETE = "fs.obs.multiobjectdelete.enable";
+
+ /**
+ * Max number of objects in one multi-object delete call. This option takes
+ * effect only when the option 'ENABLE_MULTI_DELETE' is set to 'true'.
+ */
+ static final String MULTI_DELETE_MAX_NUMBER
+ = "fs.obs.multiobjectdelete.maximum";
+
+ /**
+ * Default value of {@link #MULTI_DELETE_MAX_NUMBER}.
+ */
+ static final int DEFAULT_MULTI_DELETE_MAX_NUMBER = 1000;
+
+ /**
+ * Delete recursively or not.
+ */
+ static final String MULTI_DELETE_RECURSION
+ = "fs.obs.multiobjectdelete.recursion";
+
+ /**
+ * Minimum number of objects in one multi-object delete call.
+ */
+ static final String MULTI_DELETE_THRESHOLD
+ = "fs.obs.multiobjectdelete.threshold";
+
+ /**
+ * Default value of {@link #MULTI_DELETE_THRESHOLD}.
+ */
+ static final int MULTI_DELETE_DEFAULT_THRESHOLD = 3;
+
+ /**
+ * Comma separated list of directories.
+ */
+ static final String BUFFER_DIR = "fs.obs.buffer.dir";
+
+ /**
+ * Switch to the fast block-by-block upload mechanism.
+ */
+ static final String FAST_UPLOAD = "fs.obs.fast.upload";
+
+ /**
+ * What buffer to use. Default is {@link #FAST_UPLOAD_BUFFER_DISK} Value:
+ * {@value}
+ */
+ @InterfaceStability.Unstable
+ static final String FAST_UPLOAD_BUFFER = "fs.obs.fast.upload.buffer";
+
+ /**
+ * Buffer blocks to disk: {@value}. Capacity is limited to available disk
+ * space.
+ */
+ @InterfaceStability.Unstable
+ static final String FAST_UPLOAD_BUFFER_DISK = "disk";
+
+ /**
+ * Use an in-memory array. Fast but will run out of heap rapidly: {@value}.
+ */
+ @InterfaceStability.Unstable
+ static final String FAST_UPLOAD_BUFFER_ARRAY = "array";
+
+ /**
+ * Use a byte buffer. May be more memory efficient than the {@link
+ * #FAST_UPLOAD_BUFFER_ARRAY}: {@value}.
+ */
+ @InterfaceStability.Unstable
+ static final String FAST_UPLOAD_BYTEBUFFER = "bytebuffer";
+
+ /**
+ * Maximum number of blocks a single output stream can have active (uploading,
+ * or queued to the central FileSystem instance's pool of queued operations).
+ * This stops a single stream overloading the shared thread pool. {@value}
+ *
+ * Default is {@link #DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS}
+ */
+ @InterfaceStability.Unstable
+ static final String FAST_UPLOAD_ACTIVE_BLOCKS
+ = "fs.obs.fast.upload.active.blocks";
+
+ /**
+ * Limit of queued block upload operations before writes block. Value:
+ * {@value}
+ */
+ @InterfaceStability.Unstable
+ static final int DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS = 4;
+
+ /**
+ * Canned acl options: Private | PublicRead | PublicReadWrite |
+ * AuthenticatedRead | LogDeliveryWrite | BucketOwnerRead |
+ * BucketOwnerFullControl.
+ */
+ static final String CANNED_ACL = "fs.obs.acl.default";
+
+ /**
+ * Default value of {@link #CANNED_ACL}.
+ */
+ static final String DEFAULT_CANNED_ACL = "";
+
+ /**
+ * Should we try to purge old multipart uploads when starting up.
+ */
+ static final String PURGE_EXISTING_MULTIPART = "fs.obs.multipart.purge";
+
+ /**
+ * Default value of {@link #PURGE_EXISTING_MULTIPART}.
+ */
+ static final boolean DEFAULT_PURGE_EXISTING_MULTIPART = false;
+
+ /**
+ * Purge any multipart uploads older than this number of seconds.
+ */
+ static final String PURGE_EXISTING_MULTIPART_AGE
+ = "fs.obs.multipart.purge.age";
+
+ /**
+ * Default value of {@link #PURGE_EXISTING_MULTIPART_AGE}.
+ */
+ static final long DEFAULT_PURGE_EXISTING_MULTIPART_AGE = 86400;
+
+ /**
+ * OBS folder suffix.
+ */
+ static final String OBS_FOLDER_SUFFIX = "_$folder$";
+
+ /**
+ * Block size for
+ * {@link org.apache.hadoop.fs.FileSystem#getDefaultBlockSize()}.
+ */
+ static final String FS_OBS_BLOCK_SIZE = "fs.obs.block.size";
+
+ /**
+ * Default value of {@link #FS_OBS_BLOCK_SIZE}.
+ */
+ static final int DEFAULT_FS_OBS_BLOCK_SIZE = 128 * 1024 * 1024;
+
+ /**
+ * OBS scheme.
+ */
+ static final String OBS_SCHEME = "obs";
+
+ /**
+ * Prefix for all OBS properties: {@value}.
+ */
+ static final String FS_OBS_PREFIX = "fs.obs.";
+
+ /**
+ * Prefix for OBS bucket-specific properties: {@value}.
+ */
+ static final String FS_OBS_BUCKET_PREFIX = "fs.obs.bucket.";
+
+ /**
+ * OBS default port.
+ */
+ static final int OBS_DEFAULT_PORT = -1;
+
+ /**
+ * User agent prefix.
+ */
+ static final String USER_AGENT_PREFIX = "fs.obs.user.agent.prefix";
+
+ /**
+ * Read ahead buffer size to prevent connection re-establishments.
+ */
+ static final String READAHEAD_RANGE = "fs.obs.readahead.range";
+
+ /**
+ * Default value of {@link #READAHEAD_RANGE}.
+ */
+ static final long DEFAULT_READAHEAD_RANGE = 1024 * 1024;
+
+ /**
+ * Flag indicating if {@link OBSInputStream#read(long, byte[], int, int)} will
+ * use the implementation of
+ * {@link org.apache.hadoop.fs.FSInputStream#read(long,
+ * byte[], int, int)}.
+ */
+ static final String READ_TRANSFORM_ENABLE = "fs.obs.read.transform.enable";
+
+ /**
+ * OBS client factory implementation class.
+ */
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ static final String OBS_CLIENT_FACTORY_IMPL
+ = "fs.obs.client.factory.impl";
+
+ /**
+ * Default value of {@link #OBS_CLIENT_FACTORY_IMPL}.
+ */
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ static final Class<? extends OBSClientFactory>
+ DEFAULT_OBS_CLIENT_FACTORY_IMPL =
+ DefaultOBSClientFactory.class;
+
+ /**
+ * Maximum number of partitions in a multipart upload: {@value}.
+ */
+ @InterfaceAudience.Private
+ static final int MAX_MULTIPART_COUNT = 10000;
+
+ // OBS Client configuration
+
+ /**
+ * Idle connection time.
+ */
+ static final String IDLE_CONNECTION_TIME = "fs.obs.idle.connection.time";
+
+ /**
+ * Default value of {@link #IDLE_CONNECTION_TIME}.
+ */
+ static final int DEFAULT_IDLE_CONNECTION_TIME = 30000;
+
+ /**
+ * Maximum number of idle connections.
+ */
+ static final String MAX_IDLE_CONNECTIONS = "fs.obs.max.idle.connections";
+
+ /**
+ * Default value of {@link #MAX_IDLE_CONNECTIONS}.
+ */
+ static final int DEFAULT_MAX_IDLE_CONNECTIONS = 1000;
+
+ /**
+ * Keep alive.
+ */
+ static final String KEEP_ALIVE = "fs.obs.keep.alive";
+
+ /**
+ * Default value of {@link #KEEP_ALIVE}.
+ */
+ static final boolean DEFAULT_KEEP_ALIVE = true;
+
+ /**
+ * Validate certificate.
+ */
+ static final String VALIDATE_CERTIFICATE = "fs.obs.validate.certificate";
+
+ /**
+ * Default value of {@link #VALIDATE_CERTIFICATE}.
+ */
+ static final boolean DEFAULT_VALIDATE_CERTIFICATE = false;
+
+ /**
+ * Verify response content type.
+ */
+ static final String VERIFY_RESPONSE_CONTENT_TYPE
+ = "fs.obs.verify.response.content.type";
+
+ /**
+ * Default value of {@link #VERIFY_RESPONSE_CONTENT_TYPE}.
+ */
+ static final boolean DEFAULT_VERIFY_RESPONSE_CONTENT_TYPE = true;
+
+ /**
+ * UploadStreamRetryBufferSize.
+ */
+ static final String UPLOAD_STREAM_RETRY_SIZE
+ = "fs.obs.upload.stream.retry.buffer.size";
+
+ /**
+ * Default value of {@link #UPLOAD_STREAM_RETRY_SIZE}.
+ */
+ static final int DEFAULT_UPLOAD_STREAM_RETRY_SIZE = 512 * 1024;
+
+ /**
+ * Read buffer size.
+ */
+ static final String READ_BUFFER_SIZE = "fs.obs.read.buffer.size";
+
+ /**
+ * Default value of {@link #READ_BUFFER_SIZE}.
+ */
+ static final int DEFAULT_READ_BUFFER_SIZE = 256 * 1024;
+
+ /**
+ * Write buffer size.
+ */
+ static final String WRITE_BUFFER_SIZE = "fs.obs.write.buffer.size";
+
+ /**
+ * Default value of {@link #WRITE_BUFFER_SIZE}.
+ */
+ static final int DEFAULT_WRITE_BUFFER_SIZE = 256 * 1024;
+
+ /**
+ * Canonical name.
+ */
+ static final String CNAME = "fs.obs.cname";
+
+ /**
+ * Default value of {@link #CNAME}.
+ */
+ static final boolean DEFAULT_CNAME = false;
+
+ /**
+ * Strict host name verification.
+ */
+ static final String STRICT_HOSTNAME_VERIFICATION
+ = "fs.obs.strict.hostname.verification";
+
+ /**
+ * Default value of {@link #STRICT_HOSTNAME_VERIFICATION}.
+ */
+ static final boolean DEFAULT_STRICT_HOSTNAME_VERIFICATION = false;
+
+ /**
+ * Size of object copy part pieces in bytes.
+ */
+ static final String COPY_PART_SIZE = "fs.obs.copypart.size";
+
+ /**
+ * Maximum value of {@link #COPY_PART_SIZE}.
+ */
+ static final long MAX_COPY_PART_SIZE = 5368709120L; // 5GB
+
+ /**
+ * Default value of {@link #COPY_PART_SIZE}.
+ */
+ static final long DEFAULT_COPY_PART_SIZE = 104857600L; // 100MB
+
+ /**
+ * Maximum number of copy part threads.
+ */
+ static final String MAX_COPY_PART_THREADS = "fs.obs.copypart.threads.max";
+
+ /**
+ * Default value of {@link #MAX_COPY_PART_THREADS}.
+ */
+ static final int DEFAULT_MAX_COPY_PART_THREADS = 40;
+
+ /**
+ * Number of core list threads.
+ */
+ static final String CORE_LIST_THREADS = "fs.obs.list.threads.core";
+
+ /**
+ * Default value of {@link #CORE_LIST_THREADS}.
+ */
+ static final int DEFAULT_CORE_LIST_THREADS = 30;
+
+ /**
+ * Maximum number of list threads.
+ */
+ static final String MAX_LIST_THREADS = "fs.obs.list.threads.max";
+
+ /**
+ * Default value of {@link #MAX_LIST_THREADS}.
+ */
+ static final int DEFAULT_MAX_LIST_THREADS = 60;
+
+ /**
+ * Capacity of list work queue.
+ */
+ static final String LIST_WORK_QUEUE_CAPACITY
+ = "fs.obs.list.workqueue.capacity";
+
+ /**
+ * Default value of {@link #LIST_WORK_QUEUE_CAPACITY}.
+ */
+ static final int DEFAULT_LIST_WORK_QUEUE_CAPACITY = 1024;
+
+ /**
+ * List parallel factor.
+ */
+ static final String LIST_PARALLEL_FACTOR = "fs.obs.list.parallel.factor";
+
+ /**
+ * Default value of {@link #LIST_PARALLEL_FACTOR}.
+ */
+ static final int DEFAULT_LIST_PARALLEL_FACTOR = 30;
+
+ /**
+ * Switch for the fast delete.
+ */
+ static final String TRASH_ENABLE = "fs.obs.trash.enable";
+
+ /**
+ * Enable obs content summary or not.
+ */
+ static final String OBS_CONTENT_SUMMARY_ENABLE
+ = "fs.obs.content.summary.enable";
+
+ /**
+ * Enable obs client dfs list or not.
+ */
+ static final String OBS_CLIENT_DFS_LIST_ENABLE
+ = "fs.obs.client.dfs.list.enable";
+
+ /**
+ * Default trash : false.
+ */
+ static final boolean DEFAULT_TRASH = false;
+
+ /**
+ * The fast delete recycle directory.
+ */
+ static final String TRASH_DIR = "fs.obs.trash.dir";
+
+ /**
+ * Encryption type is sse-kms or sse-c.
+ */
+ static final String SSE_TYPE = "fs.obs.server-side-encryption-type";
+
+ /**
+ * Kms key id for sse-kms, while key base64 encoded content for sse-c.
+ */
+ static final String SSE_KEY = "fs.obs.server-side-encryption-key";
+
+ /**
+ * Array first block size.
+ */
+ static final String FAST_UPLOAD_BUFFER_ARRAY_FIRST_BLOCK_SIZE
+ = "fs.obs.fast.upload.array.first.buffer";
+
+ /**
+ * The fast upload buffer array first block default size.
+ */
+ static final int FAST_UPLOAD_BUFFER_ARRAY_FIRST_BLOCK_SIZE_DEFAULT = 1024
+ * 1024;
+
+ /**
+ * Auth Type Negotiation Enable Switch.
+ */
+ static final String SDK_AUTH_TYPE_NEGOTIATION_ENABLE
+ = "fs.obs.authtype.negotiation.enable";
+
+ /**
+ * Default value of {@link #SDK_AUTH_TYPE_NEGOTIATION_ENABLE}.
+ */
+ static final boolean DEFAULT_SDK_AUTH_TYPE_NEGOTIATION_ENABLE = false;
+
+ /**
+ * Okhttp retryOnConnectionFailure switch.
+ */
+ static final String SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE
+ = "fs.obs.connection.retry.enable";
+
+ /**
+ * Default value of {@link #SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE}.
+ */
+ static final boolean DEFAULT_SDK_RETRY_ON_CONNECTION_FAILURE_ENABLE = true;
+
+ /**
+ * Sdk max retry times on unexpected end of stream exception; default: -1,
+ * don't retry.
+ */
+ static final String SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION
+ = "fs.obs.unexpectedend.retrytime";
+
+ /**
+ * Default value of {@link #SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION}.
+ */
+ static final int DEFAULT_SDK_RETRY_TIMES_ON_UNEXPECTED_END_EXCEPTION = -1;
+
+ /**
+ * Maximum sdk connection retry times, default : 2000.
+ */
+ static final int DEFAULT_MAX_SDK_CONNECTION_RETRY_TIMES = 2000;
+
+ /**
+ * Second to millisecond factor.
+ */
+ static final int SEC2MILLISEC_FACTOR = 1000;
+
+ private OBSConstants() {
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSDataBlocks.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSDataBlocks.java
new file mode 100644
index 0000000000000..5e413e6841a19
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSDataBlocks.java
@@ -0,0 +1,1020 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSExceptionMessages;
+import org.apache.hadoop.fs.LocalDirAllocator;
+import org.apache.hadoop.util.DirectBufferPool;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Set of classes to support output streaming into blocks which are then
+ * uploaded to OBS as a single PUT, or as part of a multipart request.
+ */
+final class OBSDataBlocks {
+
+ /**
+ * Class logger; also used by the nested factory and block classes.
+ */
+ private static final Logger LOG = LoggerFactory.getLogger(
+ OBSDataBlocks.class);
+
+ /**
+ * Private constructor: prevents instantiation by other classes.
+ */
+ private OBSDataBlocks() {
+ }
+
+ /**
+ * Validate args to a write command. These are the same validation checks
+ * expected for any implementation of {@code OutputStream.write()}.
+ *
+ * @param b byte array containing data
+ * @param off offset in array where to start
+ * @param len number of bytes to be written
+ * @throws NullPointerException for a null buffer
+ * @throws IndexOutOfBoundsException if indices are out of range
+ */
+ static void validateWriteArgs(final byte[] b, final int off,
+ final int len) {
+ Preconditions.checkNotNull(b);
+ if (off < 0 || off > b.length || len < 0 || off + len > b.length
+ || off + len < 0) {
+ throw new IndexOutOfBoundsException(
+ "write (b[" + b.length + "], " + off + ", " + len + ')');
+ }
+ }
+
+ /**
+ * Create a factory.
+ *
+ * @param owner factory owner
+ * @param name factory name -the option from {@link OBSConstants}.
+ * @return the factory, ready to be initialized.
+ * @throws IllegalArgumentException if the name is unknown.
+ */
+ static BlockFactory createFactory(final OBSFileSystem owner,
+ final String name) {
+ switch (name) {
+ case OBSConstants.FAST_UPLOAD_BUFFER_ARRAY:
+ return new ByteArrayBlockFactory(owner);
+ case OBSConstants.FAST_UPLOAD_BUFFER_DISK:
+ return new DiskBlockFactory(owner);
+ case OBSConstants.FAST_UPLOAD_BYTEBUFFER:
+ return new ByteBufferBlockFactory(owner);
+ default:
+ throw new IllegalArgumentException(
+ "Unsupported block buffer" + " \"" + name + '"');
+ }
+ }
+
+ /**
+  * Common parent of all block factories; it only remembers the file system
+  * that owns the factory.
+  */
+ abstract static class BlockFactory {
+   /**
+    * The OBS file system which owns this factory.
+    */
+   private final OBSFileSystem owner;
+
+   protected BlockFactory(final OBSFileSystem fs) {
+     owner = fs;
+   }
+
+   /**
+    * Get the owner of this factory.
+    *
+    * @return the file system instance passed to the constructor
+    */
+   protected OBSFileSystem getOwner() {
+     return this.owner;
+   }
+
+   /**
+    * Create a block.
+    *
+    * @param index index of block
+    * @param limit limit of the block.
+    * @return a new block.
+    * @throws IOException on any failure to create block
+    */
+   abstract DataBlock create(long index, int limit) throws IOException;
+ }
+
+ /**
+  * A block of data which is written to and then uploaded. The block is
+  * always in one of the {@link DestState} states.
+  */
+ abstract static class DataBlock implements Closeable {
+
+   /**
+    * Index of this block.
+    */
+   private final long index;
+
+   /**
+    * Current state; one of writing/upload/closed. Starts as writing.
+    */
+   private volatile DestState state = DestState.Writing;
+
+   protected DataBlock(final long dataIndex) {
+     index = dataIndex;
+   }
+
+   /**
+    * Move to a new state after checking the current one; the check and the
+    * update happen under the block's monitor.
+    *
+    * @param current state the block must currently be in; null skips the
+    *                check
+    * @param next    state to enter
+    * @throws IllegalStateException if the current state is not as expected
+    */
+   protected final synchronized void enterState(final DestState current,
+       final DestState next)
+       throws IllegalStateException {
+     verifyState(current);
+     LOG.debug("{}: entering state {}", this, next);
+     this.state = next;
+   }
+
+   /**
+    * Check that the block is in the given state.
+    *
+    * @param expected expected state; null disables the check.
+    * @throws IllegalStateException if the DataBlock is in the wrong state
+    */
+   protected final void verifyState(final DestState expected)
+       throws IllegalStateException {
+     if (expected == null || state == expected) {
+       return;
+     }
+     throw new IllegalStateException(
+         "Expected stream state " + expected
+             + " -but actual state is " + state + " in " + this);
+   }
+
+   /**
+    * Get the state the block is in right now.
+    *
+    * @return the current state.
+    */
+   protected final DestState getState() {
+     return this.state;
+   }
+
+   /**
+    * Get the index given to the block at construction time.
+    *
+    * @return the block index
+    */
+   protected long getIndex() {
+     return this.index;
+   }
+
+   /**
+    * Return the current data size.
+    *
+    * @return the size of the data
+    */
+   abstract int dataSize();
+
+   /**
+    * Predicate to verify that the block has the capacity to write the given
+    * set of bytes.
+    *
+    * @param bytes number of bytes desired to be written.
+    * @return true if there is enough space.
+    */
+   abstract boolean hasCapacity(long bytes);
+
+   /**
+    * Predicate to check if there is data in the block.
+    *
+    * @return true if {@link #dataSize()} is greater than zero
+    */
+   boolean hasData() {
+     final int size = dataSize();
+     return size > 0;
+   }
+
+   /**
+    * The remaining capacity in the block before it is full.
+    *
+    * @return the number of bytes remaining.
+    */
+   abstract int remainingCapacity();
+
+   /**
+    * Write a series of bytes from the buffer, from the offset. Only valid in
+    * the state {@code Writing}. This base implementation validates the state
+    * and the arguments, writes nothing and always returns 0; subclasses call
+    * it before doing the real write.
+    *
+    * @param buffer buffer
+    * @param offset offset
+    * @param length length of write
+    * @return number of bytes written
+    * @throws IOException trouble
+    */
+   int write(final byte[] buffer, final int offset, final int length)
+       throws IOException {
+     verifyState(DestState.Writing);
+     Preconditions.checkArgument(buffer != null, "Null buffer");
+     Preconditions.checkArgument(length >= 0, "length is negative");
+     Preconditions.checkArgument(offset >= 0, "offset is negative");
+     final int available = buffer.length - offset;
+     Preconditions.checkArgument(
+         available >= length,
+         "buffer shorter than amount of data to write");
+     return 0;
+   }
+
+   /**
+    * Flush the output. Only valid in the state {@code Writing}. The base
+    * implementation only checks the state.
+    *
+    * @throws IOException any IO problem.
+    */
+   void flush() throws IOException {
+     verifyState(DestState.Writing);
+   }
+
+   /**
+    * Switch from the writing state to the upload state. The base
+    * implementation only drives the state machine through
+    * {@link #enterState(DestState, DestState)} and returns null; subclasses
+    * return the actual upload source.
+    *
+    * @return the upload source (null in the base class)
+    * @throws IOException trouble
+    */
+   Object startUpload() throws IOException {
+     LOG.debug("Start datablock[{}] upload", index);
+     enterState(DestState.Writing, DestState.Upload);
+     return null;
+   }
+
+   /**
+    * Enter the closed state.
+    *
+    * @return true if the block was in any other state, implying that the
+    * subclass should do its close operations
+    */
+   protected synchronized boolean enterClosedState() {
+     if (state.equals(DestState.Closed)) {
+       return false;
+     }
+     enterState(null, DestState.Closed);
+     return true;
+   }
+
+   @Override
+   public void close() throws IOException {
+     if (!enterClosedState()) {
+       // already closed: nothing left to release
+       return;
+     }
+     LOG.debug("Closed {}", this);
+     innerClose();
+   }
+
+   /**
+    * Inner close logic for subclasses to implement. It is invoked by
+    * {@link #close()} after the block has entered the closed state.
+    *
+    * @throws IOException on any failure to close
+    */
+   protected abstract void innerClose() throws IOException;
+
+   /**
+    * Destination state definition for a data block.
+    */
+   enum DestState {
+     /**
+      * Data is being written into the block.
+      */
+     Writing,
+     /**
+      * The block is being uploaded.
+      */
+     Upload,
+     /**
+      * The block has been closed.
+      */
+     Closed
+   }
+ }
+
+ /**
+  * Use byte arrays on the heap for storage.
+  */
+ static class ByteArrayBlockFactory extends BlockFactory {
+   ByteArrayBlockFactory(final OBSFileSystem owner) {
+     super(owner);
+   }
+
+   /**
+    * Create a heap-backed block. The initial array size is read from the
+    * owner's configuration under
+    * {@link OBSConstants#FAST_UPLOAD_BUFFER_ARRAY_FIRST_BLOCK_SIZE}.
+    *
+    * @param index index of block
+    * @param limit limit of the block.
+    * @return a new {@link ByteArrayBlock}
+    */
+   @Override
+   DataBlock create(final long index, final int limit) {
+     final int firstBlockSize = getOwner().getConf()
+         .getInt(OBSConstants.FAST_UPLOAD_BUFFER_ARRAY_FIRST_BLOCK_SIZE,
+             OBSConstants
+                 .FAST_UPLOAD_BUFFER_ARRAY_FIRST_BLOCK_SIZE_DEFAULT);
+     // pass the real index on, so logs and toString() identify the block
+     return new ByteArrayBlock(index, limit, firstBlockSize);
+   }
+ }
+
+ /**
+  * {@link ByteArrayOutputStream} which can hand its internal array over to
+  * an input stream without copying it.
+  */
+ static class OBSByteArrayOutputStream extends ByteArrayOutputStream {
+   OBSByteArrayOutputStream(final int size) {
+     super(size);
+   }
+
+   /**
+    * Build an input stream over the bytes written so far, backed directly
+    * by the internal byte array. The output stream is reset and drops its
+    * reference to the array afterwards.
+    *
+    * @return input stream
+    */
+   ByteArrayInputStream getInputStream() {
+     final byte[] data = this.buf;
+     final int length = this.count;
+     final ByteArrayInputStream source =
+         new ByteArrayInputStream(data, 0, length);
+     reset();
+     // detach from the array: it now belongs to the input stream
+     this.buf = null;
+     return source;
+   }
+ }
+
+ /**
+  * Stream to memory via a {@code ByteArrayOutputStream}.
+  *
+  * <p>This was taken from {@code OBSBlockOutputStream} and has the same
+  * problem which surfaced there: it can consume a lot of heap space
+  * proportional to the mismatch between writes to the stream and the JVM-wide
+  * upload bandwidth to the OBS endpoint. The memory consumption can be limited
+  * by tuning the filesystem settings to restrict the number of queued/active
+  * uploads.
+  */
+ static class ByteArrayBlock extends DataBlock {
+   /**
+    * Maximum number of bytes this block accepts.
+    */
+   private final int limit;
+
+   /**
+    * Output stream collecting the data; null once the block is closed.
+    */
+   private OBSByteArrayOutputStream buffer;
+
+   /**
+    * Cache data size so that it is consistent after the buffer is reset.
+    * Null until {@link #startUpload()} has been called.
+    */
+   private Integer dataSize;
+
+   /**
+    * Initial capacity of the backing array.
+    */
+   private final int firstBlockSize;
+
+   /**
+    * Input stream handed out by {@link #startUpload()}.
+    */
+   private ByteArrayInputStream inputStream = null;
+
+   /**
+    * Create a block.
+    *
+    * @param index          block index
+    * @param limitBlockSize maximum amount of data the block accepts
+    * @param blockSize      initial capacity of the backing array
+    */
+   ByteArrayBlock(final long index, final int limitBlockSize,
+       final int blockSize) {
+     super(index);
+     this.limit = limitBlockSize;
+     this.buffer = new OBSByteArrayOutputStream(blockSize);
+     this.firstBlockSize = blockSize;
+   }
+
+   /**
+    * Returns the block first block size.
+    *
+    * @return the block first block size
+    */
+   @VisibleForTesting
+   public int firstBlockSize() {
+     return this.firstBlockSize;
+   }
+
+   /**
+    * Get the amount of data; if there is no buffer then the size is 0.
+    *
+    * @return the amount of data available to upload.
+    */
+   @Override
+   int dataSize() {
+     if (dataSize != null) {
+       return dataSize;
+     }
+     // the buffer is gone when the block was closed before any upload
+     return buffer != null ? buffer.size() : 0;
+   }
+
+   /**
+    * Switch to the upload state and expose the buffered data as a stream.
+    * The data size is cached first because the output buffer is reset when
+    * its array is handed over.
+    *
+    * @return input stream over the buffered data
+    * @throws IOException trouble
+    */
+   @Override
+   InputStream startUpload() throws IOException {
+     super.startUpload();
+     dataSize = buffer.size();
+     inputStream = buffer.getInputStream();
+     return inputStream;
+   }
+
+   @Override
+   boolean hasCapacity(final long bytes) {
+     return dataSize() + bytes <= limit;
+   }
+
+   @Override
+   int remainingCapacity() {
+     return limit - dataSize();
+   }
+
+   /**
+    * Write as much of the data as still fits below the limit.
+    *
+    * @param b      source array
+    * @param offset offset of the first byte to write
+    * @param len    number of bytes offered
+    * @return number of bytes actually written, possibly less than len
+    * @throws IOException trouble
+    */
+   @Override
+   int write(final byte[] b, final int offset, final int len)
+       throws IOException {
+     // state and argument validation only; the base class writes nothing
+     super.write(b, offset, len);
+     int written = Math.min(remainingCapacity(), len);
+     buffer.write(b, offset, written);
+     return written;
+   }
+
+   /**
+    * Close and drop both the output buffer and the upload stream.
+    *
+    * @throws IOException on any failure to close
+    */
+   @Override
+   protected void innerClose() throws IOException {
+     if (buffer != null) {
+       buffer.close();
+       buffer = null;
+     }
+
+     if (inputStream != null) {
+       inputStream.close();
+       inputStream = null;
+     }
+   }
+
+   @Override
+   public String toString() {
+     return "ByteArrayBlock{"
+         + "index="
+         + getIndex()
+         + ", state="
+         + getState()
+         + ", limit="
+         + limit
+         + ", dataSize="
+         + dataSize()
+         + '}';
+   }
+ }
+
+ /**
+  * Factory of blocks backed by direct ByteBuffers, which are obtained from
+  * and returned to a shared {@link DirectBufferPool}.
+  */
+ static class ByteBufferBlockFactory extends BlockFactory {
+
+   /**
+    * Pool of direct buffers shared by all instances.
+    */
+   private static final DirectBufferPool BUFFER_POOL
+       = new DirectBufferPool();
+
+   /**
+    * Number of buffers requested and not yet released.
+    */
+   private static final AtomicInteger BUFFERS_OUTSTANDING
+       = new AtomicInteger(0);
+
+   ByteBufferBlockFactory(final OBSFileSystem owner) {
+     super(owner);
+   }
+
+   @Override
+   ByteBufferBlock create(final long index, final int limit) {
+     return new ByteBufferBlock(index, limit);
+   }
+
+   /**
+    * Take a buffer from the pool and count it as outstanding.
+    *
+    * @param limit requested buffer size
+    * @return the buffer supplied by the pool
+    */
+   public static ByteBuffer requestBuffer(final int limit) {
+     LOG.debug("Requesting buffer of size {}", limit);
+     BUFFERS_OUTSTANDING.incrementAndGet();
+     final ByteBuffer pooled = BUFFER_POOL.getBuffer(limit);
+     return pooled;
+   }
+
+   /**
+    * Give a buffer back to the pool and stop counting it as outstanding.
+    *
+    * @param buffer buffer to return
+    */
+   public static void releaseBuffer(final ByteBuffer buffer) {
+     LOG.debug("Releasing buffer");
+     BUFFER_POOL.returnBuffer(buffer);
+     BUFFERS_OUTSTANDING.decrementAndGet();
+   }
+
+   /**
+    * Get count of outstanding buffers.
+    *
+    * @return the current buffer count
+    */
+   public int getOutstandingBufferCount() {
+     final int outstanding = BUFFERS_OUTSTANDING.get();
+     return outstanding;
+   }
+
+   @Override
+   public String toString() {
+     final StringBuilder text = new StringBuilder("ByteBufferBlockFactory{");
+     text.append("buffersOutstanding=").append(BUFFERS_OUTSTANDING.get());
+     text.append('}');
+     return text.toString();
+   }
+ }
+
+ /**
+  * A DataBlock which requests a buffer from pool on creation; returns it when
+  * it is closed.
+  */
+ static class ByteBufferBlock extends DataBlock {
+   /**
+    * Size of the buffer requested at construction time.
+    */
+   private final int bufferSize;
+
+   /**
+    * Buffer holding the block data; null once the block is closed.
+    */
+   private ByteBuffer blockBuffer;
+
+   /**
+    * Cache data size so that it is consistent after the buffer is reset.
+    * Null until {@link #startUpload()} has been called.
+    */
+   private Integer dataSize;
+
+   /**
+    * Input stream handed out by {@link #startUpload()}.
+    */
+   private ByteBufferInputStream inputStream;
+
+   /**
+    * Instantiate. This will request a ByteBuffer of the desired size.
+    *
+    * @param index          block index
+    * @param initBufferSize buffer size
+    */
+   ByteBufferBlock(final long index, final int initBufferSize) {
+     super(index);
+     this.bufferSize = initBufferSize;
+     blockBuffer = ByteBufferBlockFactory.requestBuffer(initBufferSize);
+   }
+
+   /**
+    * Get the amount of data; if there is no buffer then the size is 0.
+    *
+    * @return the amount of data available to upload.
+    */
+   @Override
+   int dataSize() {
+     return dataSize != null ? dataSize : bufferCapacityUsed();
+   }
+
+   /**
+    * Switch to the upload state and expose the written data as a stream.
+    *
+    * @return input stream reading the buffer from its beginning
+    * @throws IOException trouble
+    */
+   @Override
+   InputStream startUpload() throws IOException {
+     super.startUpload();
+     dataSize = bufferCapacityUsed();
+     // set the buffer up for reading from the beginning
+     blockBuffer.limit(blockBuffer.position());
+     blockBuffer.position(0);
+     inputStream = new ByteBufferInputStream(dataSize, blockBuffer);
+     return inputStream;
+   }
+
+   @Override
+   public boolean hasCapacity(final long bytes) {
+     return bytes <= remainingCapacity();
+   }
+
+   @Override
+   public int remainingCapacity() {
+     return blockBuffer != null ? blockBuffer.remaining() : 0;
+   }
+
+   /**
+    * Number of bytes of the buffer which are in use.
+    *
+    * @return capacity minus remaining space, or 0 if the buffer has already
+    * been released
+    */
+   private int bufferCapacityUsed() {
+     final ByteBuffer buf = blockBuffer;
+     if (buf == null) {
+       // closed before an upload started: there is no data any more
+       return 0;
+     }
+     return buf.capacity() - buf.remaining();
+   }
+
+   /**
+    * Write as much of the data as still fits into the buffer.
+    *
+    * @param b      source array
+    * @param offset offset of the first byte to write
+    * @param len    number of bytes offered
+    * @return number of bytes actually written, possibly less than len
+    * @throws IOException trouble
+    */
+   @Override
+   int write(final byte[] b, final int offset, final int len)
+       throws IOException {
+     // state and argument validation only; the base class writes nothing
+     super.write(b, offset, len);
+     int written = Math.min(remainingCapacity(), len);
+     blockBuffer.put(b, offset, written);
+     return written;
+   }
+
+   /**
+    * Closing the block will release the buffer.
+    */
+   @Override
+   protected void innerClose() {
+     if (blockBuffer != null) {
+       ByteBufferBlockFactory.releaseBuffer(blockBuffer);
+       blockBuffer = null;
+     }
+     if (inputStream != null) {
+       inputStream.close();
+       inputStream = null;
+     }
+   }
+
+   @Override
+   public String toString() {
+     return "ByteBufferBlock{"
+         + "index="
+         + getIndex()
+         + ", state="
+         + getState()
+         + ", dataSize="
+         + dataSize()
+         + ", limit="
+         + bufferSize
+         + ", remainingCapacity="
+         + remainingCapacity()
+         + '}';
+   }
+
+   /**
+    * Provide an input stream from a byte buffer; supporting {@link
+    * #mark(int)}, which is required to enable replay of failed PUT attempts.
+    */
+   class ByteBufferInputStream extends InputStream {
+
+     /**
+      * Total number of bytes in the stream.
+      */
+     private final int size;
+
+     /**
+      * Buffer being read; null once the stream is closed.
+      */
+     private ByteBuffer byteBuffer;
+
+     ByteBufferInputStream(final int streamSize,
+         final ByteBuffer streamByteBuffer) {
+       LOG.debug("Creating ByteBufferInputStream of size {}",
+           streamSize);
+       this.size = streamSize;
+       this.byteBuffer = streamByteBuffer;
+     }
+
+     /**
+      * After the stream is closed, set the local reference to the byte buffer
+      * to null; this guarantees that future attempts to use stream methods
+      * will fail.
+      */
+     @Override
+     public synchronized void close() {
+       LOG.debug("ByteBufferInputStream.close() for {}",
+           ByteBufferBlock.super.toString());
+       byteBuffer = null;
+     }
+
+     /**
+      * Verify that the stream is open.
+      *
+      * @throws IOException if the stream is closed
+      */
+     private void verifyOpen() throws IOException {
+       if (byteBuffer == null) {
+         throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED);
+       }
+     }
+
+     /**
+      * Read a single byte.
+      *
+      * @return the byte as a value in the range 0-255, or -1 at the end of
+      * the data
+      * @throws IllegalStateException if the stream is closed
+      */
+     @Override
+     public synchronized int read() {
+       if (available() > 0) {
+         return byteBuffer.get() & OBSCommonUtils.BYTE_TO_INT_MASK;
+       } else {
+         return -1;
+       }
+     }
+
+     /**
+      * Move the read position by the given number of bytes.
+      *
+      * @param offset number of bytes to skip, relative to the current
+      *               position
+      * @return the number of bytes skipped, as required by the contract of
+      * {@link InputStream#skip(long)}
+      * @throws EOFException if the new position would be negative or past
+      *                      the end of the data
+      * @throws IOException  if the stream is closed
+      */
+     @Override
+     public synchronized long skip(final long offset)
+         throws IOException {
+       verifyOpen();
+       long newPos = position() + offset;
+       if (newPos < 0) {
+         throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK);
+       }
+       if (newPos > size) {
+         throw new EOFException(
+             FSExceptionMessages.CANNOT_SEEK_PAST_EOF);
+       }
+       byteBuffer.position((int) newPos);
+       // report the distance moved, not the absolute position reached
+       return offset;
+     }
+
+     @Override
+     public synchronized int available() {
+       Preconditions.checkState(byteBuffer != null,
+           FSExceptionMessages.STREAM_IS_CLOSED);
+       return byteBuffer.remaining();
+     }
+
+     /**
+      * Get the current buffer position.
+      *
+      * @return the buffer position
+      */
+     public synchronized int position() {
+       return byteBuffer.position();
+     }
+
+     /**
+      * Check if there is data left.
+      *
+      * @return true if there is data remaining in the buffer.
+      */
+     public synchronized boolean hasRemaining() {
+       return byteBuffer.hasRemaining();
+     }
+
+     @Override
+     public synchronized void mark(final int readlimit) {
+       LOG.debug("mark at {}", position());
+       byteBuffer.mark();
+     }
+
+     @Override
+     public synchronized void reset() {
+       LOG.debug("reset");
+       byteBuffer.reset();
+     }
+
+     @Override
+     public boolean markSupported() {
+       return true;
+     }
+
+     /**
+      * Read in data.
+      *
+      * @param b      destination buffer
+      * @param offset offset within the buffer
+      * @param length length of bytes to read
+      * @return read size, or -1 if no data is left
+      * @throws IOException               if the stream is closed
+      * @throws IndexOutOfBoundsException if there isn't space for the amount
+      *                                   of data requested.
+      * @throws IllegalArgumentException  other arguments are invalid.
+      */
+     @Override
+     public synchronized int read(final byte[] b, final int offset,
+         final int length)
+         throws IOException {
+       Preconditions.checkArgument(length >= 0, "length is negative");
+       Preconditions.checkArgument(b != null, "Null buffer");
+       if (b.length - offset < length) {
+         throw new IndexOutOfBoundsException(
+             FSExceptionMessages.TOO_MANY_BYTES_FOR_DEST_BUFFER
+                 + ": request length ="
+                 + length
+                 + ", with offset ="
+                 + offset
+                 + "; buffer capacity ="
+                 + (b.length - offset));
+       }
+       verifyOpen();
+       if (!hasRemaining()) {
+         return -1;
+       }
+
+       int toRead = Math.min(length, available());
+       byteBuffer.get(b, offset, toRead);
+       return toRead;
+     }
+
+     @Override
+     public String toString() {
+       final StringBuilder sb = new StringBuilder(
+           "ByteBufferInputStream{");
+       sb.append("size=").append(size);
+       // copy the reference: close() may null the field
+       ByteBuffer buf = this.byteBuffer;
+       if (buf != null) {
+         sb.append(", available=").append(buf.remaining());
+       }
+       sb.append(", ").append(ByteBufferBlock.super.toString());
+       sb.append('}');
+       return sb.toString();
+     }
+   }
+ }
+
+ /**
+  * Factory of blocks which buffer their data in local temporary files.
+  */
+ static class DiskBlockFactory extends BlockFactory {
+   /**
+    * Allocator of local directories; created on first use.
+    */
+   private static LocalDirAllocator directoryAllocator;
+
+   DiskBlockFactory(final OBSFileSystem owner) {
+     super(owner);
+   }
+
+   /**
+    * Create a temp file and a {@link DiskBlock} instance to manage it.
+    *
+    * @param index block index
+    * @param limit limit of the block.
+    * @return the new block
+    * @throws IOException IO problems
+    */
+   @Override
+   DataBlock create(final long index, final int limit) throws IOException {
+     final String prefix = String.format("obs-block-%04d-", index);
+     final Configuration conf = getOwner().getConf();
+     final File destFile = createTmpFileForWrite(prefix, limit, conf);
+     return new DiskBlock(destFile, limit, index);
+   }
+
+   /**
+    * Create the directory allocator on demand, then ask it for a temporary
+    * file through {@link LocalDirAllocator#createTmpFileForWrite(String,
+    * long, Configuration)}. The allocator is bound to the
+    * {@link OBSConstants#BUFFER_DIR} option when that option is set in the
+    * configuration, and to {@code hadoop.tmp.dir} otherwise.
+    *
+    * @param pathStr prefix for the temporary file
+    * @param size    the size of the file that is going to be written
+    * @param conf    the Configuration object
+    * @return a unique temporary file
+    * @throws IOException IO problems
+    */
+   static synchronized File createTmpFileForWrite(final String pathStr,
+       final long size, final Configuration conf)
+       throws IOException {
+     if (directoryAllocator == null) {
+       final String bufferDirKey;
+       if (conf.get(OBSConstants.BUFFER_DIR) != null) {
+         bufferDirKey = OBSConstants.BUFFER_DIR;
+       } else {
+         bufferDirKey = "hadoop.tmp.dir";
+       }
+       directoryAllocator = new LocalDirAllocator(bufferDirKey);
+     }
+     final File tmpFile =
+         directoryAllocator.createTmpFileForWrite(pathStr, size, conf);
+     return tmpFile;
+   }
+ }
+
+ /**
+  * Stream to a file. This will stop at the limit; the caller is expected to
+  * create a new block.
+  */
+ static class DiskBlock extends DataBlock {
+
+   /**
+    * File buffering the block data.
+    */
+   private final File bufferFile;
+
+   /**
+    * Maximum number of bytes this block accepts.
+    */
+   private final int limit;
+
+   /**
+    * Set once {@link #closeBlock()} has run, so the file is deleted once.
+    */
+   private final AtomicBoolean closed = new AtomicBoolean(false);
+
+   /**
+    * Written bytes count.
+    */
+   private int bytesWritten;
+
+   /**
+    * Stream writing to the buffer file; null once it has been closed.
+    */
+   private BufferedOutputStream out;
+
+   /**
+    * Create a block and open its buffer file for writing.
+    *
+    * @param destBufferFile file to buffer the data in
+    * @param limitSize      maximum amount of data the block accepts
+    * @param index          block index
+    * @throws FileNotFoundException if the file cannot be opened for writing
+    */
+   DiskBlock(final File destBufferFile, final int limitSize,
+       final long index)
+       throws FileNotFoundException {
+     super(index);
+     this.limit = limitSize;
+     this.bufferFile = destBufferFile;
+     out = new BufferedOutputStream(
+         new FileOutputStream(destBufferFile));
+   }
+
+   @Override
+   int dataSize() {
+     return bytesWritten;
+   }
+
+   @Override
+   boolean hasCapacity(final long bytes) {
+     return dataSize() + bytes <= limit;
+   }
+
+   @Override
+   int remainingCapacity() {
+     return limit - bytesWritten;
+   }
+
+   /**
+    * Write as much of the data as still fits below the limit.
+    *
+    * @param b      source array
+    * @param offset offset of the first byte to write
+    * @param len    number of bytes offered
+    * @return number of bytes actually written, possibly less than len
+    * @throws IOException trouble
+    */
+   @Override
+   int write(final byte[] b, final int offset, final int len)
+       throws IOException {
+     // state and argument validation only; the base class writes nothing
+     super.write(b, offset, len);
+     int written = Math.min(remainingCapacity(), len);
+     out.write(b, offset, written);
+     bytesWritten += written;
+     return written;
+   }
+
+   /**
+    * Switch to the upload state, then flush and close the output stream so
+    * that the buffer file is complete.
+    *
+    * @return the buffer file to upload
+    * @throws IOException failure to flush or close the stream
+    */
+   @Override
+   File startUpload() throws IOException {
+     super.startUpload();
+     // detach the stream first so it is never closed a second time, even
+     // if flush() or close() fails
+     final BufferedOutputStream stream = out;
+     out = null;
+     try {
+       stream.flush();
+     } finally {
+       stream.close();
+     }
+     return bufferFile;
+   }
+
+   /**
+    * The close operation will delete the destination file if it still exists.
+    * An output stream which is still open (the block is closed before
+    * {@link #startUpload()}) is closed first so its file handle is released.
+    */
+   @Override
+   protected void innerClose() {
+     final DestState state = getState();
+     LOG.debug("Closing {}", this);
+     closeOutputStream();
+     // when reached through close() the state is already Closed, because
+     // close() enters the closed state before calling this method
+     switch (state) {
+     case Writing:
+       if (bufferFile.exists()) {
+         // file was not uploaded
+         LOG.debug(
+             "Block[{}]: Deleting buffer file as upload "
+                 + "did not start",
+             getIndex());
+         closeBlock();
+       }
+       break;
+
+     case Upload:
+       LOG.debug(
+           "Block[{}]: Buffer file {} exists close upload stream",
+           getIndex(), bufferFile);
+       break;
+
+     case Closed:
+       closeBlock();
+       break;
+
+     default:
+       // this state can never be reached, but checkstyle
+       // complains, so it is here.
+     }
+   }
+
+   /**
+    * Close the output stream if it is still open. Failures are only logged:
+    * the data is being discarded anyway.
+    */
+   private void closeOutputStream() {
+     final BufferedOutputStream stream = out;
+     if (stream == null) {
+       return;
+     }
+     out = null;
+     try {
+       stream.close();
+     } catch (IOException e) {
+       LOG.debug("Block[{}]: failed to close buffer file stream",
+           getIndex(), e);
+     }
+   }
+
+   /**
+    * Flush operation will flush to disk.
+    *
+    * @throws IOException IOE raised on FileOutputStream
+    */
+   @Override
+   void flush() throws IOException {
+     super.flush();
+     out.flush();
+   }
+
+   @Override
+   public String toString() {
+     return "FileBlock{index=" + getIndex() + ", destFile=" + bufferFile
+         + ", state=" + getState() + ", dataSize="
+         + dataSize() + ", limit=" + limit + '}';
+   }
+
+   /**
+    * Close the block. This will delete the block's buffer file if the block
+    * has not previously been closed.
+    */
+   void closeBlock() {
+     LOG.debug("block[{}]: closeBlock()", getIndex());
+     if (!closed.getAndSet(true)) {
+       if (!bufferFile.delete() && bufferFile.exists()) {
+         LOG.warn("delete({}) returned false",
+             bufferFile.getAbsoluteFile());
+       }
+     } else {
+       LOG.debug("block[{}]: skipping re-entrant closeBlock()",
+           getIndex());
+     }
+   }
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSFileStatus.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSFileStatus.java
new file mode 100644
index 0000000000000..448115554f84c
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSFileStatus.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * File status for an OBS file.
+ *
+ * The subclass is private as it should not be created directly.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+class OBSFileStatus extends FileStatus {
+ /**
+ * Create a directory status.
+ *
+ * @param path the path
+ * @param owner the owner
+ */
+ OBSFileStatus(final Path path, final String owner) {
+ super(0, true, 1, 0, 0, path);
+ setOwner(owner);
+ setGroup(owner);
+ }
+
+ /**
+ * Create a directory status.
+ *
+ * @param modificationTime modification time
+ * @param path the path
+ * @param owner the owner
+ */
+ OBSFileStatus(final Path path, final long modificationTime,
+ final String owner) {
+ super(0, true, 1, 0, modificationTime, path);
+ setOwner(owner);
+ setGroup(owner);
+ }
+
+ /**
+ * Create a directory status.
+ *
+ * @param modificationTime modification time
+ * @param accessTime access time
+ * @param path the path
+ * @param owner the owner
+ */
+ OBSFileStatus(final Path path, final long modificationTime,
+ final long accessTime,
+ final String owner) {
+ super(0, true, 1, 0, modificationTime, accessTime, null, owner, owner,
+ path);
+ }
+
+ /**
+ * A simple file.
+ *
+ * @param length file length
+ * @param modificationTime mod time
+ * @param path path
+ * @param blockSize block size
+ * @param owner owner
+ */
+ OBSFileStatus(
+ final long length, final long modificationTime, final Path path,
+ final long blockSize,
+ final String owner) {
+ super(length, false, 1, blockSize, modificationTime, path);
+ setOwner(owner);
+ setGroup(owner);
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSFileSystem.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSFileSystem.java
new file mode 100644
index 0000000000000..aa38c93f80c3b
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSFileSystem.java
@@ -0,0 +1,1562 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
+import com.obs.services.ObsClient;
+import com.obs.services.exception.ObsException;
+import com.obs.services.model.AccessControlList;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Options.ChecksumOpt;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.SemaphoredDelegatingExecutor;
+import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.EnumSet;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * The core OBS Filesystem implementation.
+ *
+ * This subclass is marked as private as code should not be creating it
+ * directly; use {@link FileSystem#get(Configuration)} and variants to create
+ * one.
+ *
+ * If cast to {@code OBSFileSystem}, extra methods and features may be
+ * accessed. Consider those private and unstable.
+ *
+ * Because it prints some of the state of the instrumentation, the output of
+ * {@link #toString()} must also be considered unstable.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public final class OBSFileSystem extends FileSystem {
+ /**
+ * Class logger.
+ */
+ public static final Logger LOG = LoggerFactory.getLogger(
+ OBSFileSystem.class);
+
+ /**
+ * Flag indicating if the filesystem instance is closed.
+ */
+ private final AtomicBoolean closed = new AtomicBoolean(false);
+
+ /**
+ * URI of the filesystem. Rebuilt in {@code initialize} from the scheme and
+ * authority of the URI handed in.
+ */
+ private URI uri;
+
+ /**
+ * Current working directory of the filesystem. Set in {@code initialize} to
+ * the qualified form of {@code /user/<username>}.
+ */
+ private Path workingDir;
+
+ /**
+ * Short name of the user who instantiated the filesystem, as reported by
+ * {@link UserGroupInformation#getCurrentUser()} during {@code initialize}.
+ */
+ private String username;
+
+ /**
+ * OBS client instance, created through the configured
+ * {@code OBSClientFactory} implementation.
+ */
+ private ObsClient obs;
+
+ /**
+ * Flag indicating if posix bucket is used. Assigned in {@code initialize}
+ * from {@code OBSCommonUtils.getBucketFsStatus} and exposed through
+ * {@link #isFsBucket()}.
+ */
+ private boolean enablePosix = false;
+
+ /**
+ * Flag indicating if multi-object delete recursion is enabled.
+ */
+ private boolean enableMultiObjectDeleteRecursion = true;
+
+ /**
+ * Flag indicating if OBS specific content summary is enabled.
+ */
+ private boolean obsContentSummaryEnable = true;
+
+ /**
+ * Flag indicating if OBS client specific depth first search (DFS) list is
+ * enabled. Only read from the configuration when the bucket is posix.
+ */
+ private boolean obsClientDFSListEnable = true;
+
+ /**
+ * Bucket name, i.e. the authority of the URI passed to {@code initialize}.
+ */
+ private String bucket;
+
+ /**
+ * Max number of keys to get while paging through a directory listing.
+ */
+ private int maxKeys;
+
+ /**
+ * OBSListing instance.
+ */
+ private OBSListing obsListing;
+
+ /**
+ * Helper for an ongoing write operation.
+ */
+ private OBSWriteOperationHelper writeHelper;
+
+ /**
+ * Part size for multipart upload.
+ */
+ private long partSize;
+
+ /**
+ * Flag indicating if multi-object delete is enabled.
+ */
+ private boolean enableMultiObjectDelete;
+
+ /**
+ * Minimum number of objects in one multi-object delete call.
+ */
+ private int multiDeleteThreshold;
+
+ /**
+ * Maximum number of entries in one multi-object delete call.
+ */
+ private int maxEntriesToDelete;
+
+ /**
+ * Bounded thread pool for multipart upload.
+ */
+ private ExecutorService boundedMultipartUploadThreadPool;
+
+ /**
+ * Bounded thread pool for copy. Only created when the bucket is not posix.
+ */
+ private ThreadPoolExecutor boundedCopyThreadPool;
+
+ /**
+ * Bounded thread pool for delete.
+ */
+ private ThreadPoolExecutor boundedDeleteThreadPool;
+
+ /**
+ * Bounded thread pool for copy part. Only created when the bucket is not
+ * posix.
+ */
+ private ThreadPoolExecutor boundedCopyPartThreadPool;
+
+ /**
+ * Bounded thread pool for list. Only created when the bucket is posix and
+ * the client DFS list is enabled.
+ */
+ private ThreadPoolExecutor boundedListThreadPool;
+
+ /**
+ * List parallel factor. Values below 1 are forced to 1 when read.
+ */
+ private int listParallelFactor;
+
+ /**
+ * Read ahead range.
+ */
+ private long readAheadRange;
+
+ /**
+ * Flag indicating if {@link OBSInputStream#read(long, byte[], int, int)} will
+ * be transformed into {@link org.apache.hadoop.fs.FSInputStream#read(long,
+ * byte[], int, int)}.
+ */
+ private boolean readTransformEnable = true;
+
+ /**
+ * Factory for creating blocks.
+ */
+ private OBSDataBlocks.BlockFactory blockFactory;
+
+ /**
+ * Maximum Number of active blocks a single output stream can submit to {@link
+ * #boundedMultipartUploadThreadPool}.
+ */
+ private int blockOutputActiveBlocks;
+
+ /**
+ * Copy part size. Capped at {@code OBSConstants.MAX_COPY_PART_SIZE} when
+ * read.
+ */
+ private long copyPartSize;
+
+ /**
+ * Flag indicating if fast delete is enabled. Forced back to {@code false}
+ * during {@code initialize} when the bucket is not posix.
+ */
+ private boolean enableTrash = false;
+
+ /**
+ * Trash directory for fast delete. When set, it is normalized to start and
+ * end with a slash.
+ */
+ private String trashDir;
+
+ /**
+ * OBS redefined access control list.
+ * NOTE(review): "redefined" is probably meant as "predefined" (canned) --
+ * confirm.
+ */
+ private AccessControlList cannedACL;
+
+ /**
+ * Server-side encryption wrapper.
+ */
+ private SseWrapper sse;
+
+ /**
+ * Block size for {@link FileSystem#getDefaultBlockSize()}.
+ */
+ private long blockSize;
+
+ /**
+  * Initialize a FileSystem. Called after a new FileSystem instance is
+  * constructed.
+  *
+  * The bucket name is the authority of {@code name}. Bucket-specific options
+  * are patched over the base configuration before anything is read from it.
+  * The method then creates the OBS client, verifies the bucket, detects
+  * whether it is a posix bucket, reads the tuning options, builds the thread
+  * pools and finally resolves the trash (fast delete) settings.
+  *
+  * @param name         a URI whose authority section names the host, port,
+  *                     etc. for this FileSystem
+  * @param originalConf the configuration to use for the FS. The
+  *                     bucket-specific options are patched over the base ones
+  *                     before any use is made of the config.
+  * @throws IOException on any failure to initialize; an {@link ObsException}
+  *                     raised during set-up (including an enabled trash with
+  *                     an empty trash directory) is translated through
+  *                     {@code OBSCommonUtils.translateException}
+  */
+ @Override
+ public void initialize(final URI name, final Configuration originalConf)
+     throws IOException {
+   uri = URI.create(name.getScheme() + "://" + name.getAuthority());
+   bucket = name.getAuthority();
+   // clone the configuration into one with propagated bucket options
+   Configuration conf = OBSCommonUtils.propagateBucketOptions(originalConf,
+       bucket);
+   OBSCommonUtils.patchSecurityCredentialProviders(conf);
+   super.initialize(name, conf);
+   setConf(conf);
+   try {
+     // Username is the current user at the time the FS was instantiated.
+     username = UserGroupInformation.getCurrentUser().getShortUserName();
+     workingDir = new Path("/user", username).makeQualified(this.uri,
+         this.getWorkingDirectory());
+
+     Class<? extends OBSClientFactory> obsClientFactoryClass =
+         conf.getClass(
+             OBSConstants.OBS_CLIENT_FACTORY_IMPL,
+             OBSConstants.DEFAULT_OBS_CLIENT_FACTORY_IMPL,
+             OBSClientFactory.class);
+     obs = ReflectionUtils.newInstance(obsClientFactoryClass, conf)
+         .createObsClient(name);
+     sse = new SseWrapper(conf);
+
+     OBSCommonUtils.verifyBucketExists(this);
+     // must be known before initThreadPools(), which branches on it
+     enablePosix = OBSCommonUtils.getBucketFsStatus(obs, bucket);
+
+     maxKeys = OBSCommonUtils.intOption(conf,
+         OBSConstants.MAX_PAGING_KEYS,
+         OBSConstants.DEFAULT_MAX_PAGING_KEYS, 1);
+     obsListing = new OBSListing(this);
+     partSize = OBSCommonUtils.getMultipartSizeProperty(conf,
+         OBSConstants.MULTIPART_SIZE,
+         OBSConstants.DEFAULT_MULTIPART_SIZE);
+
+     // validate (minimum 1) and keep the configured block size
+     blockSize = OBSCommonUtils.longBytesOption(conf,
+         OBSConstants.FS_OBS_BLOCK_SIZE,
+         OBSConstants.DEFAULT_FS_OBS_BLOCK_SIZE, 1);
+     enableMultiObjectDelete = conf.getBoolean(
+         OBSConstants.ENABLE_MULTI_DELETE, true);
+     maxEntriesToDelete = conf.getInt(
+         OBSConstants.MULTI_DELETE_MAX_NUMBER,
+         OBSConstants.DEFAULT_MULTI_DELETE_MAX_NUMBER);
+     enableMultiObjectDeleteRecursion = conf.getBoolean(
+         OBSConstants.MULTI_DELETE_RECURSION, true);
+     obsContentSummaryEnable = conf.getBoolean(
+         OBSConstants.OBS_CONTENT_SUMMARY_ENABLE, true);
+     readAheadRange = OBSCommonUtils.longBytesOption(conf,
+         OBSConstants.READAHEAD_RANGE,
+         OBSConstants.DEFAULT_READAHEAD_RANGE, 0);
+     readTransformEnable = conf.getBoolean(
+         OBSConstants.READ_TRANSFORM_ENABLE, true);
+     multiDeleteThreshold = conf.getInt(
+         OBSConstants.MULTI_DELETE_THRESHOLD,
+         OBSConstants.MULTI_DELETE_DEFAULT_THRESHOLD);
+
+     initThreadPools(conf);
+
+     writeHelper = new OBSWriteOperationHelper(this);
+
+     initCannedAcls(conf);
+
+     OBSCommonUtils.initMultipartUploads(this, conf);
+
+     String blockOutputBuffer = conf.getTrimmed(
+         OBSConstants.FAST_UPLOAD_BUFFER,
+         OBSConstants.FAST_UPLOAD_BUFFER_DISK);
+     partSize = OBSCommonUtils.ensureOutputParameterInRange(
+         OBSConstants.MULTIPART_SIZE, partSize);
+     blockFactory = OBSDataBlocks.createFactory(this, blockOutputBuffer);
+     blockOutputActiveBlocks =
+         OBSCommonUtils.intOption(conf,
+             OBSConstants.FAST_UPLOAD_ACTIVE_BLOCKS,
+             OBSConstants.DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1);
+     LOG.debug(
+         "Using OBSBlockOutputStream with buffer = {}; block={};"
+             + " queue limit={}",
+         blockOutputBuffer,
+         partSize,
+         blockOutputActiveBlocks);
+
+     enableTrash = conf.getBoolean(OBSConstants.TRASH_ENABLE,
+         OBSConstants.DEFAULT_TRASH);
+     if (enableTrash) {
+       if (!isFsBucket()) {
+         // trash is only supported on posix buckets: switch it off
+         LOG.warn("The bucket [{}] is not posix. not supported for trash.",
+             bucket);
+         enableTrash = false;
+         trashDir = null;
+       } else {
+         trashDir = conf.get(OBSConstants.TRASH_DIR);
+         if (StringUtils.isEmpty(trashDir)) {
+           String errorMsg =
+               String.format(
+                   "The trash feature(fs.obs.trash.enable) is "
+                       + "enabled, but the "
+                       + "configuration(fs.obs.trash.dir [%s]) "
+                       + "is empty.",
+                   trashDir);
+           LOG.error(errorMsg);
+           throw new ObsException(errorMsg);
+         }
+         trashDir = OBSCommonUtils.maybeAddBeginningSlash(trashDir);
+         trashDir = OBSCommonUtils.maybeAddTrailingSlash(trashDir);
+       }
+     }
+   } catch (ObsException e) {
+     throw OBSCommonUtils.translateException("initializing ",
+         new Path(name), e);
+   }
+ }
+
+ /**
+  * Create the executors used by this filesystem.
+  *
+  * The multipart upload pool and the delete pool are always created. A posix
+  * bucket additionally gets the list pool (only when the client DFS list is
+  * enabled); any other bucket gets the copy and copy-part pools and has its
+  * copy part size read and capped.
+  *
+  * Relies on {@code enablePosix} having been set beforehand.
+  *
+  * @param conf the configuration to read the pool settings from
+  */
+ private void initThreadPools(final Configuration conf) {
+   long keepAliveTime = OBSCommonUtils.longOption(conf,
+       OBSConstants.KEEPALIVE_TIME,
+       OBSConstants.DEFAULT_KEEPALIVE_TIME, 0);
+
+   int maxThreads = readThreadLimit(conf, OBSConstants.MAX_THREADS,
+       OBSConstants.DEFAULT_MAX_THREADS);
+   int totalTasks = OBSCommonUtils.intOption(conf,
+       OBSConstants.MAX_TOTAL_TASKS,
+       OBSConstants.DEFAULT_MAX_TOTAL_TASKS, 1);
+   boundedMultipartUploadThreadPool =
+       BlockingThreadPoolExecutorService.newInstance(
+           maxThreads,
+           maxThreads + totalTasks,
+           keepAliveTime,
+           TimeUnit.SECONDS,
+           "obs-transfer-shared");
+
+   int maxDeleteThreads = readThreadLimit(conf,
+       OBSConstants.MAX_DELETE_THREADS,
+       OBSConstants.DEFAULT_MAX_DELETE_THREADS);
+   boundedDeleteThreadPool = newBoundedDaemonPool(
+       halfRoundedUp(maxDeleteThreads), maxDeleteThreads, keepAliveTime,
+       new LinkedBlockingQueue<Runnable>(), "obs-delete-transfer-shared");
+
+   if (enablePosix) {
+     obsClientDFSListEnable = conf.getBoolean(
+         OBSConstants.OBS_CLIENT_DFS_LIST_ENABLE, true);
+     if (obsClientDFSListEnable) {
+       int coreListThreads = conf.getInt(
+           OBSConstants.CORE_LIST_THREADS,
+           OBSConstants.DEFAULT_CORE_LIST_THREADS);
+       int maxListThreads = conf.getInt(OBSConstants.MAX_LIST_THREADS,
+           OBSConstants.DEFAULT_MAX_LIST_THREADS);
+       int listWorkQueueCapacity = conf.getInt(
+           OBSConstants.LIST_WORK_QUEUE_CAPACITY,
+           OBSConstants.DEFAULT_LIST_WORK_QUEUE_CAPACITY);
+       listParallelFactor = conf.getInt(
+           OBSConstants.LIST_PARALLEL_FACTOR,
+           OBSConstants.DEFAULT_LIST_PARALLEL_FACTOR);
+       if (listParallelFactor < 1) {
+         LOG.warn(OBSConstants.LIST_PARALLEL_FACTOR
+             + " must be at least 1: forcing to 1.");
+         listParallelFactor = 1;
+       }
+       // the list pool is the only one with a bounded work queue
+       boundedListThreadPool = newBoundedDaemonPool(
+           coreListThreads, maxListThreads, keepAliveTime,
+           new LinkedBlockingQueue<Runnable>(listWorkQueueCapacity),
+           "obs-list-transfer-shared");
+     }
+   } else {
+     int maxCopyThreads = readThreadLimit(conf,
+         OBSConstants.MAX_COPY_THREADS,
+         OBSConstants.DEFAULT_MAX_COPY_THREADS);
+     boundedCopyThreadPool = newBoundedDaemonPool(
+         halfRoundedUp(maxCopyThreads), maxCopyThreads, keepAliveTime,
+         new LinkedBlockingQueue<Runnable>(), "obs-copy-transfer-shared");
+
+     copyPartSize = OBSCommonUtils.longOption(conf,
+         OBSConstants.COPY_PART_SIZE,
+         OBSConstants.DEFAULT_COPY_PART_SIZE, 0);
+     if (copyPartSize > OBSConstants.MAX_COPY_PART_SIZE) {
+       LOG.warn(
+           "obs: {} capped to ~5GB (maximum allowed part size with "
+               + "current output mechanism)",
+           OBSConstants.COPY_PART_SIZE);
+       copyPartSize = OBSConstants.MAX_COPY_PART_SIZE;
+     }
+
+     int maxCopyPartThreads = readThreadLimit(conf,
+         OBSConstants.MAX_COPY_PART_THREADS,
+         OBSConstants.DEFAULT_MAX_COPY_PART_THREADS);
+     boundedCopyPartThreadPool = newBoundedDaemonPool(
+         halfRoundedUp(maxCopyPartThreads), maxCopyPartThreads,
+         keepAliveTime,
+         new LinkedBlockingQueue<Runnable>(),
+         "obs-copy-part-transfer-shared");
+   }
+ }
+
+ /**
+  * Read a maximum thread count, warning and forcing it to 2 when the
+  * configured value is lower.
+  *
+  * @param conf         the configuration to read from
+  * @param key          the option name
+  * @param defaultValue value used when the option is unset
+  * @return the configured value, never less than 2
+  */
+ private static int readThreadLimit(final Configuration conf,
+     final String key, final int defaultValue) {
+   int limit = conf.getInt(key, defaultValue);
+   if (limit < 2) {
+     LOG.warn(key + " must be at least 2: forcing to 2.");
+     limit = 2;
+   }
+   return limit;
+ }
+
+ /**
+  * Core pool size used for the delete and copy pools: half of the maximum,
+  * rounded up.
+  *
+  * @param maxThreads maximum pool size
+  * @return {@code ceil(maxThreads / 2)}
+  */
+ private static int halfRoundedUp(final int maxThreads) {
+   return (int) Math.ceil(maxThreads / 2.0);
+ }
+
+ /**
+  * Create a daemon-thread pool whose core threads are allowed to time out.
+  *
+  * @param coreThreads      core pool size
+  * @param maxThreads       maximum pool size
+  * @param keepAliveSeconds keep-alive time of idle threads, in seconds
+  * @param workQueue        queue holding the submitted tasks
+  * @param threadNamePrefix prefix for the names of the pool threads
+  * @return the new executor
+  */
+ private static ThreadPoolExecutor newBoundedDaemonPool(
+     final int coreThreads, final int maxThreads,
+     final long keepAliveSeconds,
+     final LinkedBlockingQueue<Runnable> workQueue,
+     final String threadNamePrefix) {
+   ThreadPoolExecutor pool =
+       new ThreadPoolExecutor(
+           coreThreads,
+           maxThreads,
+           keepAliveSeconds,
+           TimeUnit.SECONDS,
+           workQueue,
+           BlockingThreadPoolExecutorService.newDaemonThreadFactory(
+               threadNamePrefix));
+   pool.allowCoreThreadTimeOut(true);
+   return pool;
+ }
+
+ /**
+  * Tell whether the bucket behind this filesystem is a posix bucket.
+  *
+  * @return {@code true} when the bucket was detected as posix
+  */
+ boolean isFsBucket() {
+   return this.enablePosix;
+ }
+
+ /**
+  * Report the state of the read transform switch.
+  *
+  * @return {@code true} when read transform is enabled
+  */
+ boolean isReadTransformEnabled() {
+   return this.readTransformEnable;
+ }
+
+ /**
+ * Initialize bucket acl for upload, write operation.
+ *
+ * @param conf the configuration to use for the FS.
+ */
+ private void initCannedAcls(final Configuration conf) {
+ // No canned acl in obs
+ String cannedACLName = conf.get(OBSConstants.CANNED_ACL,
+ OBSConstants.DEFAULT_CANNED_ACL);
+ if (!cannedACLName.isEmpty()) {
+ switch (cannedACLName) {
+ case "Private":
+ case "PublicRead":
+ case "PublicReadWrite":
+ case "AuthenticatedRead":
+ case "LogDeliveryWrite":
+ case "BucketOwnerRead":
+ case "BucketOwnerFullControl":
+ cannedACL = new AccessControlList();
+ break;
+ default:
+ cannedACL = null;
+ }
+ } else {
+ cannedACL = null;
+ }
+ }
+
+ /**
+  * Return the ACL derived from the user's canned ACL setting.
+  *
+  * @return the {@link AccessControlList}, or {@code null} when none applies
+  */
+ AccessControlList getCannedACL() {
+   return this.cannedACL;
+ }
+
+ /**
+ * Return the protocol scheme for the FileSystem.
+ *
+ * @return the constant string "obs"
+ */
+ @Override
+ public String getScheme() {
+ return "obs";
+ }
+
+ /**
+  * Return the URI (scheme plus authority) that identifies this FileSystem.
+  *
+  * @return the URI of this filesystem, {@code null} before initialization
+  */
+ @Override
+ public URI getUri() {
+   return this.uri;
+ }
+
+ /**
+ * Return the default port for this FileSystem.
+ *
+ * @return the value of {@code OBSConstants.OBS_DEFAULT_PORT}; documented as
+ * -1 to indicate the port is undefined, which agrees with the
+ * contract of {@link URI#getPort()}.
+ * NOTE(review): the constant is defined outside this view -- confirm
+ * that it is -1.
+ */
+ @Override
+ public int getDefaultPort() {
+ return OBSConstants.OBS_DEFAULT_PORT;
+ }
+
+ /**
+  * Expose the OBS client this filesystem talks through.
+  *
+  * @return OBS client
+  */
+ @VisibleForTesting
+ ObsClient getObsClient() {
+   return this.obs;
+ }
+
+ /**
+  * Expose the read ahead range configured for this filesystem.
+  *
+  * @return read ahead range
+  */
+ @VisibleForTesting
+ long getReadAheadRange() {
+   return this.readAheadRange;
+ }
+
+ /**
+  * Return the name of the bucket backing this filesystem.
+  *
+  * @return the bucket
+  */
+ String getBucket() {
+   return this.bucket;
+ }
+
+ /**
+  * Check that a Path belongs to this FileSystem. Unlike the superclass, this
+  * version does not look at authority, but only hostname. The check itself
+  * is delegated to {@code OBSLoginHelper.checkPath}.
+  *
+  * @param path the path to check
+  * @throws IllegalArgumentException if there is an FS mismatch
+  */
+ @Override
+ public void checkPath(final Path path) {
+   final Configuration configuration = getConf();
+   final URI fsUri = getUri();
+   OBSLoginHelper.checkPath(configuration, fsUri, path, getDefaultPort());
+ }
+
+ /**
+ * Canonicalize the given URI.
+ *
+ * @param rawUri the URI to be canonicalized
+ * @return the canonicalized URI
+ */
+ @Override
+ protected URI canonicalizeUri(final URI rawUri) {
+ return OBSLoginHelper.canonicalizeUri(rawUri, getDefaultPort());
+ }
+
+ /**
+  * Open an FSDataInputStream at the indicated Path.
+  *
+  * The status of the path is looked up first; its length is handed to the
+  * {@link OBSInputStream} that backs the returned stream.
+  *
+  * @param f          the file path to open
+  * @param bufferSize the size of the buffer to be used (not used here)
+  * @return the FSDataInputStream for the file
+  * @throws FileNotFoundException if the path refers to a directory
+  * @throws IOException           on any failure to open the file
+  */
+ @Override
+ public FSDataInputStream open(final Path f, final int bufferSize)
+     throws IOException {
+   LOG.debug("Opening '{}' for reading.", f);
+   final FileStatus status = getFileStatus(f);
+   if (!status.isDirectory()) {
+     final String objectKey = OBSCommonUtils.pathToKey(this, f);
+     final OBSInputStream objectStream = new OBSInputStream(bucket,
+         objectKey, status.getLen(), obs, statistics, readAheadRange, this);
+     return new FSDataInputStream(objectStream);
+   }
+   throw new FileNotFoundException(
+       "Can't open " + f + " because it is a directory");
+ }
+
+ /**
+  * Create an FSDataOutputStream at the indicated Path with write-progress
+  * reporting.
+  *
+  * The path is probed first: a directory is always rejected, an existing
+  * file is rejected unless {@code overwrite} is set, and a missing path
+  * means a new file is created. The length of an existing file (0
+  * otherwise) is passed on to the {@link OBSBlockOutputStream}.
+  *
+  * @param f           the file path to create
+  * @param permission  the permission to set (not used here)
+  * @param overwrite   if a file with this name already exists, then if true,
+  *                    the file will be overwritten, and if false an error
+  *                    will be thrown
+  * @param bufferSize  the size of the buffer to be used (not used here)
+  * @param replication required block replication for the file (not used
+  *                    here)
+  * @param blkSize     the requested block size (not used here)
+  * @param progress    the progress reporter (not used here)
+  * @return the output stream writing to the object
+  * @throws FileAlreadyExistsException if the path is a directory, or a file
+  *                                    and {@code overwrite} is false
+  * @throws IOException                on any failure to create the file
+  * @see #setPermission(Path, FsPermission)
+  */
+ @Override
+ public FSDataOutputStream create(
+     final Path f,
+     final FsPermission permission,
+     final boolean overwrite,
+     final int bufferSize,
+     final short replication,
+     final long blkSize,
+     final Progressable progress)
+     throws IOException {
+   final String objectKey = OBSCommonUtils.pathToKey(this, f);
+   long existingLength = 0;
+   try {
+     // throws FileNotFoundException when nothing exists at the path
+     final FileStatus existing = getFileStatus(f);
+     existingLength = existing.getLen();
+     if (existing.isDirectory()) {
+       // a directory can never be replaced by a file
+       throw new FileAlreadyExistsException(f + " is a directory");
+     }
+     if (!overwrite) {
+       // a file is present and the caller did not ask to overwrite it
+       throw new FileAlreadyExistsException(f + " already exists");
+     }
+     LOG.debug("create: Overwriting file {}", f);
+   } catch (FileNotFoundException e) {
+     // nothing at the path yet
+     LOG.debug("create: Creating new file {}", f);
+   }
+
+   final SemaphoredDelegatingExecutor uploadExecutor =
+       new SemaphoredDelegatingExecutor(
+           boundedMultipartUploadThreadPool,
+           blockOutputActiveBlocks, true);
+   final OBSBlockOutputStream blockStream = new OBSBlockOutputStream(
+       this, objectKey, existingLength, uploadExecutor, false);
+   return new FSDataOutputStream(blockStream, null);
+ }
+
+ /**
+  * Return the multipart upload part size that {@link OBSBlockOutputStream}
+  * works with.
+  *
+  * @return the part size
+  */
+ long getPartSize() {
+   return this.partSize;
+ }
+
+ /**
+  * Return the factory {@link OBSBlockOutputStream} obtains its blocks from.
+  *
+  * @return the block factory
+  */
+ OBSDataBlocks.BlockFactory getBlockFactory() {
+   return this.blockFactory;
+ }
+
+ /**
+  * Return the write operation helper shared with
+  * {@link OBSBlockOutputStream}.
+  *
+  * @return the write helper
+  */
+ OBSWriteOperationHelper getWriteHelper() {
+   return this.writeHelper;
+ }
+
+ /**
+ * Create an FSDataOutputStream at the indicated Path with write-progress
+ * reporting.
+ *
+ * @param f the file name to create
+ * @param permission permission of
+ * @param flags {@link CreateFlag}s to use for this stream
+ * @param bufferSize the size of the buffer to be used
+ * @param replication required block replication for the file
+ * @param blkSize block size
+ * @param progress progress
+ * @param checksumOpt check sum option
+ * @throws IOException io exception
+ */
+ @Override
+ @SuppressWarnings("checkstyle:parameternumber")
+ public FSDataOutputStream create(
+ final Path f,
+ final FsPermission permission,
+ final EnumSet
+ * If a returned status is a file, it contains the file's block locations.
+ *
+ * @param f is the path
+ * @return an iterator that traverses statuses of the files/directories in the
+ * given path
+ * @throws FileNotFoundException If As this stream seeks within an object, it may close then re-open the
+ * stream. When this happens, any updated stream data may be retrieved, and,
+ * given the consistency model of Huawei OBS, outdated data may in fact be
+ * picked up.
+ *
+ * As a result, the outcome of reading from a stream of an object which is
+ * actively manipulated during the read process is "undefined".
+ *
+ * The class is marked as private as code should not be creating instances
+ * themselves. Any extra feature (e.g instrumentation) should be considered
+ * unstable.
+ *
+ * Because it prints some of the state of the instrumentation, the output of
+ * {@link #toString()} must also be considered unstable.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+class OBSInputStream extends FSInputStream
+ implements CanSetReadahead, ByteBufferReadable {
+ /**
+ * Class logger.
+ */
+ public static final Logger LOG = LoggerFactory.getLogger(
+ OBSInputStream.class);
+
+ /**
+ * Read retry times: the maximum number of attempts {@link #read()} makes on
+ * the wrapped stream before giving up.
+ */
+ private static final int READ_RETRY_TIME = 3;
+
+ /**
+ * Seek retry times: the maximum number of attempts made by the lazy seek.
+ */
+ private static final int SEEK_RETRY_TIME = 9;
+
+ /**
+ * Delay between two retries, in milliseconds.
+ */
+ private static final long DELAY_TIME = 10;
+
+ /**
+ * The statistics for OBS file system. May be null, in which case no bytes
+ * read are recorded.
+ */
+ private final FileSystem.Statistics statistics;
+
+ /**
+ * Obs client.
+ */
+ private final ObsClient client;
+
+ /**
+ * Bucket name.
+ */
+ private final String bucket;
+
+ /**
+ * Key of the object within the bucket.
+ */
+ private final String key;
+
+ /**
+ * Content length, taken from the file status at open time.
+ */
+ private final long contentLength;
+
+ /**
+ * Object uri in the form {@code obs://bucket/key}; used in log and error
+ * messages.
+ */
+ private final String uri;
+
+ /**
+ * Obs file system instance.
+ */
+ private OBSFileSystem fs;
+
+ /**
+ * Position within the currently wrapped stream. It is set when the stream
+ * is (re)opened and advanced as bytes are read or skipped.
+ */
+ private long streamCurrentPos;
+
+ /**
+ * Closed bit. Volatile so reads are non-blocking. Updates must be in a
+ * synchronized block to guarantee an atomic check and set
+ */
+ private volatile boolean closed;
+
+ /**
+ * Input stream.
+ */
+ private InputStream wrappedStream = null;
+
+ /**
+ * Read ahead range.
+ */
+ private long readAheadRange = OBSConstants.DEFAULT_READAHEAD_RANGE;
+
+ /**
+ * This is the public position: the one set in {@link #seek(long)} and
+ * returned in {@link #getPos()}. The lazy seek compares it with the stream
+ * position to decide whether to seek on the next read or not.
+ */
+ private long nextReadPos;
+
+ /**
+ * The end of the content range of the last request. This is an absolute value
+ * of the range, not a length field.
+ */
+ private long contentRangeFinish;
+
+ /**
+ * The start of the content range of the last request.
+ */
+ private long contentRangeStart;
+
+ /**
+  * Create an input stream over a single object. No request is sent here;
+  * the object is only opened on first use.
+  *
+  * An empty bucket name, an empty key or a negative length is rejected
+  * through {@code Preconditions.checkArgument}.
+  *
+  * @param bucketName       name of the bucket holding the object
+  * @param bucketKey        key of the object
+  * @param fileStatusLength length of the object as known from its status
+  * @param obsClient        client used to fetch the object
+  * @param stats            statistics to update, may be null
+  * @param readaheadRange   initial read ahead range
+  * @param obsFileSystem    owning filesystem
+  */
+ OBSInputStream(
+     final String bucketName,
+     final String bucketKey,
+     final long fileStatusLength,
+     final ObsClient obsClient,
+     final FileSystem.Statistics stats,
+     final long readaheadRange,
+     final OBSFileSystem obsFileSystem) {
+   Preconditions.checkArgument(StringUtils.isNotEmpty(bucketName),
+       "No Bucket");
+   Preconditions.checkArgument(StringUtils.isNotEmpty(bucketKey),
+       "No Key");
+   Preconditions.checkArgument(fileStatusLength >= 0,
+       "Negative content length");
+
+   this.fs = obsFileSystem;
+   this.client = obsClient;
+   this.statistics = stats;
+   this.contentLength = fileStatusLength;
+   this.bucket = bucketName;
+   this.key = bucketKey;
+   this.uri = "obs://" + bucketName + "/" + bucketKey;
+   setReadahead(readaheadRange);
+ }
+
+ /**
+ * Calculate the limit for a get request, based on input policy and state of
+ * object.
+ *
+ * @param targetPos position of the read
+ * @param length length of bytes requested; if less than zero
+ * "unknown"
+ * @param contentLength total length of file
+ * @param readahead current readahead value
+ * @return the absolute value of the limit of the request.
+ */
+ static long calculateRequestLimit(
+ final long targetPos, final long length, final long contentLength,
+ final long readahead) {
+ // cannot read past the end of the object
+ return Math.min(contentLength, length < 0 ? contentLength
+ : targetPos + Math.max(readahead, length));
+ }
+
+ /**
+ * Opens up the stream at specified target position and for given length.
+ *
+ * @param reason reason for reopen
+ * @param targetPos target position
+ * @param length length requested
+ * @throws IOException on any failure to open the object
+ */
+ private synchronized void reopen(final String reason, final long targetPos,
+ final long length)
+ throws IOException {
+ long startTime = System.currentTimeMillis();
+ long threadId = Thread.currentThread().getId();
+ if (wrappedStream != null) {
+ closeStream("reopen(" + reason + ")", contentRangeFinish);
+ }
+
+ contentRangeFinish =
+ calculateRequestLimit(targetPos, length, contentLength,
+ readAheadRange);
+
+ try {
+ GetObjectRequest request = new GetObjectRequest(bucket, key);
+ request.setRangeStart(targetPos);
+ request.setRangeEnd(contentRangeFinish);
+ if (fs.getSse().isSseCEnable()) {
+ request.setSseCHeader(fs.getSse().getSseCHeader());
+ }
+ wrappedStream = client.getObject(request).getObjectContent();
+ contentRangeStart = targetPos;
+ if (wrappedStream == null) {
+ throw new IOException(
+ "Null IO stream from reopen of (" + reason + ") " + uri);
+ }
+ } catch (ObsException e) {
+ throw translateException("Reopen at position " + targetPos, uri, e);
+ }
+
+ this.streamCurrentPos = targetPos;
+ long endTime = System.currentTimeMillis();
+ LOG.debug(
+ "reopen({}) for {} range[{}-{}], length={},"
+ + " streamPosition={}, nextReadPosition={}, thread={}, "
+ + "timeUsedInMilliSec={}",
+ uri,
+ reason,
+ targetPos,
+ contentRangeFinish,
+ length,
+ streamCurrentPos,
+ nextReadPos,
+ threadId,
+ endTime - startTime
+ );
+ }
+
+ /**
+  * Return the position the next read will start from.
+  *
+  * @return the next read position, never negative
+  */
+ @Override
+ public synchronized long getPos() {
+   return Math.max(0L, nextReadPos);
+ }
+
+ /**
+  * Record the position of the next read. The seek is lazy: nothing is done
+  * to the underlying stream here. On an empty object the call is a no-op.
+  *
+  * @param targetPos the position to read from next
+  * @throws EOFException if {@code targetPos} is negative
+  * @throws IOException  if the stream is closed
+  */
+ @Override
+ public synchronized void seek(final long targetPos) throws IOException {
+   checkNotClosed();
+
+   if (targetPos < 0) {
+     // negative seeks are never allowed
+     throw new EOFException(
+         FSExceptionMessages.NEGATIVE_SEEK + " " + targetPos);
+   }
+
+   if (this.contentLength > 0) {
+     // lazy seek: only remember where to read next
+     nextReadPos = targetPos;
+   }
+ }
+
+ /**
+  * Seek, logging rather than propagating any {@link IOException}. Meant for
+  * use in {@code finally} clauses.
+  *
+  * @param positiveTargetPos a target position which must be positive.
+  */
+ private void seekQuietly(final long positiveTargetPos) {
+   try {
+     seek(positiveTargetPos);
+   } catch (IOException suppressed) {
+     // deliberately swallowed: callers need a seek that cannot fail
+     LOG.debug("Ignoring IOE on seek of {} to {}", uri,
+         positiveTargetPos, suppressed);
+   }
+ }
+
+ /**
+ * Adjust the stream to a specific position.
+ *
+ * @param targetPos target seek position
+ * @throws IOException on any failure to seek
+ */
+ private void seekInStream(final long targetPos) throws IOException {
+ checkNotClosed();
+ if (wrappedStream == null) {
+ return;
+ }
+ // compute how much more to skip
+ long diff = targetPos - streamCurrentPos;
+ if (diff > 0) {
+ // forward seek -this is where data can be skipped
+
+ int available = wrappedStream.available();
+ // always seek at least as far as what is available
+ long forwardSeekRange = Math.max(readAheadRange, available);
+ // work out how much is actually left in the stream
+ // then choose whichever comes first: the range or the EOF
+ long remainingInCurrentRequest = remainingInCurrentRequest();
+
+ long forwardSeekLimit = Math.min(remainingInCurrentRequest,
+ forwardSeekRange);
+ boolean skipForward = remainingInCurrentRequest > 0
+ && diff <= forwardSeekLimit;
+ if (skipForward) {
+ // the forward seek range is within the limits
+ LOG.debug("Forward seek on {}, of {} bytes", uri, diff);
+ long skippedOnce = wrappedStream.skip(diff);
+ while (diff > 0 && skippedOnce > 0) {
+ streamCurrentPos += skippedOnce;
+ diff -= skippedOnce;
+ incrementBytesRead(skippedOnce);
+ skippedOnce = wrappedStream.skip(diff);
+ }
+
+ if (streamCurrentPos == targetPos) {
+ // all is well
+ return;
+ } else {
+ // log a warning; continue to attempt to re-open
+ LOG.info("Failed to seek on {} to {}. Current position {}",
+ uri, targetPos, streamCurrentPos);
+ }
+ }
+ } else if (diff == 0 && remainingInCurrentRequest() > 0) {
+ // targetPos == streamCurrentPos
+ // if there is data left in the stream, keep going
+ return;
+ }
+
+ // if the code reaches here, the stream needs to be reopened.
+ // close the stream; if read the object will be opened at the
+ // new streamCurrentPos
+ closeStream("seekInStream()", this.contentRangeFinish);
+ streamCurrentPos = targetPos;
+ }
+
+ /**
+ * Seeking to an alternative source is not supported by this stream.
+ *
+ * @param targetPos the requested position (ignored)
+ * @return always {@code false}
+ */
+ @Override
+ public boolean seekToNewSource(final long targetPos) {
+ return false;
+ }
+
+ /**
+  * Perform lazy seek and adjust stream to correct position for reading.
+  *
+  * The seek/reopen sequence is attempted up to {@code SEEK_RETRY_TIME}
+  * times with a pause of {@code DELAY_TIME} milliseconds between attempts.
+  * Failures caused by an {@link ObsException} whose response code is
+  * unauthorized, forbidden, not found, gone or EOF are rethrown at once,
+  * without any retry.
+  *
+  * If the thread is interrupted during the pause, its interrupt status is
+  * restored before the last {@link IOException} is rethrown.
+  *
+  * @param targetPos position from where data should be read
+  * @param len       length of the content that needs to be read
+  * @throws IOException on any failure to lazy seek
+  */
+ private void lazySeek(final long targetPos, final long len)
+     throws IOException {
+   for (int i = 0; i < SEEK_RETRY_TIME; i++) {
+     try {
+       // For lazy seek
+       seekInStream(targetPos);
+
+       // re-open at specific location if needed
+       if (wrappedStream == null) {
+         reopen("read from new offset", targetPos, len);
+       }
+
+       return;
+     } catch (IOException e) {
+       // never retry on top of a stream left in an unknown state
+       if (wrappedStream != null) {
+         closeStream("lazySeek() seekInStream has exception ",
+             this.contentRangeFinish);
+       }
+       Throwable cause = e.getCause();
+       if (cause instanceof ObsException) {
+         ObsException obsException = (ObsException) cause;
+         int status = obsException.getResponseCode();
+         switch (status) {
+         case OBSCommonUtils.UNAUTHORIZED_CODE:
+         case OBSCommonUtils.FORBIDDEN_CODE:
+         case OBSCommonUtils.NOT_FOUND_CODE:
+         case OBSCommonUtils.GONE_CODE:
+         case OBSCommonUtils.EOF_CODE:
+           // retrying cannot help for these response codes
+           throw e;
+         default:
+           break;
+         }
+       }
+
+       LOG.warn("IOException occurred in lazySeek, retry: {}", i, e);
+       if (i == SEEK_RETRY_TIME - 1) {
+         throw e;
+       }
+       try {
+         Thread.sleep(DELAY_TIME);
+       } catch (InterruptedException ie) {
+         // keep the interruption visible to the caller
+         Thread.currentThread().interrupt();
+         throw e;
+       }
+     }
+   }
+ }
+
+ /**
+  * Add to the bytes-read counter of the statistics instance, if one is set.
+  * Counts of zero or less are ignored.
+  *
+  * @param bytesRead number of bytes read
+  */
+ private void incrementBytesRead(final long bytesRead) {
+   if (statistics == null || bytesRead <= 0) {
+     return;
+   }
+   statistics.incrementBytesRead(bytesRead);
+ }
+
+ /**
+  * Pause for {@code OBSInputStream.DELAY_TIME} milliseconds by waiting on this
+  * object's monitor, resuming the wait after an early wake-up until the full
+  * delay has elapsed.
+  *
+  * @throws InterruptedException if the thread is interrupted while waiting
+  */
+ private void sleepInLock() throws InterruptedException {
+   final long begin = System.currentTimeMillis();
+   final long deadline = begin + OBSInputStream.DELAY_TIME;
+   long current = begin;
+   while (current < deadline) {
+     // wait only for the time that is still outstanding
+     wait(deadline - current);
+     current = System.currentTimeMillis();
+   }
+ }
+
+ /**
+  * Read the next byte of the object.
+  *
+  * <p>The stream is first positioned at {@code nextReadPos} through
+  * {@code lazySeek}. A failed read is followed by a re-open in
+  * {@code onReadFailure} and is attempted up to {@code READ_RETRY_TIME}
+  * times, pausing between attempts.
+  *
+  * @return the byte read, or -1 if the object is empty, the read position is
+  *         at or past the content length, or an EOF is encountered
+  * @throws IOException if the stream is closed, if the read still fails on
+  *                     the last attempt, or if the pause between attempts is
+  *                     interrupted (the interrupt status is restored first)
+  */
+ @Override
+ public synchronized int read() throws IOException {
+   long startTime = System.currentTimeMillis();
+   long threadId = Thread.currentThread().getId();
+   checkNotClosed();
+   if (this.contentLength == 0 || nextReadPos >= contentLength) {
+     return -1;
+   }
+
+   int byteRead = -1;
+   try {
+     lazySeek(nextReadPos, 1);
+   } catch (EOFException e) {
+     onReadFailure(e, 1);
+     return -1;
+   }
+
+   IOException exception = null;
+   for (int retryTime = 1; retryTime <= READ_RETRY_TIME; retryTime++) {
+     try {
+       byteRead = wrappedStream.read();
+       exception = null;
+       break;
+     } catch (EOFException e) {
+       onReadFailure(e, 1);
+       return -1;
+     } catch (IOException e) {
+       exception = e;
+       // re-open the stream at the current position before the next attempt
+       onReadFailure(e, 1);
+       LOG.warn(
+           "read of [{}] failed, retry time[{}], due to exception[{}]",
+           uri, retryTime, exception);
+       if (retryTime < READ_RETRY_TIME) {
+         try {
+           sleepInLock();
+         } catch (InterruptedException ie) {
+           // keep the interruption visible to the caller
+           Thread.currentThread().interrupt();
+           LOG.error(
+               "read of [{}] failed, retry time[{}], due to "
+                   + "exception[{}]",
+               uri, retryTime,
+               exception);
+           throw exception;
+         }
+       }
+     }
+   }
+
+   if (exception != null) {
+     LOG.error(
+         "read of [{}] failed, retry time[{}], due to exception[{}]",
+         uri, READ_RETRY_TIME, exception);
+     throw exception;
+   }
+
+   if (byteRead >= 0) {
+     // one byte consumed: advance both positions and the statistics
+     streamCurrentPos++;
+     nextReadPos++;
+     incrementBytesRead(1);
+   }
+
+   long endTime = System.currentTimeMillis();
+   LOG.debug(
+       "read-0arg uri:{}, contentLength:{}, position:{}, readValue:{}, "
+           + "thread:{}, timeUsedMilliSec:{}",
+       uri, contentLength, byteRead >= 0 ? nextReadPos - 1 : nextReadPos,
+       byteRead, threadId,
+       endTime - startTime);
+   return byteRead;
+ }
+
+ /**
+  * Handle an IOE on a read by attempting to re-open the stream at
+  * {@code streamCurrentPos}.
+  *
+  * <p>A re-open that fails with an {@code OBSIOException} is repeated, with a
+  * pause of {@code DELAY_TIME} milliseconds in between, until
+  * {@code READ_RETRY_TIME} attempts have been made. Any other exception from
+  * the re-open propagates at once.
+  *
+  * @param ioe exception caught.
+  * @param length length of data being attempted to read
+  * @throws IOException any exception thrown on the re-open attempt; if the
+  *                     pause is interrupted the last re-open failure is
+  *                     thrown and the interrupt status is restored
+  */
+ private void onReadFailure(final IOException ioe, final int length)
+     throws IOException {
+   LOG.debug(
+       "Got exception while trying to read from stream {}"
+           + " trying to recover: " + ioe, uri);
+   for (int attempt = 1;; attempt++) {
+     try {
+       reopen("failure recovery", streamCurrentPos, length);
+       return;
+     } catch (OBSIOException e) {
+       LOG.warn(
+           "OBSIOException occurred in reopen for failure recovery, "
+               + "the {} retry time",
+           attempt, e);
+       if (attempt == READ_RETRY_TIME) {
+         throw e;
+       }
+       try {
+         Thread.sleep(DELAY_TIME);
+       } catch (InterruptedException ie) {
+         // stop retrying, but keep the interruption visible to the caller
+         Thread.currentThread().interrupt();
+         throw e;
+       }
+     }
+   }
+ }
+
+ /**
+  * Read up to {@code byteBuffer.remaining()} bytes into the given buffer.
+  *
+  * <p>The data is read into a temporary array and copied into the buffer
+  * only after the read has succeeded. A failed read is followed by a re-open
+  * in {@code onReadFailure} and is attempted up to {@code READ_RETRY_TIME}
+  * times, pausing between attempts.
+  *
+  * @param byteBuffer destination buffer
+  * @return number of bytes read; 0 if the buffer has no space remaining;
+  *         -1 if the object is empty, the read position is at or past the
+  *         content length, or an EOF is encountered
+  * @throws IOException if the stream is closed, if the read still fails on
+  *                     the last attempt, or if the pause between attempts is
+  *                     interrupted (the interrupt status is restored first)
+  */
+ @Override
+ public synchronized int read(final ByteBuffer byteBuffer)
+     throws IOException {
+   long startTime = System.currentTimeMillis();
+   long threadId = Thread.currentThread().getId();
+   LOG.debug("read byteBuffer: {}", byteBuffer.toString());
+   checkNotClosed();
+
+   int len = byteBuffer.remaining();
+   if (len == 0) {
+     return 0;
+   }
+
+   if (this.contentLength == 0 || nextReadPos >= contentLength) {
+     return -1;
+   }
+
+   try {
+     lazySeek(nextReadPos, len);
+   } catch (EOFException e) {
+     onReadFailure(e, len);
+     // the end of the file has moved
+     return -1;
+   }
+
+   // allocated only once it is known that a read will be attempted
+   byte[] buf = new byte[len];
+   int bytesRead = 0;
+   IOException exception = null;
+   for (int retryTime = 1; retryTime <= READ_RETRY_TIME; retryTime++) {
+     try {
+       bytesRead = tryToReadFromInputStream(wrappedStream, buf, 0,
+           len);
+       if (bytesRead == -1) {
+         return -1;
+       }
+       exception = null;
+       break;
+     } catch (EOFException e) {
+       onReadFailure(e, len);
+       return -1;
+     } catch (IOException e) {
+       exception = e;
+       // re-open the stream at the current position before the next attempt
+       onReadFailure(e, len);
+       LOG.warn(
+           "read len[{}] of [{}] failed, retry time[{}], "
+               + "due to exception[{}]",
+           len, uri, retryTime, exception);
+       if (retryTime < READ_RETRY_TIME) {
+         try {
+           sleepInLock();
+         } catch (InterruptedException ie) {
+           // keep the interruption visible to the caller
+           Thread.currentThread().interrupt();
+           LOG.error(
+               "read len[{}] of [{}] failed, retry time[{}], "
+                   + "due to exception[{}]",
+               len, uri, retryTime, exception);
+           throw exception;
+         }
+       }
+     }
+   }
+
+   if (exception != null) {
+     LOG.error(
+         "read len[{}] of [{}] failed, retry time[{}], "
+             + "due to exception[{}]",
+         len, uri, READ_RETRY_TIME, exception);
+     throw exception;
+   }
+
+   if (bytesRead > 0) {
+     streamCurrentPos += bytesRead;
+     nextReadPos += bytesRead;
+     byteBuffer.put(buf, 0, bytesRead);
+   }
+   incrementBytesRead(bytesRead);
+
+   long endTime = System.currentTimeMillis();
+   LOG.debug(
+       "Read-ByteBuffer uri:{}, contentLength:{}, destLen:{}, readLen:{}, "
+           + "position:{}, thread:{}, timeUsedMilliSec:{}",
+       uri, contentLength, len, bytesRead,
+       bytesRead >= 0 ? nextReadPos - bytesRead : nextReadPos, threadId,
+       endTime - startTime);
+   return bytesRead;
+ }
+
+ /**
+  * Read from the stream until {@code len} bytes have been stored or the
+  * stream reports its end.
+  *
+  * @param in stream to read from
+  * @param buf destination array
+  * @param off index in {@code buf} of the first byte to store
+  * @param len number of bytes wanted
+  * @return number of bytes stored, or -1 if the end of the stream was
+  *         reported before any byte was stored
+  * @throws IOException if the underlying read fails
+  */
+ private int tryToReadFromInputStream(final InputStream in, final byte[] buf,
+     final int off, final int len) throws IOException {
+   int total = 0;
+   while (total < len) {
+     final int count = in.read(buf, off + total, len - total);
+     if (count == -1) {
+       // end of stream: -1 only when nothing at all was read
+       return total == 0 ? -1 : total;
+     }
+     total += count;
+   }
+   return total;
+ }
+
+ /**
+  * Read up to {@code len} bytes into {@code buf}, starting at index
+  * {@code off}.
+  *
+  * <p>The stream is first positioned at {@code nextReadPos} through
+  * {@code lazySeek}. A failed read is followed by a re-open in
+  * {@code onReadFailure} and is attempted up to {@code READ_RETRY_TIME}
+  * times, pausing between attempts. On success both stream positions and the
+  * bytes-read counter are advanced by the number of bytes read.
+  *
+  * @param buf destination array
+  * @param off index in {@code buf} of the first byte to store
+  * @param len maximum number of bytes to read
+  * @return number of bytes read; 0 if {@code len} is 0; -1 if the object is
+  *         empty, the read position is at or past the content length, or an
+  *         EOF is encountered
+  * @throws IOException if the stream is closed, if the read still fails on
+  *                     the last attempt, or if the pause between attempts is
+  *                     interrupted (the interrupt status is restored first)
+  */
+ @Override
+ public synchronized int read(@NotNull final byte[] buf, final int off,
+     final int len) throws IOException {
+   long startTime = System.currentTimeMillis();
+   long threadId = Thread.currentThread().getId();
+   checkNotClosed();
+   validatePositionedReadArgs(nextReadPos, buf, off, len);
+   if (len == 0) {
+     return 0;
+   }
+
+   if (this.contentLength == 0 || nextReadPos >= contentLength) {
+     return -1;
+   }
+
+   try {
+     lazySeek(nextReadPos, len);
+   } catch (EOFException e) {
+     onReadFailure(e, len);
+     // the end of the file has moved
+     return -1;
+   }
+
+   int bytesRead = 0;
+   IOException exception = null;
+   for (int retryTime = 1; retryTime <= READ_RETRY_TIME; retryTime++) {
+     try {
+       bytesRead = tryToReadFromInputStream(wrappedStream, buf, off,
+           len);
+       if (bytesRead == -1) {
+         return -1;
+       }
+       exception = null;
+       break;
+     } catch (EOFException e) {
+       onReadFailure(e, len);
+       return -1;
+     } catch (IOException e) {
+       exception = e;
+       // re-open the stream at the current position before the next attempt
+       onReadFailure(e, len);
+       LOG.warn(
+           "read offset[{}] len[{}] of [{}] failed, retry time[{}], "
+               + "due to exception[{}]",
+           off, len, uri, retryTime, exception);
+       if (retryTime < READ_RETRY_TIME) {
+         try {
+           sleepInLock();
+         } catch (InterruptedException ie) {
+           // keep the interruption visible to the caller
+           Thread.currentThread().interrupt();
+           LOG.error(
+               "read offset[{}] len[{}] of [{}] failed, "
+                   + "retry time[{}], due to exception[{}]",
+               off, len, uri, retryTime, exception);
+           throw exception;
+         }
+       }
+     }
+   }
+
+   if (exception != null) {
+     LOG.error(
+         "read offset[{}] len[{}] of [{}] failed, retry time[{}], "
+             + "due to exception[{}]",
+         off, len, uri, READ_RETRY_TIME, exception);
+     throw exception;
+   }
+
+   if (bytesRead > 0) {
+     streamCurrentPos += bytesRead;
+     nextReadPos += bytesRead;
+   }
+   incrementBytesRead(bytesRead);
+
+   long endTime = System.currentTimeMillis();
+   LOG.debug(
+       "Read-3args uri:{}, contentLength:{}, destLen:{}, readLen:{}, "
+           + "position:{}, thread:{}, timeUsedMilliSec:{}",
+       uri, contentLength, len, bytesRead,
+       bytesRead >= 0 ? nextReadPos - bytesRead : nextReadPos, threadId,
+       endTime - startTime);
+   return bytesRead;
+ }
+
+ /**
+  * Verify that the input stream is open by inspecting the {@link #closed}
+  * field. This method is not itself synchronized.
+  *
+  * @throws IOException if the stream has been closed.
+  */
+ private void checkNotClosed() throws IOException {
+   if (!closed) {
+     return;
+   }
+   throw new IOException(
+       uri + ": " + FSExceptionMessages.STREAM_IS_CLOSED);
+ }
+
+ /**
+  * Close the stream. The first call marks the stream as closed and releases
+  * the wrapped stream; the operation is synchronized and every later call is
+  * a no-op.
+  *
+  * @throws IOException on any problem
+  */
+ @Override
+ public synchronized void close() throws IOException {
+   if (closed) {
+     // already closed by an earlier call
+     return;
+   }
+   closed = true;
+   // close or abort the stream
+   closeStream("close() operation", this.contentRangeFinish);
+   // this is actually a no-op
+   super.close();
+ }
+
+ /**
+  * Close the wrapped stream, if there is one, and forget it.
+  *
+  * <p>The reference to the wrapped stream is cleared even when its
+  * {@code close()} fails, so that a stream which could not be closed cleanly
+  * is never treated as open by later calls. The failure itself is still
+  * reported to the caller.
+  *
+  * This does not set the {@link #closed} flag.
+  *
+  * @param reason reason for stream being closed; used in messages
+  * @param length length of the stream; used in messages only
+  * @throws IOException on any failure to close stream
+  */
+ private synchronized void closeStream(final String reason,
+     final long length)
+     throws IOException {
+   if (wrappedStream == null) {
+     return;
+   }
+   try {
+     wrappedStream.close();
+   } catch (IOException e) {
+     // the failure is logged and handed on to the caller
+     LOG.debug("When closing {} stream for {}", uri, reason, e);
+     throw e;
+   } finally {
+     // whatever happened, the stream must not be used again
+     wrappedStream = null;
+   }
+
+   LOG.debug(
+       "Stream {} : {}; streamPos={}, nextReadPos={},"
+           + " request range {}-{} length={}",
+       uri,
+       reason,
+       streamCurrentPos,
+       nextReadPos,
+       contentRangeStart,
+       contentRangeFinish,
+       length);
+ }
+
+ /**
+  * Report the value of {@link #remainingInFile()}, capped at
+  * {@code Integer.MAX_VALUE}.
+  *
+  * @return remaining byte count as an int
+  * @throws IOException if the stream is closed
+  */
+ @Override
+ public synchronized int available() throws IOException {
+   checkNotClosed();
+
+   return (int) Math.min(remainingInFile(), (long) Integer.MAX_VALUE);
+ }
+
+ /**
+ * Bytes left in stream, computed as {@code contentLength} minus
+ * {@code streamCurrentPos}.
+ *
+ * @return how many bytes are left to read
+ */
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ public synchronized long remainingInFile() {
+ return this.contentLength - this.streamCurrentPos;
+ }
+
+ /**
+ * Bytes left in the current request, computed as {@code contentRangeFinish}
+ * minus {@code streamCurrentPos}. Only valid if there is an active
+ * request.
+ *
+ * @return how many bytes are left to read in the current GET.
+ */
+ @InterfaceAudience.Private
+ @InterfaceStability.Unstable
+ public synchronized long remainingInCurrentRequest() {
+ return this.contentRangeFinish - this.streamCurrentPos;
+ }
+
+ /**
+ * Report whether mark/reset is supported by this stream.
+ *
+ * @return {@code false} in every case
+ */
+ @Override
+ public boolean markSupported() {
+ return false;
+ }
+
+ /**
+  * Describe the stream state: URI, whether a wrapped stream is open, the two
+  * positions, the content length and the current request range. Important:
+  * there are no guarantees as to the stability of this value.
+  *
+  * @return a string value for printing in logs/diagnostics
+  */
+ @Override
+ @InterfaceStability.Unstable
+ public String toString() {
+   synchronized (this) {
+     final String streamState = wrappedStream != null ? "open" : "closed";
+     final StringBuilder text = new StringBuilder("OBSInputStream{");
+     text.append(uri);
+     text.append(" wrappedStream=").append(streamState);
+     text.append(" streamCurrentPos=").append(streamCurrentPos);
+     text.append(" nextReadPos=").append(nextReadPos);
+     text.append(" contentLength=").append(contentLength);
+     text.append(" contentRangeStart=").append(contentRangeStart);
+     text.append(" contentRangeFinish=").append(contentRangeFinish);
+     text.append(" remainingInCurrentRequest=")
+         .append(remainingInCurrentRequest());
+     text.append('}');
+     return text.toString();
+   }
+ }
+
+ /**
+  * Read exactly {@code length} bytes starting at {@code position}.
+  *
+  * <p>The stream seeks to {@code position} once, reads until the requested
+  * number of bytes has arrived, and finally seeks back to the position it had
+  * before the call. The seek/read/seek-back sequence runs while holding the
+  * lock on this stream. {@inheritDoc}
+  */
+ @Override
+ public void readFully(final long position, final byte[] buffer,
+     final int offset,
+     final int length)
+     throws IOException {
+   final long startTime = System.currentTimeMillis();
+   final long threadId = Thread.currentThread().getId();
+   checkNotClosed();
+   validatePositionedReadArgs(position, buffer, offset, length);
+   if (length == 0) {
+     return;
+   }
+   int total = 0;
+   synchronized (this) {
+     final long savedPos = getPos();
+     try {
+       seek(position);
+       while (total < length) {
+         final int got = read(buffer, offset + total, length - total);
+         if (got < 0) {
+           // ran out of data before the request was satisfied
+           throw new EOFException(
+               FSExceptionMessages.EOF_IN_READ_FULLY);
+         }
+         total += got;
+       }
+     } finally {
+       // go back to where the stream was before this call
+       seekQuietly(savedPos);
+     }
+   }
+
+   final long endTime = System.currentTimeMillis();
+   LOG.debug(
+       "ReadFully uri:{}, contentLength:{}, destLen:{}, readLen:{}, "
+           + "position:{}, thread:{}, timeUsedMilliSec:{}",
+       uri, contentLength, length, total, position, threadId,
+       endTime - startTime);
+ }
+
+ /**
+  * Read bytes starting from the specified position.
+  *
+  * <p>The request is trimmed so that it does not run past the content
+  * length. A request for zero bytes inside the object returns 0 without
+  * issuing a remote read. When read transform is enabled on the filesystem
+  * the read is delegated to the superclass, otherwise a separate ranged
+  * request is made for it.
+  *
+  * @param position start read from this position
+  * @param buffer read buffer
+  * @param offset offset into buffer
+  * @param length number of bytes to read
+  * @return actual number of bytes read, or -1 if {@code position} is negative
+  *         or at/after the content length
+  * @throws IOException on any failure to read
+  */
+ @Override
+ public int read(final long position, final byte[] buffer, final int offset,
+     final int length)
+     throws IOException {
+   int len = length;
+   checkNotClosed();
+   validatePositionedReadArgs(position, buffer, offset, len);
+   if (position < 0 || position >= contentLength) {
+     return -1;
+   }
+   if (len == 0) {
+     // nothing was asked for: no need to contact the store
+     return 0;
+   }
+   if ((position + len) > contentLength) {
+     // trim the request to the bytes that actually exist
+     len = (int) (contentLength - position);
+   }
+
+   if (fs.isReadTransformEnabled()) {
+     return super.read(position, buffer, offset, len);
+   }
+
+   return randomReadWithNewInputStream(position, buffer, offset, len);
+ }
+
+ /**
+  * Serve a positioned read with a ranged request of its own, leaving the
+  * wrapped stream of this instance untouched.
+  *
+  * <p>The request is attempted up to {@code READ_RETRY_TIME} times with a
+  * pause of {@code DELAY_TIME} milliseconds between attempts. The stream
+  * obtained for an attempt is closed before the next attempt starts.
+  *
+  * @param position object offset of the first byte wanted
+  * @param buffer destination array
+  * @param offset index in {@code buffer} of the first byte to store
+  * @param length number of bytes wanted
+  * @return number of bytes read, or -1 if the returned stream was already at
+  *         its end
+  * @throws IOException if no object content was returned, if the last attempt
+  *                     failed, or if the pause between attempts is
+  *                     interrupted (the interrupt status is restored first)
+  */
+ private int randomReadWithNewInputStream(final long position,
+     final byte[] buffer, final int offset, final int length)
+     throws IOException {
+   long startTime = System.currentTimeMillis();
+   long threadId = Thread.currentThread().getId();
+   int bytesRead = 0;
+   InputStream inputStream = null;
+   IOException exception = null;
+   GetObjectRequest request = new GetObjectRequest(bucket, key);
+   request.setRangeStart(position);
+   // NOTE(review): if the SDK treats the range end as inclusive this asks
+   // for one byte more than needed; only 'length' bytes are consumed below.
+   request.setRangeEnd(position + length);
+   if (fs.getSse().isSseCEnable()) {
+     request.setSseCHeader(fs.getSse().getSseCHeader());
+   }
+
+   for (int retryTime = 1; retryTime <= READ_RETRY_TIME; retryTime++) {
+     // the stream of a previous attempt is already closed: forget it so it
+     // is neither closed again nor mistaken for the current one
+     inputStream = null;
+     try {
+       inputStream = client.getObject(request).getObjectContent();
+       if (inputStream == null) {
+         break;
+       }
+       bytesRead = tryToReadFromInputStream(inputStream, buffer,
+           offset, length);
+       if (bytesRead == -1) {
+         return -1;
+       }
+
+       exception = null;
+       break;
+     } catch (ObsException | IOException e) {
+       if (e instanceof ObsException) {
+         exception = translateException(
+             "Read at position " + position, uri, (ObsException) e);
+       } else {
+         exception = (IOException) e;
+       }
+       LOG.warn(
+           "read position[{}] destLen[{}] offset[{}] readLen[{}] "
+               + "of [{}] failed, retry time[{}], due to "
+               + "exception[{}] e[{}]",
+           position, length, offset, bytesRead, uri, retryTime,
+           exception, e);
+       if (retryTime < READ_RETRY_TIME) {
+         try {
+           Thread.sleep(DELAY_TIME);
+         } catch (InterruptedException ie) {
+           // keep the interruption visible to the caller
+           Thread.currentThread().interrupt();
+           LOG.error(
+               "read position[{}] destLen[{}] offset[{}] "
+                   + "readLen[{}] of [{}] failed, retry time[{}], "
+                   + "due to exception[{}] e[{}]",
+               position, length, offset, bytesRead, uri, retryTime,
+               exception, e);
+           throw exception;
+         }
+       }
+     } finally {
+       if (inputStream != null) {
+         inputStream.close();
+       }
+     }
+   }
+
+   if (inputStream == null || exception != null) {
+     LOG.error(
+         "read position[{}] destLen[{}] offset[{}] len[{}] failed, "
+             + "retry time[{}], due to exception[{}]",
+         position, length, offset, bytesRead, READ_RETRY_TIME,
+         exception);
+     throw new IOException("read failed of " + uri + ", inputStream is "
+         + (inputStream == null ? "null" : "not null"), exception);
+
+   }
+
+   long endTime = System.currentTimeMillis();
+   LOG.debug(
+       "Read-4args uri:{}, contentLength:{}, destLen:{}, readLen:{}, "
+           + "position:{}, thread:{}, timeUsedMilliSec:{}",
+       uri, contentLength, length, bytesRead, position, threadId,
+       endTime - startTime);
+   return bytesRead;
+ }
+
+ /**
+  * Set the readahead range of this stream.
+  *
+  * @param newReadaheadRange new range; {@code null} selects
+  *                          {@code OBSConstants.DEFAULT_READAHEAD_RANGE},
+  *                          a negative value is rejected
+  */
+ @Override
+ public synchronized void setReadahead(final Long newReadaheadRange) {
+   if (newReadaheadRange == null) {
+     this.readAheadRange = OBSConstants.DEFAULT_READAHEAD_RANGE;
+     return;
+   }
+   Preconditions.checkArgument(newReadaheadRange >= 0,
+       "Negative readahead value");
+   this.readAheadRange = newReadaheadRange;
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSListing.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSListing.java
new file mode 100644
index 0000000000000..4072feb2cac9d
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSListing.java
@@ -0,0 +1,656 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import com.obs.services.exception.ObsException;
+import com.obs.services.model.ListObjectsRequest;
+import com.obs.services.model.ObjectListing;
+import com.obs.services.model.ObsObject;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
+/**
+ * OBS listing implementation.
+ */
+class OBSListing {
+ /**
+ * A Path filter which accepts all filenames. Its {@code toString()} returns
+ * the name of this constant.
+ */
+ static final PathFilter ACCEPT_ALL =
+ new PathFilter() {
+ @Override
+ public boolean accept(final Path file) {
+ // every path passes
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ return "ACCEPT_ALL";
+ }
+ };
+
+ /**
+ * Class logger.
+ */
+ private static final Logger LOG = LoggerFactory.getLogger(OBSListing.class);
+
+ /**
+ * OBS File System instance.
+ */
+ private final OBSFileSystem owner;
+
+ /**
+ * Create a listing helper bound to a filesystem instance.
+ *
+ * @param ownerFS filesystem stored as the owner of this instance
+ */
+ OBSListing(final OBSFileSystem ownerFS) {
+ this.owner = ownerFS;
+ }
+
+ /**
+  * Build a FileStatus iterator for a path: an object listing iterator is
+  * created from the path and the initial request, and then wrapped together
+  * with the filter and the acceptor.
+  *
+  * @param listPath path of the listing
+  * @param request initial request to make
+  * @param filter the filter on which paths to accept
+  * @param acceptor the class/predicate to decide which entries to accept in
+  *                 the listing based on the full file status.
+  * @return the iterator
+  * @throws IOException IO Problems
+  */
+ FileStatusListingIterator createFileStatusListingIterator(
+     final Path listPath,
+     final ListObjectsRequest request,
+     final PathFilter filter,
+     final FileStatusAcceptor acceptor)
+     throws IOException {
+   final ObjectListingIterator source =
+       new ObjectListingIterator(listPath, request);
+   return new FileStatusListingIterator(source, filter, acceptor);
+ }
+
+ /**
+ * Create a located status iterator over a file status iterator.
+ *
+ * @param statusIterator an iterator over the remote status entries
+ * @return a new remote iterator
+ */
+ LocatedFileStatusIterator createLocatedFileStatusIterator(
+ final RemoteIterator If the status value is null, the iterator declares that it has no
+ * data. This iterator is used to handle
+ * {@link OBSFileSystem#listStatus(Path)} calls where the path handed in
+ * refers to a file, not a directory: this is
+ * the iterator returned.
+ */
+ static final class SingleStatusRemoteIterator
+ implements RemoteIterator This is a complex operation, especially the process to determine if
+ * there are more entries remaining. If there are no more results remaining in
+ * the (filtered) results of the current listing request, then another request
+ * is made
+ * and those results filtered before the iterator can declare that
+ * there is more data available.
+ *
+ * The need to filter the results precludes the iterator from simply
+ * declaring that if the {@link ObjectListingIterator#hasNext()} is true then
+ * there are more results. Instead the next batch of results must be retrieved
+ * and filtered.
+ *
+ * What does this mean? It means that remote requests to retrieve new
+ * batches of object listings are made in the {@link #hasNext()} call; the
+ * {@link #next()} call simply returns the filtered results of the last
+ * listing processed. However, do note that {@link #next()} calls {@link
+ * #hasNext()} during its operation. This is critical to ensure that a listing
+ * obtained through a sequence of {@link #next()} will complete with the same
+ * set of results as a classic {@code while (it.hasNext())} loop.
+ *
+ * Thread safety: None.
+ */
+ class FileStatusListingIterator implements RemoteIterator That is:
+ *
+ * 1. The first invocation of the {@link #next()} call will return the
+ * results of the first request, the one created during the construction of
+ * the instance.
+ *
+ * 2. Second and later invocations will continue the ongoing listing,
+ * calling {@link OBSCommonUtils#continueListObjects} to request the next
+ * batch of results.
+ *
+ * 3. The {@link #hasNext()} predicate returns true for the initial call,
+ * where {@link #next()} will return the initial results. It declares that it
+ * has future results iff the last executed request was truncated.
+ *
+ * Thread safety: none.
+ */
+ class ObjectListingIterator implements RemoteIterator This strips out login information.
+ *
+ * @param uri the URI to canonicalize
+ * @param defaultPort default port to use in canonicalized URI if the input
+ * URI has no port and this value is greater than 0
+ * @return a new, canonicalized URI.
+ */
+ public static URI canonicalizeUri(final URI uri, final int defaultPort) {
+   if (uri.getPort() != -1 || defaultPort <= 0) {
+     // a port is already present, or there is no usable default: the URI
+     // is handed back untouched
+     return uri;
+   }
+   // reconstruct the uri with the default port set; the user-info
+   // component is deliberately passed as null
+   try {
+     return new URI(
+         uri.getScheme(),
+         null,
+         uri.getHost(),
+         defaultPort,
+         uri.getPath(),
+         uri.getQuery(),
+         uri.getFragment());
+   } catch (URISyntaxException e) {
+     // Should never happen! Keep the parser's complaint as the cause so
+     // that it is not lost if it ever does.
+     throw new AssertionError(
+         "Valid URI became unparseable: " + uri, e);
+   }
+ }
+
+ /**
+ * Check the path, ignoring authentication details. See {@link
+ * OBSFileSystem#checkPath(Path)} for the operation of this.
+ *
+ * Essentially
+ * <ol>
+ * <li>A path without a scheme is accepted as is.</li>
+ * <li>The filesystem URI is canonicalized with the default port.</li>
+ * <li>If the schemes match, the hosts are compared, ignoring case.</li>
+ * <li>If the path has no host but the filesystem has one, the default
+ * filesystem URI of the configuration stands in for the path, provided its
+ * scheme matches too.</li>
+ * </ol>
+ *
+ * That all originates in the core FS; the sole change here being to use
+ * {@link URI#getHost()} over {@link URI#getAuthority()}. Some of that code
+ * looks a relic of the code anti-pattern of using "hdfs:file.txt" to define
+ * the path without declaring the hostname. It's retained for compatibility.
+ *
+ * @param conf FS configuration
+ * @param fsUri the FS URI
+ * @param path path to check
+ * @param defaultPort default port of FS
+ * @throws IllegalArgumentException if the path does not belong to the
+ * filesystem identified by {@code fsUri}
+ */
+ public static void checkPath(final Configuration conf, final URI fsUri,
+ final Path path, final int defaultPort) {
+ URI pathUri = path.toUri();
+ String thatScheme = pathUri.getScheme();
+ if (thatScheme == null) {
+ // no scheme: the path is relative to this filesystem
+ return;
+ }
+ URI thisUri = canonicalizeUri(fsUri, defaultPort);
+ String thisScheme = thisUri.getScheme();
+ // hostname and scheme are not case sensitive in these checks
+ if (equalsIgnoreCase(thisScheme, thatScheme)) { // schemes match
+ String thisHost = thisUri.getHost();
+ String thatHost = pathUri.getHost();
+ if (thatHost == null
+ && // path's host is null
+ thisHost != null) { // fs has a host
+ // fall back to the default filesystem URI of the configuration
+ URI defaultUri = FileSystem.getDefaultUri(conf);
+ if (equalsIgnoreCase(thisScheme, defaultUri.getScheme())) {
+ pathUri
+ = defaultUri; // schemes match, so use this uri instead
+ } else {
+ pathUri = null; // can't determine auth of the path
+ }
+ }
+ if (pathUri != null) {
+ // canonicalize uri before comparing with this fs
+ pathUri = canonicalizeUri(pathUri, defaultPort);
+ thatHost = pathUri.getHost();
+ if (equalsIgnoreCase(thisHost, thatHost)) {
+ // same scheme and same host: the path belongs to this filesystem
+ return;
+ }
+ }
+ }
+ // make sure the exception strips out any auth details
+ throw new IllegalArgumentException(
+ "Wrong FS " + OBSLoginHelper.toString(pathUri) + " -expected "
+ + fsUri);
+ }
+
+ /**
+  * Immutable holder of a user name, a password and an optional session
+  * token.
+  */
+ public static class Login {
+   /**
+    * Shared instance carrying no login details.
+    */
+   public static final Login EMPTY = new Login();
+
+   /**
+    * User name.
+    */
+   private final String user;
+
+   /**
+    * Password.
+    */
+   private final String password;
+
+   /**
+    * Session token.
+    */
+   private final String token;
+
+   /**
+    * Create an instance with an empty user name and password and no token.
+    * Calls to {@link #hasLogin()} return false.
+    */
+   Login() {
+     this("", "", null);
+   }
+
+   /**
+    * Create an instance without a session token.
+    *
+    * @param userName user name
+    * @param passwd password
+    */
+   Login(final String userName, final String passwd) {
+     this(userName, passwd, null);
+   }
+
+   /**
+    * Create an instance from all three values.
+    *
+    * @param userName user name
+    * @param passwd password
+    * @param sessionToken session token
+    */
+   Login(final String userName, final String passwd,
+       final String sessionToken) {
+     this.user = userName;
+     this.password = passwd;
+     this.token = sessionToken;
+   }
+
+   /**
+    * Predicate to verify login details are defined.
+    *
+    * @return true if the username is defined (not null, not empty).
+    */
+   public boolean hasLogin() {
+     return StringUtils.isNotEmpty(user);
+   }
+
+   /**
+    * Equality test on user and password; the token takes no part in it.
+    *
+    * @param o other object
+    * @return true if the objects are considered equivalent.
+    */
+   @Override
+   public boolean equals(final Object o) {
+     if (o == this) {
+       return true;
+     }
+     if (o == null || o.getClass() != getClass()) {
+       return false;
+     }
+     final Login other = (Login) o;
+     final boolean sameUser = Objects.equals(user, other.user);
+     return sameUser && Objects.equals(password, other.password);
+   }
+
+   /**
+    * Hash over the same two fields that {@link #equals(Object)} compares.
+    *
+    * @return hash of user and password
+    */
+   @Override
+   public int hashCode() {
+     return Objects.hash(user, password);
+   }
+
+   /**
+    * @return the user name
+    */
+   public String getUser() {
+     return user;
+   }
+
+   /**
+    * @return the password
+    */
+   public String getPassword() {
+     return password;
+   }
+
+   /**
+    * @return the session token
+    */
+   public String getToken() {
+     return token;
+   }
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSObjectBucketUtils.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSObjectBucketUtils.java
new file mode 100644
index 0000000000000..e632f61ca2db7
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSObjectBucketUtils.java
@@ -0,0 +1,892 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import com.obs.services.exception.ObsException;
+import com.obs.services.model.AbortMultipartUploadRequest;
+import com.obs.services.model.CompleteMultipartUploadRequest;
+import com.obs.services.model.CopyObjectRequest;
+import com.obs.services.model.CopyObjectResult;
+import com.obs.services.model.CopyPartRequest;
+import com.obs.services.model.CopyPartResult;
+import com.obs.services.model.DeleteObjectsRequest;
+import com.obs.services.model.GetObjectMetadataRequest;
+import com.obs.services.model.InitiateMultipartUploadRequest;
+import com.obs.services.model.InitiateMultipartUploadResult;
+import com.obs.services.model.KeyAndVersion;
+import com.obs.services.model.ListObjectsRequest;
+import com.obs.services.model.ObjectListing;
+import com.obs.services.model.ObjectMetadata;
+import com.obs.services.model.ObsObject;
+import com.obs.services.model.PartEtag;
+import com.obs.services.model.PutObjectRequest;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.ParentNotDirectoryException;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InterruptedIOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+
+/**
+ * Object bucket specific utils for {@link OBSFileSystem}.
+ */
+final class OBSObjectBucketUtils {
+ /**
+ * Class logger.
+ */
+ private static final Logger LOG = LoggerFactory.getLogger(
+ OBSObjectBucketUtils.class);
+
+ private OBSObjectBucketUtils() {
+
+ }
+
+ /**
+ * The inner rename operation.
+ *
+ * @param owner OBS File System instance
+ * @param src path to be renamed
+ * @param dst new path after rename
+ * @return true if renamed or src equals dst; false if dst is under src
+ * @throws RenameFailedException if some criteria for a state changing rename
+ * was not met. This means work didn't happen;
+ * it's not something which is reported upstream
+ * to the FileSystem APIs, for which the
+ * semantics of "false" are pretty vague.
+ * @throws FileNotFoundException there's no source file.
+ * @throws IOException on IO failure.
+ * @throws ObsException on failures inside the OBS SDK
+ */
+ static boolean renameBasedOnObject(final OBSFileSystem owner,
+ final Path src, final Path dst) throws RenameFailedException,
+ FileNotFoundException, IOException,
+ ObsException {
+ String srcKey = OBSCommonUtils.pathToKey(owner, src);
+ String dstKey = OBSCommonUtils.pathToKey(owner, dst);
+
+ if (srcKey.isEmpty()) {
+ LOG.error("rename: src [{}] is root directory", src);
+ throw new IOException(src + " is root directory");
+ }
+
+ // get the source file status; this raises a FNFE if there is no source
+ // file.
+ FileStatus srcStatus = owner.getFileStatus(src);
+
+ FileStatus dstStatus;
+ try {
+ dstStatus = owner.getFileStatus(dst);
+ // if there is no destination entry, an exception is raised.
+ // hence this code sequence can assume that there is something
+ // at the end of the path; the only detail being what it is and
+ // whether or not it can be the destination of the rename.
+ if (dstStatus.isDirectory()) {
+ String newDstKey = OBSCommonUtils.maybeAddTrailingSlash(dstKey);
+ String filename = srcKey.substring(
+ OBSCommonUtils.pathToKey(owner, src.getParent()).length()
+ + 1);
+ newDstKey = newDstKey + filename;
+ dstKey = newDstKey;
+ dstStatus = owner.getFileStatus(
+ OBSCommonUtils.keyToPath(dstKey));
+ if (dstStatus.isDirectory()) {
+ throw new RenameFailedException(src, dst,
+ "new destination is an existed directory")
+ .withExitCode(false);
+ } else {
+ throw new RenameFailedException(src, dst,
+ "new destination is an existed file")
+ .withExitCode(false);
+ }
+ } else {
+
+ if (srcKey.equals(dstKey)) {
+ LOG.warn(
+ "rename: src and dest refer to the same file or"
+ + " directory: {}",
+ dst);
+ return true;
+ } else {
+ throw new RenameFailedException(src, dst,
+ "destination is an existed file")
+ .withExitCode(false);
+ }
+ }
+ } catch (FileNotFoundException e) {
+ LOG.debug("rename: destination path {} not found", dst);
+
+ // Parent must exist
+ checkDestinationParent(owner, src, dst);
+ }
+ // the length check keeps charAt() in range when dstKey equals srcKey
+ if (dstKey.length() > srcKey.length() && dstKey.startsWith(srcKey)
+ && dstKey.charAt(srcKey.length()) == Path.SEPARATOR_CHAR) {
+ LOG.error("rename: dest [{}] cannot be a descendant of src [{}]",
+ dst, src);
+ return false;
+ }
+
+ // Ok! Time to start
+ if (srcStatus.isFile()) {
+ LOG.debug("rename: renaming file {} to {}", src, dst);
+
+ renameFile(owner, srcKey, dstKey, srcStatus);
+ } else {
+ LOG.debug("rename: renaming directory {} to {}", src, dst);
+
+ // This is a directory to directory copy
+ dstKey = OBSCommonUtils.maybeAddTrailingSlash(dstKey);
+ srcKey = OBSCommonUtils.maybeAddTrailingSlash(srcKey);
+
+ renameFolder(owner, srcKey, dstKey);
+ }
+ // compare the parents by value; != only tested object identity
+ if (!src.getParent().equals(dst.getParent())) {
+ // deleteUnnecessaryFakeDirectories(dst.getParent());
+ createFakeDirectoryIfNecessary(owner, src.getParent());
+ }
+
+ return true;
+ }
+
+ private static void checkDestinationParent(final OBSFileSystem owner,
+ final Path src,
+ final Path dst) throws IOException {
+ Path parent = dst.getParent();
+ if (!OBSCommonUtils.pathToKey(owner, parent).isEmpty()) {
+ try {
+ FileStatus dstParentStatus = owner.getFileStatus(
+ dst.getParent());
+ if (!dstParentStatus.isDirectory()) {
+ throw new ParentNotDirectoryException(
+ "destination parent [" + dst.getParent()
+ + "] is not a directory");
+ }
+ } catch (FileNotFoundException e2) {
+ throw new RenameFailedException(src, dst,
+ "destination has no parent ");
+ }
+ }
+ }
+
+ /**
+ * Rename a file: copy it to the destination key, then delete the source.
+ *
+ * @param owner OBS File System instance
+ * @param srcKey source object key
+ * @param dstKey destination object key
+ * @param srcStatus source object status; supplies the length to copy
+ * @throws IOException any problem with rename operation
+ */
+ private static void renameFile(final OBSFileSystem owner,
+ final String srcKey,
+ final String dstKey,
+ final FileStatus srcStatus)
+ throws IOException {
+ long startTime = System.nanoTime();
+
+ copyFile(owner, srcKey, dstKey, srcStatus.getLen());
+ objectDelete(owner, srcStatus, false);
+ // the logged delay is the elapsed System.nanoTime() difference (nanoseconds)
+ if (LOG.isDebugEnabled()) {
+ long delay = System.nanoTime() - startTime;
+ LOG.debug("OBSFileSystem rename: "
+ + ", {src="
+ + srcKey
+ + ", dst="
+ + dstKey
+ + ", delay="
+ + delay
+ + "}");
+ }
+ }
+
+ static boolean objectDelete(final OBSFileSystem owner,
+ final FileStatus status,
+ final boolean recursive) throws IOException {
+ Path f = status.getPath();
+ String key = OBSCommonUtils.pathToKey(owner, f);
+ // Deletes what status describes: one branch for directories, one for files.
+ if (status.isDirectory()) {
+ LOG.debug("delete: Path is a directory: {} - recursive {}", f,
+ recursive);
+ // from here on the directory key always ends with "/"
+ key = OBSCommonUtils.maybeAddTrailingSlash(key);
+ if (!key.endsWith("/")) {
+ key = key + "/";
+ }
+ // a key of exactly "/" is handed to the root-directory rejection helper
+ boolean isEmptyDir = OBSCommonUtils.isFolderEmpty(owner, key);
+ if (key.equals("/")) {
+ return OBSCommonUtils.rejectRootDirectoryDelete(
+ owner.getBucket(), isEmptyDir, recursive);
+ }
+ // a non-empty directory is only removed when recursive is set
+ if (!recursive && !isEmptyDir) {
+ throw new PathIsNotEmptyDirectoryException(f.toString());
+ }
+
+ if (isEmptyDir) {
+ LOG.debug(
+ "delete: Deleting fake empty directory {} - recursive {}",
+ f, recursive);
+ OBSCommonUtils.deleteObject(owner, key);
+ } else {
+ LOG.debug(
+ "delete: Deleting objects for directory prefix {} "
+ + "- recursive {}",
+ f, recursive);
+ deleteNonEmptyDir(owner, recursive, key);
+ }
+
+ } else {
+ LOG.debug("delete: Path is a file");
+ OBSCommonUtils.deleteObject(owner, key);
+ }
+ // NOTE(review): presumably restores the parent's directory marker - confirm
+ Path parent = f.getParent();
+ if (parent != null) {
+ createFakeDirectoryIfNecessary(owner, parent);
+ }
+ return true;
+ }
+
+ /**
+ * Implement rename folder.
+ *
+ * @param owner OBS File System instance
+ * @param srcKey source folder key
+ * @param dstKey destination folder key
+ * @throws IOException any problem with rename folder
+ */
+ static void renameFolder(final OBSFileSystem owner, final String srcKey,
+ final String dstKey)
+ throws IOException {
+ long startTime = System.nanoTime();
+
+ List It hides direct access to the OBS API from the output stream, and is a
+ * location where the object upload process can be evolved/enhanced.
+ *
+ * Features
+ *
+ *
+ * Each instance of this state is unique to a single output stream.
+ */
+class OBSWriteOperationHelper {
+ /**
+ * Class logger.
+ */
+ public static final Logger LOG = LoggerFactory.getLogger(
+ OBSWriteOperationHelper.class);
+
+ /**
+ * Part number of the multipart task.
+ */
+ static final int PART_NUMBER = 10000;
+
+ /**
+ * Owning filesystem.
+ */
+ private final OBSFileSystem owner;
+
+ /**
+ * Bucket of the owner FS.
+ */
+ private final String bucket;
+
+ /**
+ * Define obs client.
+ */
+ private final ObsClient obs;
+
+ protected OBSWriteOperationHelper(final OBSFileSystem fs) {
+ this.owner = fs;
+ this.bucket = fs.getBucket();
+ this.obs = fs.getObsClient();
+ }
+
+ /**
+ * Create a {@link PutObjectRequest} request. If {@code length} is set, the
+ * metadata is configured with the size of the upload.
+ *
+ * @param destKey key of object
+ * @param inputStream source data
+ * @param length size, if known. Use -1 for not known
+ * @return the request
+ */
+ PutObjectRequest newPutRequest(final String destKey,
+ final InputStream inputStream,
+ final long length) {
+ return OBSCommonUtils.newPutObjectRequest(owner, destKey,
+ newObjectMetadata(length), inputStream);
+ }
+
+ /**
+ * Create a {@link PutObjectRequest} request to upload a file.
+ * The length stays a long so files of 2 GiB or more are not truncated.
+ * @param destKey object key for request
+ * @param sourceFile source file
+ * @return the request
+ */
+ PutObjectRequest newPutRequest(final String destKey,
+ final File sourceFile) {
+ long length = sourceFile.length();
+ return OBSCommonUtils.newPutObjectRequest(owner, destKey,
+ newObjectMetadata(length), sourceFile);
+ }
+
+ /**
+ * Callback on a successful write.
+ *
+ * @param destKey object key
+ */
+ void writeSuccessful(final String destKey) {
+ LOG.debug("Finished write to {}", destKey);
+ }
+
+ /**
+ * Create a new object metadata instance. Any standard metadata headers are
+ * added here, for example: encryption.
+ *
+ * @param length size, if known. Use -1 for not known
+ * @return a new metadata instance
+ */
+ public ObjectMetadata newObjectMetadata(final long length) {
+ return OBSObjectBucketUtils.newObjectMetadata(length);
+ }
+
+ /**
+ * Start the multipart upload process.
+ *
+ * @param destKey object key
+ * @return the upload ID of the newly initiated multipart upload
+ * @throws IOException translated from an ObsException raised by the request
+ */
+ String initiateMultiPartUpload(final String destKey) throws IOException {
+ LOG.debug("Initiating Multipart upload");
+ final InitiateMultipartUploadRequest initiateMPURequest =
+ new InitiateMultipartUploadRequest(bucket, destKey);
+ initiateMPURequest.setAcl(owner.getCannedACL());
+ initiateMPURequest.setMetadata(newObjectMetadata(-1));
+ if (owner.getSse().isSseCEnable()) {
+ initiateMPURequest.setSseCHeader(owner.getSse().getSseCHeader());
+ } else if (owner.getSse().isSseKmsEnable()) {
+ initiateMPURequest.setSseKmsHeader(
+ owner.getSse().getSseKmsHeader());
+ }
+ try {
+ return obs.initiateMultipartUpload(initiateMPURequest)
+ .getUploadId();
+ } catch (ObsException ace) {
+ throw OBSCommonUtils.translateException("Initiate MultiPartUpload",
+ destKey, ace);
+ }
+ }
+
+ /**
+ * Complete a multipart upload operation.
+ *
+ * @param destKey Object key
+ * @param uploadId multipart operation Id
+ * @param partETags list of partial uploads
+ * @return the result
+ * @throws ObsException on problems.
+ */
+ CompleteMultipartUploadResult completeMultipartUpload(
+ final String destKey, final String uploadId,
+ final List
+ * OBS supports two kinds of buckets: object bucket and posix bucket. Posix
+ * bucket provides more POSIX-like semantics than object bucket, and is
+ * recommended for Hadoop. Object bucket is deprecated for Hadoop.
+ */
+
+package org.apache.hadoop.fs.obs;
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
new file mode 100644
index 0000000000000..e77425ab52989
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+org.apache.hadoop.fs.obs.OBSFileSystem
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/site/markdown/index.md b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/site/markdown/index.md
new file mode 100644
index 0000000000000..723da89e2beb2
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/site/markdown/index.md
@@ -0,0 +1,370 @@
+
+
+# OBSA: HuaweiCloud OBS Adapter for Hadoop Support
+
+
+
+## Introduction
+
+The `hadoop-huaweicloud` module provides support for integration with the
+[HuaweiCloud Object Storage Service (OBS)](https://www.huaweicloud.com/en-us/product/obs.html).
+This support comes via the JAR file `hadoop-huaweicloud.jar`.
+
+## Features
+
+* Read and write data stored in a HuaweiCloud OBS account.
+ * Reference file system paths with URLs that use the `obs` scheme.
+* Present a hierarchical file system view by implementing the standard Hadoop `FileSystem` interface.
+* Support multipart upload for a large file.
+* Can act as a source of data in a MapReduce job, or a sink.
+* Uses HuaweiCloud OBS’s Java SDK with support for latest OBS features and authentication schemes.
+* Tested for scale.
+
+## Limitations
+
+ Partial or no support for the following operations:
+
+* Symbolic link operations.
+* Proxy users.
+* File truncate.
+* File concat.
+* File checksum.
+* File replication factor.
+* Extended Attributes(XAttrs) operations.
+* Snapshot operations.
+* Storage policy.
+* Quota.
+* POSIX ACL.
+* Delegation token operations.
+
+## Getting Started
+
+### Packages
+
+OBSA depends upon two JARs, alongside `hadoop-common` and its dependencies.
+
+* `hadoop-huaweicloud` JAR.
+* `esdk-obs-java` JAR.
+
+The versions of `hadoop-common` and `hadoop-huaweicloud` must be identical.
+
+To import the libraries into a Maven build, add `hadoop-huaweicloud` JAR to the
+build dependencies; it will pull in a compatible `esdk-obs-java` JAR.
+
+ The `hadoop-huaweicloud` JAR *does not* declare any dependencies other than the
+ dependency unique to it, the OBS SDK JAR. This is to simplify excluding/tuning
+Hadoop dependency JARs in downstream applications. The `hadoop-client` or
+`hadoop-common` dependency must be declared.
+
+
+```xml
+
+ * If the test.fs.obs.name property is not set, this will trigger a JUnit
+ * failure.
+ *
+ * Multipart purging is not enabled.
+ *
+ * @param conf configuration
+ * @return the FS
+ * @throws IOException IO Problems
+ * @throws AssumptionViolatedException if the FS is not named
+ */
+ public static OBSFileSystem createTestFileSystem(Configuration conf)
+ throws IOException {
+ return createTestFileSystem(conf, false);
+ }
+
+ /**
+ * Create the test filesystem with or without multipart purging.
+ *
+ * If the test.fs.obs.name property is unset, or is not a URI with the
+ * {@link OBSConstants#OBS_SCHEME} scheme, the assumption below fails.
+ *
+ * @param conf configuration; purge options are set on it when purge is true
+ * @param purge flag to enable Multipart purging
+ * @return the FS
+ * @throws IOException IO Problems
+ * @throws AssumptionViolatedException if no OBS test filesystem is named
+ */
+ @SuppressWarnings("deprecation")
+ public static OBSFileSystem createTestFileSystem(Configuration conf,
+ boolean purge)
+ throws IOException {
+
+ String fsname = conf.getTrimmed(TEST_FS_OBS_NAME, "");
+ // constant-first equals: a URI without a scheme is simply "not a live test"
+ boolean liveTest = !StringUtils.isEmpty(fsname);
+ URI testURI = null;
+ if (liveTest) {
+ testURI = URI.create(fsname);
+ liveTest = OBSConstants.OBS_SCHEME.equals(testURI.getScheme());
+ }
+ if (!liveTest) {
+ // This doesn't work with our JUnit 3 style test cases, so instead we'll
+ // make this whole class not run by default
+ throw new AssumptionViolatedException(
+ "No test filesystem in " + TEST_FS_OBS_NAME);
+ }
+ OBSFileSystem fs1 = new OBSFileSystem();
+ //enable purging in tests
+ if (purge) {
+ conf.setBoolean(PURGE_EXISTING_MULTIPART, true);
+ // but a long delay so that parallel multipart tests don't
+ // suddenly start timing out
+ conf.setInt(PURGE_EXISTING_MULTIPART_AGE, 30 * 60);
+ }
+ fs1.initialize(testURI, conf);
+ return fs1;
+ }
+
+ /**
+ * Pick the path a test run should work under. When the system property
+ * {@link OBSTestConstants#TEST_UNIQUE_FORK_ID} is set, the result is
+ * {@code /FORK_ID/test}; otherwise the supplied default is returned.
+ *
+ * @param defVal path to use when no fork id is set
+ * @return the path to use
+ */
+ public static Path createTestPath(Path defVal) {
+ final String forkId =
+ System.getProperty(OBSTestConstants.TEST_UNIQUE_FORK_ID);
+ final boolean forked = forkId != null;
+ return forked ? new Path("/" + forkId, "test") : defVal;
+ }
+
+ /**
+ * This class should not be instantiated.
+ */
+ private OBSTestUtils() {
+ }
+
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractAppend.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractAppend.java
new file mode 100644
index 0000000000000..a4fb8153e7ca4
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractAppend.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractAppendTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.junit.Assume;
+
+/**
+ * Append test cases on obs file system.
+ */
+public class TestOBSContractAppend extends AbstractContractAppendTest {
+
+ @Override
+ protected AbstractFSContract createContract(final Configuration conf) {
+ return new OBSContract(conf);
+ }
+
+ @Override
+ public void testRenameFileBeingAppended() {
+ Assume.assumeTrue("unsupport.", false);
+ }
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksumCompositeCrc.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractCreate.java
similarity index 58%
rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksumCompositeCrc.java
rename to hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractCreate.java
index 87fb7da6e2e6f..d3966a13b95ff 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksumCompositeCrc.java
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractCreate.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -15,33 +15,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.hadoop.hdfs;
+
+package org.apache.hadoop.fs.obs;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
+import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.junit.Assume;
/**
- * End-to-end tests for COMPOSITE_CRC combine mode.
+ * Create test cases on obs file system.
*/
-public class TestFileChecksumCompositeCrc extends TestFileChecksum {
- @Override
- protected void customizeConf(Configuration conf) {
- conf.set(
- HdfsClientConfigKeys.DFS_CHECKSUM_COMBINE_MODE_KEY, "COMPOSITE_CRC");
- }
+public class TestOBSContractCreate extends AbstractContractCreateTest {
@Override
- protected boolean expectComparableStripedAndReplicatedFiles() {
- return true;
+ protected AbstractFSContract createContract(final Configuration conf) {
+ return new OBSContract(conf);
}
@Override
- protected boolean expectComparableDifferentBlockSizeReplicatedFiles() {
- return true;
+ public void testCreatedFileIsImmediatelyVisible() {
+ Assume.assumeTrue("unsupport.", false);
}
@Override
- protected boolean expectSupportForSingleFileMixedBytesPerChecksum() {
- return true;
+ public void testCreatedFileIsVisibleOnFlush() {
+ Assume.assumeTrue("unsupport", false);
}
}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractDelete.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractDelete.java
new file mode 100644
index 0000000000000..9dd67ad779beb
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractDelete.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+
+/**
+ * Delete test cases on obs file system.
+ */
+public class TestOBSContractDelete extends AbstractContractDeleteTest {
+
+ @Override
+ protected AbstractFSContract createContract(final Configuration conf) {
+ return new OBSContract(conf);
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractGetFileStatus.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractGetFileStatus.java
new file mode 100644
index 0000000000000..15ffd97e0904c
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractGetFileStatus.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+
+/**
+ * Get file status test cases on obs file system.
+ */
+public class TestOBSContractGetFileStatus extends
+ AbstractContractGetFileStatusTest {
+
+ @Override
+ protected AbstractFSContract createContract(
+ final Configuration conf) {
+ return new OBSContract(conf);
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractMkdir.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractMkdir.java
new file mode 100644
index 0000000000000..e06ad860e21aa
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractMkdir.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+
+/**
+ * Mkdir test cases on obs file system.
+ */
+public class TestOBSContractMkdir extends AbstractContractMkdirTest {
+
+ @Override
+ protected AbstractFSContract createContract(final Configuration conf) {
+ return new OBSContract(conf);
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractOpen.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractOpen.java
new file mode 100644
index 0000000000000..c8641dfd627c6
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractOpen.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+
+/**
+ * Open test cases on obs file system.
+ */
+public class TestOBSContractOpen extends AbstractContractOpenTest {
+
+ @Override
+ protected AbstractFSContract createContract(final Configuration conf) {
+ return new OBSContract(conf);
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractRename.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractRename.java
new file mode 100644
index 0000000000000..25502a23f27d8
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractRename.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractRenameTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.junit.Assume;
+
+/**
+ * Rename test cases on obs file system.
+ */
+public class TestOBSContractRename extends AbstractContractRenameTest {
+
+ @Override
+ protected AbstractFSContract createContract(final Configuration conf) {
+ return new OBSContract(conf);
+ }
+
+ @Override
+ public void testRenameFileUnderFileSubdir() {
+ Assume.assumeTrue("unsupport.", false);
+ }
+
+ @Override
+ public void testRenameFileUnderFile() {
+ Assume.assumeTrue("unsupport.", false);
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractRootDir.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractRootDir.java
new file mode 100644
index 0000000000000..ba961a300efb3
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractRootDir.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+
+/**
+ * Runs the root directory contract tests against the OBS file system.
+ */
+public class TestOBSContractRootDir extends AbstractContractRootDirectoryTest {
+  @Override
+  protected AbstractFSContract createContract(final Configuration conf) {
+    final AbstractFSContract obsContract = new OBSContract(conf);
+    return obsContract;
+  }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractSeek.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractSeek.java
new file mode 100644
index 0000000000000..48751ea669698
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSContractSeek.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+
+/**
+ * Runs the seek contract tests against the OBS file system.
+ */
+public class TestOBSContractSeek extends AbstractContractSeekTest {
+  @Override
+  protected AbstractFSContract createContract(final Configuration conf) {
+    final AbstractFSContract obsContract = new OBSContract(conf);
+    return obsContract;
+  }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSFSMainOperations.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSFSMainOperations.java
new file mode 100644
index 0000000000000..b62023b642486
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/java/org/apache/hadoop/fs/obs/TestOBSFSMainOperations.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.TestFSMainOperationsLocalFileSystem;
+import org.junit.After;
+import org.junit.Assume;
+import org.junit.Before;
+
+/**
+ *
+ * A collection of tests for the {@link FileSystem}. This test should be used
+ * for testing an instance of FileSystem that has been initialized to a specific
+ * default FileSystem such as LocalFileSystem, HDFS, OBS, etc.
+ *
+ * To test a given {@link FileSystem} implementation create a subclass of this
+ * test and override {@link #setUp()} to initialize the
+ * Since this is a JUnit 4 test you can also do a single setup before the start
+ * of any tests. E.g.
+ *
+ *
+ *
+ * A collection of tests for the {@link FileContext} to test path names passed
+ * as URIs. This test should be used for testing an instance of FileContext that
+ * has been initialized to a specific default FileSystem such as
+ * LocalFileSystem, HDFS, OBS, etc., and where path names are passed that are
+ * URIs in a different FileSystem.
+ *
+ * To test a given {@link FileSystem} implementation create a subclass of this
+ * test and override {@link #setUp()} to initialize the
+ * The tests will do operations on fc1 that use a URI in fc2
+ *
+ * {@link FileContext} instance variable.
+ *
+ * A collection of Util tests for the {@link FileContext#util()}. This test
+ * should be used for testing an instance of {@link FileContext#util()} that has
+ * been initialized to a specific default FileSystem such as LocalFileSystem,
+ * HDFS, OBS, etc.
+ *
+ * To test a given {@link FileSystem} implementation create a subclass of this
+ * test and override {@link #setUp()} to initialize the
+ * This uses BlockJUnit4ClassRunner because FileSystemContractBaseTest derives
+ * from TestCase, which uses the old JUnit 3 runner that doesn't ignore
+ * assumptions properly, making it impossible to skip the tests if we don't have
+ * a valid bucket.
+ **/
+public class TestOBSFileSystemContract extends FileSystemContractBaseTest {
+
+  @Before
+  public void setUp() throws Exception {
+    skipTestCheck(); // bail out before any file system is created
+    Configuration conf = new Configuration();
+    conf.addResource(OBSContract.CONTRACT_XML);
+    fs = OBSTestUtils.createTestFileSystem(conf);
+  }
+
+  @Override
+  public void testMkdirsWithUmask() {
+    Assume.assumeTrue("Unsupported on OBS.", false); // always skip
+  }
+
+  @Override
+  public void testRenameRootDirForbidden() {
+    Assume.assumeTrue("Unsupported on OBS.", false); // always skip
+  }
+
+  public void skipTestCheck() {
+    Assume.assumeTrue(OBSContract.isContractTestEnabled()); // skip if disabled
+  }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/resources/contract/obs.xml b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/resources/contract/obs.xml
new file mode 100644
index 0000000000000..30b2cf04234d9
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/test/resources/contract/obs.xml
@@ -0,0 +1,139 @@
+
+
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of
+
+
+
+
+ @param out the writer to write to.]]>
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException]]>
+ Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath: Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+ Value strings are first processed for variable expansion. The
+ available properties are: For example, if a configuration resource contains the following property
+ definitions:
+ When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+ When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+ By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.
+
+ Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by hadoop.tags.system property. Users
+ can define their own custom tags in hadoop.tags.custom property.
+
+ For example, we can tag existing property as:
+ Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
+
+ Use the CreateFlag as follows:
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+ The file system related server-side defaults are:
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given path
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
+
+
+
+ /**
+ * Constructs a {@code FileConflictException} with the specified detail
+ * message. The string {@code s} can be retrieved later by the
+ * {@link Throwable#getMessage}
+ * method of class {@code java.lang.Throwable}.
+ *
+ * @param s the detail message.
+ */
+ FileConflictException(final String s) {
+ super(s); // pass the detail message up to the superclass constructor
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBS.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBS.java
new file mode 100644
index 0000000000000..3f05f007ee578
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBS.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.DelegateToFileSystem;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+/**
+ * AbstractFileSystem for OBS that delegates to {@link OBSFileSystem}.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public final class OBS extends DelegateToFileSystem {
+
+ /**
+ * Creates the delegate around a new {@link OBSFileSystem} for scheme "obs".
+ * @param theUri URI of the file system
+ * @param conf Configuration for the file system
+ * @throws IOException on any failure to initialize this instance
+ * @throws URISyntaxException theUri has syntax error
+ */
+ public OBS(final URI theUri, final Configuration conf)
+ throws IOException, URISyntaxException {
+ super(theUri, new OBSFileSystem(), conf, "obs", false);
+ }
+
+ @Override
+ public int getUriDefaultPort() {
+ return OBSConstants.OBS_DEFAULT_PORT;
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSBlockOutputStream.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSBlockOutputStream.java
new file mode 100644
index 0000000000000..d19674027763b
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSBlockOutputStream.java
@@ -0,0 +1,814 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.obs;
+
+import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures;
+import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture;
+import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
+import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors;
+import com.obs.services.exception.ObsException;
+import com.obs.services.model.CompleteMultipartUploadResult;
+import com.obs.services.model.PartEtag;
+import com.obs.services.model.PutObjectRequest;
+import com.obs.services.model.UploadPartRequest;
+import com.obs.services.model.UploadPartResult;
+import com.obs.services.model.fs.WriteFileRequest;
+import com.sun.istack.NotNull;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.Syncable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * OBS output stream based on block buffering.
+ *
+ * If the path is a directory,
+ * if recursive is false, returns files in the directory;
+ * if recursive is true, return files in the subtree rooted at the path.
+ * If the path is a file, return the file's status and block locations.
+ *
+ *
+ * @param f a path
+ * @param recursive if the subdirectories need to be traversed recursively
+ * @return an iterator that traverses statuses of the files/directories in the
+ * given path
+ * @throws FileNotFoundException if {@code path} does not exist
+ * @throws IOException if any I/O error occurred
+ */
+ @Override
+ public RemoteIteratorf does not exist
+ * @throws IOException If an I/O error occurred
+ */
+ @Override
+ public RemoteIteratorf does not exist
+ * @throws IOException if any I/O error occurred
+ */
+ @Override
+ public RemoteIterator
+ *
+ *
+ *
+ * fSys {@link
+ * FileSystem} instance variable.
+ * fc1 and
+ * fc2
+ * fc {@link
+ * FileContext} instance variable.
+ *
+ * true if the key is deprecated and
+ false otherwise.]]>
+ null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+ name exists without value]]>
+ String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+ String,
+ defaultValue if no such property exists.
+ See {@link Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+ name property or
+ its replacing property and null if no such property exists.]]>
+ name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+ name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+ defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+ int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+ int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+ long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+ long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+ long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+ float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+ float.
+
+ @param name property name.
+ @param value property value.]]>
+ double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+ double.
+
+ @param name property name.
+ @param value property value.]]>
+ boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+ boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+ set(<name>, value.toString()).
+ @param name property name
+ @param value new value]]>
+ set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
+ Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then DefaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+ Strings.
+ If no such property is specified then empty collection is returned.
+ Strings.]]>
+ Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+ Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+ Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+ Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+ Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+ host:port.]]>
+ host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+ host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+ Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+ Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+ Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @return property value as a Class,
+ or defaultValue.]]>
+ List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @return a List of objects implementing xface.]]>
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+ {
+ "property": {
+ "key" : "key1",
+ "value" : "value1",
+ "isFinal" : "key1.isFinal",
+ "resource" : "key1.resource"
+ }
+ }
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "key2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+ false
+ to turn it off.]]>
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+ Final Parameters
+
+
+ <property>
+ <name>dfs.hosts.include</name>
+ <value>/etc/hadoop/conf/hosts.include</value>
+ <final>true</final>
+ </property>
+
+ Administrators typically define parameters as final in
+ core-site.xml for values that user applications may not alter.
+
+ Variable Expansion
+
+
+
+
+
+ <property>
+ <name>basedir</name>
+ <value>/user/${user.name}</value>
+ </property>
+
+ <property>
+ <name>tempdir</name>
+ <value>${basedir}/tmp</value>
+ </property>
+
+ <property>
+ <name>otherdir</name>
+ <value>${env.BASE_DIR}/other</value>
+ </property>
+
+
+ Tags
+
+
+ <property>
+ <name>dfs.replication</name>
+ <value>3</value>
+ <tag>HDFS,REQUIRED</tag>
+ </property>
+
+ <property>
+ <name>dfs.data.transfer.protection</name>
+ <value>3</value>
+ <tag>HDFS,SECURITY</tag>
+ </property>
+
+ KeyProvider implementations must be thread safe.]]>
+ uri is not supported.]]>
+
+ BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+ "host2:9866","host3:9866","host4:9866","host5:9866"})
+
+
+ Please refer to
+ {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
+ {@link FileContext#getFileBlockLocations(Path, long, long)}
+ for more examples.]]>
+
+ EnumSet.of(CreateFlag.CREATE, CreateFlag.APPEND)
+
+
+
+
+ Following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
]]>
+ absOrFqPath could
+ not be instantiated.]]>
+
+
+
+ Illegal WDs:
+
+
]]>
+ f is not valid]]>
+
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+ dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid]]>
+ f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+ f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ linkcode> already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+ Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+
+
+ Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
+ Example 3: FileContext with local file system as the default
+
+
+ Example 4: Use a specific config, ignoring $HADOOP_CONFIG
+ Generally you should not need to use a config unless you are doing
+
+
]]>
+ UnsupportedOperationException.
+
+ @return the protocol scheme for this FileSystem.
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
+
+ BlockLocation( { "localhost:9866" }, { "localhost" }, 0, file.getLen())
+
+
+ In HDFS, if file is three-replicated, the returned array contains
+ elements like:
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866", "host3:9866"})
+ BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
+ hosts: {"host2:9866", "host3:9866", "host4:9866"})
+
+
+ And if a file is erasure-coded, the returned BlockLocation are logical
+ block groups.
+
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
+ BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+ "host2:9866","host3:9866","host4:9866","host5:9866"})
+ BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
+ "host4:9866", "host5:9866"})
+
+
+ @param file FileStatus to get data from
+ @param start offset into the given file
+ @param len length for which to get locations
+ @throws IOException IO failure]]>
+ true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
+
+
+
+ The local implementation is {@link LocalFileSystem} and distributed + implementation is DistributedFileSystem. There are other implementations + for object stores and (outside the Apache Hadoop codebase), + third party filesystems. +
+ Notes +
FilterFileSystem
+ itself simply overrides all methods of
+ FileSystem with versions that
+ pass all requests to the contained file
+ system. Subclasses of FilterFileSystem
+ may further override some of these methods
+ and may also provide additional methods
+ and fields.]]>
+ file]]>
+ pathname
+ should be included]]>
+ ftp]]>
+ + Fencing is configured by the operator as an ordered list of methods to + attempt. Each method will be tried in turn, and the next in the list + will only be attempted if the previous one fails. See {@link NodeFencer} + for more information. +
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
+ Compared with ObjectWritable, this class is much more efficient,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+ Generic Writable implements {@link Configurable} interface, so that it will be + configured by the framework. The configuration is passed to the wrapped objects + implementing {@link Configurable} interface before deserialization. +
+ + how to use it:getTypes(), defines
+ the classes which will be wrapped in GenericObject in application.
+ Attention: the classes defined in the getTypes() method must
+ implement Writable interface.
+
+ public class GenericObject extends GenericWritable {
+
+ private static Class[] CLASSES = {
+ ClassType1.class,
+ ClassType2.class,
+ ClassType3.class,
+ };
+
+ protected Class[] getTypes() {
+ return CLASSES;
+ }
+
+ }
+
+
+ @since Nov 8, 2006]]>
+ data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+ The index file is read entirely into memory. Thus key implementations + should try to keep themselves small. + +
Map files are created by adding entries in-order. To maintain a large + database, perform updates by copying the previous version of a database and + merging in a sorted change list, to create a new version of the database in + a new file. Sorting large change lists can be done with {@link + SequenceFile.Sorter}.]]> +
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
SequenceFile Writers based on the
+ {@link CompressionType} used to compress key/value pairs:
+ Writer : Uncompressed records.
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+ The actual compression algorithm used to compress key and/or values can be + specified by using the appropriate {@link CompressionCodec}.
+ +The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+ 100 kilobytes or so.
+ 100 kilobytes or so.
+ The compressed blocks of key lengths and value lengths consist of the + actual lengths of individual keys/values encoded in ZeroCompressedInteger + format.
+ + @see CompressionCodec]]> +start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+ new byte[0]).]]>
+ Also includes utilities for + serializing/deserializing a string, coding/decoding a string, checking if a + byte array contains valid UTF8 code, calculating the length of an encoded + string.]]> +
DataOutput to serialize this object into.
+ @throws IOException]]>
+ For efficiency, implementations should attempt to re-use storage in the + existing object where possible.
+ + @param in DataInput to deserialize this object from.
+ @throws IOException]]>
+ key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+ Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+ ]]>
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+ Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
Example:
+]]> ++ public class MyWritableComparable implements WritableComparable{ + // Some data + private int counter; + private long timestamp; + + public void write(DataOutput out) throws IOException { + out.writeInt(counter); + out.writeLong(timestamp); + } + + public void readFields(DataInput in) throws IOException { + counter = in.readInt(); + timestamp = in.readLong(); + } + + public int compareTo(MyWritableComparable o) { + int thisValue = this.counter; + int thatValue = o.counter; + return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1)); + } + + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + counter; + result = prime * result + (int) (timestamp ^ (timestamp >>> 32)); + return result; + } + } +
One may optimize compare-intensive operations by overriding + {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are + provided to assist in optimized implementations of this method.]]> +
Compressor
+ @param conf the Configuration object which contains settings for creating or reinitializing the compressor
+ @return Compressor for the given
+ CompressionCodec from the pool or a new one]]>
+ Decompressor
+ @return Decompressor for the given
+ CompressionCodec from the pool or a new one]]>
+ b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+ true if a preset dictionary is needed for decompression]]>
+ true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+ true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+ false when reset() is called.]]>
+ + The behavior of TFile can be customized by the following variables through + Configuration: +
+ Suggestions on performance optimization. +
+To add a new serialization framework write an implementation of +{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the +"io.serializations" property. +
]]> ++Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for +serialization of classes generated by Avro's 'specific' compiler. +
+ ++Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for +other classes. +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for +any class which is either in the package list configured via +{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES} +or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable} +interface. +
]]> +basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+ The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses + {@link org.apache.hadoop.fs.FileSystem} to access the target file system, + however, it can be used to write to the local file system, Amazon S3, or any + other supported file system. The base path for the sink will determine the + file system used. An unqualified path will write to the default file system + set by the configuration.
+ +Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
Instead of appending to an existing file, by default the sink + will create a new file with a suffix of ".<n>", where + n is the next lowest integer that isn't already used in a file name, + similar to the Hadoop daemon logs. NOTE: the file with the highest + sequence number is the newest file, unlike the Hadoop daemon logs.
+ +For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
Note also that when writing to HDFS, the file size information is not + updated until the file is closed (at the end of the interval) even though + the data is being written successfully. This is a known HDFS limitation that + exists because of the performance cost of updating the metadata. See + HDFS-5478.
+ +When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink + [prefix].sink.statsd.server.host= + [prefix].sink.statsd.server.port= + [prefix].sink.statsd.skip.hostname=true|false (optional) + [prefix].sink.statsd.service.name=NameNode (name you want for service) +]]> +
Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+ RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+ + This class uses the configuration parameter {@code + net.topology.table.file.name} to locate the mapping file. +
++ Calls to {@link #resolve(List)} will look up the address as defined in the + mapping file. If no entry corresponding to the address is found, the value + {@code /default-rack} is returned. +
]]> +null the default one will be used.]]>
+ null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+ TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+ FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+ doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+ AuthenticatedURL instances are not thread-safe.]]>
+ + This permits implementations to change the configuration before + the init operation. As the ServiceLauncher only creates + an instance of the base {@link Configuration} class, it is + recommended to instantiate any subclass (such as YarnConfiguration) + that injects new resources. +
+ @param config the initial configuration build up by the + service launcher. + @param args list of arguments passed to the command line + after any launcher-specific commands have been stripped. + @return the configuration to init the service with. + Recommended: pass down the config parameter with any changes + @throws Exception any problem]]> +
+ If an exception is raised, the policy is: +
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]> +
+ Many of the exit codes are designed to resemble HTTP error codes, + squashed into a single byte. e.g 44 , "not found" is the equivalent + of 404. The various 2XX HTTP error codes aren't followed; + the Unix standard of "0" for success is used. +
+ 0-10: general command issues + 30-39: equivalent to the 3XX responses, where those responses are + considered errors by the application. + 40-49: client-side/CLI/config problems + 50-59: service-side problems. + 60+ : application specific error codes +]]> +
+ If the last argument is a throwable, it becomes the cause of the exception. + It will also be used as a parameter for the format. + @param exitCode exit code + @param format format for message to use in exception + @param args list of arguments]]> +
Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take significant amount of time since,
+ in-lieu of the reported progress, the framework has to assume that an error
+ has occurred and time-out the operation.]]>
+ Class of the given object.]]>
+ ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+ ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+ Shell
+ processes and destroys them one by one. This method is thread safe]]>
+ + @deprecated use one of the exception-raising getter methods, + specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]> +
du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
+ ShutdownHookManager singleton.]]>
+ TimeUnit]]>
+ Tool, is the standard for any Map-Reduce tool/application.
+ The tool/application should delegate the handling of
+
+ standard command-line options to {@link ToolRunner#run(Tool, String[])}
+ and only handle its custom arguments.
+
+ Here is how a typical Tool is implemented:
+ public class MyApp extends Configured implements Tool {
+
+ public int run(String[] args) throws Exception {
+ // Configuration processed by ToolRunner
+ Configuration conf = getConf();
+
+ // Create a JobConf using the processed conf
+ JobConf job = new JobConf(conf, MyApp.class);
+
+ // Process custom command-line options
+ Path in = new Path(args[1]);
+ Path out = new Path(args[2]);
+
+ // Specify various job-specific parameters
+ job.setJobName("my-app");
+ job.setInputPath(in);
+ job.setOutputPath(out);
+ job.setMapperClass(MyMapper.class);
+ job.setReducerClass(MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ RunningJob runningJob = JobClient.runJob(job);
+ if (runningJob.isSuccessful()) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ // Let ToolRunner handle generic command-line options
+ int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+
+ System.exit(res);
+ }
+ }
+
+
+ @see GenericOptionsParser
+ @see ToolRunner]]>
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+ Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
+ + The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by + the networking research community in the past decade thanks to the bandwidth efficiencies that it + offers for the transmission of set membership information between networked hosts. A sender encodes + the information into a bit vector, the Bloom filter, that is more compact than a conventional + representation. Computation and space costs for construction are linear in the number of elements. + The receiver uses the filter to test whether various elements are members of the set. Though the + filter will occasionally return a false positive, it will never return a false negative. When creating + the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. + +
+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Space/Time Trade-Offs in Hash Coding with Allowable Errors]]> +
+ Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + @param key The key to remove.]]> +
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it + allows dynamic additions and deletions of set membership information. This + is achieved through the use of a counting vector instead of a bit vector. +
+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + + @see Summary cache: a scalable wide-area web cache sharing protocol]]> +
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr <= n (n is
+ the cardinality of the set A to record in the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + + @see Theory and Network Applications of Dynamic Bloom Filters]]> +
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+ It allows the removal of selected false positives at the cost of introducing + random false negatives, and with the benefit of eliminating some random false + positives at the same time. + +
+ Originally created by + European Commission One-Lab Project 034819. + + @see Filter The general behavior of a filter + @see BloomFilter A Bloom filter + @see RemoveScheme The different selective clearing algorithms + + @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives]]> +
BufferedFSInputStream
* with the specified buffer size,
@@ -126,4 +132,34 @@ public FileDescriptor getFileDescriptor() throws IOException {
return null;
}
}
+
+ /**
+ * If the inner stream supports {@link StreamCapabilities},
+ * forward the probe to it.
+ * Otherwise: return false.
+ *
+ * @param capability string to query the stream support for.
+ * @return true if a capability is known to be supported.
+ */
+ @Override
+ public boolean hasCapability(final String capability) {
+ if (in instanceof StreamCapabilities) {
+ return ((StreamCapabilities) in).hasCapability(capability);
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public IOStatistics getIOStatistics() {
+ return retrieveIOStatistics(in);
+ }
+
+ @Override
+ public String toString() {
+ return new StringJoiner(", ",
+ BufferedFSInputStream.class.getSimpleName() + "[", "]")
+ .add("in=" + in)
+ .toString();
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java
index 2e2d98b9c5462..0077838920a9e 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CanSetDropBehind.java
@@ -36,6 +36,6 @@ public interface CanSetDropBehind {
* UnsupportedOperationException If this stream doesn't support
* setting the drop-behind.
*/
- public void setDropBehind(Boolean dropCache)
+ void setDropBehind(Boolean dropCache)
throws IOException, UnsupportedOperationException;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
index f081742ce59db..0256a58f46368 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
@@ -38,11 +38,15 @@
import org.apache.hadoop.fs.impl.OpenFileParameters;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsSource;
+import org.apache.hadoop.fs.statistics.IOStatisticsSupport;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.LambdaUtils;
import org.apache.hadoop.util.Progressable;
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
+import static org.apache.hadoop.fs.impl.StoreImplementationUtils.isProbeForSyncable;
/****************************************************************
* Abstract Checksumed FileSystem.
@@ -134,7 +138,8 @@ private int getSumBufferSize(int bytesPerSum, int bufferSize) {
* For open()'s FSInputStream
* It verifies that data matches checksums.
*******************************************************/
- private static class ChecksumFSInputChecker extends FSInputChecker {
+ private static class ChecksumFSInputChecker extends FSInputChecker implements
+ IOStatisticsSource {
private ChecksumFileSystem fs;
private FSDataInputStream datas;
private FSDataInputStream sums;
@@ -270,6 +275,17 @@ protected int readChunk(long pos, byte[] buf, int offset, int len,
}
return nread;
}
+
+ /**
+ * Get the IO Statistics of the nested stream, falling back to
+ * null if the stream does not implement the interface
+ * {@link IOStatisticsSource}.
+ * @return an IOStatistics instance or null
+ */
+ @Override
+ public IOStatistics getIOStatistics() {
+ return IOStatisticsSupport.retrieveIOStatistics(datas);
+ }
}
private static class FSDataBoundedInputStream extends FSDataInputStream {
@@ -395,7 +411,8 @@ public static long getChecksumLength(long size, int bytesPerSum) {
/** This class provides an output stream for a checksummed file.
* It generates checksums for data. */
- private static class ChecksumFSOutputSummer extends FSOutputSummer {
+ private static class ChecksumFSOutputSummer extends FSOutputSummer
+ implements IOStatisticsSource, StreamCapabilities {
private FSDataOutputStream datas;
private FSDataOutputStream sums;
private static final float CHKSUM_AS_FRACTION = 0.01f;
@@ -449,6 +466,31 @@ protected void checkClosed() throws IOException {
throw new ClosedChannelException();
}
}
+
+ /**
+ * Get the IO Statistics of the nested stream, falling back to
+ * null if the stream does not implement the interface
+ * {@link IOStatisticsSource}.
+ * @return an IOStatistics instance or null
+ */
+ @Override
+ public IOStatistics getIOStatistics() {
+ return IOStatisticsSupport.retrieveIOStatistics(datas);
+ }
+
+ /**
+ * Probe the inner stream for a capability.
+ * Probes for Syncable capabilities are rejected before being passed down.
+ * @param capability string to query the stream support for.
+ * @return true if a capability is known to be supported.
+ */
+ @Override
+ public boolean hasCapability(final String capability) {
+ if (isProbeForSyncable(capability)) {
+ return false;
+ }
+ return datas.hasCapability(capability);
+ }
}
@Override
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
index c08af395ad2f9..07776763e97a4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
@@ -58,7 +58,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
public static final String IPC_CLIENT_RPC_TIMEOUT_KEY =
"ipc.client.rpc-timeout.ms";
/** Default value for IPC_CLIENT_RPC_TIMEOUT_KEY. */
- public static final int IPC_CLIENT_RPC_TIMEOUT_DEFAULT = 0;
+ public static final int IPC_CLIENT_RPC_TIMEOUT_DEFAULT = 120000;
/** Responses larger than this will be logged */
public static final String IPC_SERVER_RPC_MAX_RESPONSE_SIZE_KEY =
"ipc.server.max.response.size";
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
index 57446d3d64e3e..20bb0350d191b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
@@ -949,6 +949,15 @@ public class CommonConfigurationKeysPublic {
/** Defalt value for HADOOP_HTTP_LOGS_ENABLED */
public static final boolean HADOOP_HTTP_LOGS_ENABLED_DEFAULT = true;
+ /**
+ * @see
+ *
+ * core-default.xml
+ */
+ public static final String HADOOP_HTTP_METRICS_ENABLED =
+ "hadoop.http.metrics.enabled";
+ public static final boolean HADOOP_HTTP_METRICS_ENABLED_DEFAULT = true;
+
/**
* @see
*
@@ -1037,6 +1046,6 @@ public class CommonConfigurationKeysPublic {
*/
public static final String HADOOP_HTTP_IDLE_TIMEOUT_MS_KEY =
"hadoop.http.idle_timeout.ms";
- public static final int HADOOP_HTTP_IDLE_TIMEOUT_MS_DEFAULT = 1000;
+ public static final int HADOOP_HTTP_IDLE_TIMEOUT_MS_DEFAULT = 60000;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java
index 539b3e27c0351..df932df43aebd 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java
@@ -139,4 +139,11 @@ private CommonPathCapabilities() {
public static final String FS_MULTIPART_UPLOADER =
"fs.capability.multipart.uploader";
+
+ /**
+ * Stream abort() capability implemented by {@link Abortable#abort()}.
+ * Value: {@value}.
+ */
+ public static final String ABORTABLE_STREAM =
+ "fs.capability.outputstream.abortable";
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
index 193c52c60d949..8cc9e78b7936b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
@@ -97,7 +97,7 @@ public int hashCode() {
public boolean equals(final Object that) {
if (this == that) {
return true;
- } else if (that == null || !(that instanceof RenewAction)) {
+ } else if (!(that instanceof RenewAction)) {
return false;
}
return token.equals(((RenewAction>)that).token);
@@ -107,7 +107,7 @@ public boolean equals(final Object that) {
* Set a new time for the renewal.
* It can only be called when the action is not in the queue or any
* collection because the hashCode may change
- * @param newTime the new time
+ * @param delay the delay from now used to compute the renewal time
*/
private void updateRenewalTime(long delay) {
renewalTime = Time.now() + delay - delay/10;
@@ -223,7 +223,7 @@ public * The following names are considered general and preserved across different * StorageStatistics classes. When implementing a new StorageStatistics, it is * highly recommended to use the common statistic names. - * + *
* When adding new common statistic name constants, please make them unique. * By convention, they are implicitly unique: *
+ * This is for reporting and testing. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class DurationStatisticSummary implements Serializable { + + private static final long serialVersionUID = 6776381340896518486L; + + /** Statistic key. */ + private final String key; + + /** Are these success or failure statistics. */ + private final boolean success; + + /** Count of operation invocations. */ + private final long count; + + /** Max duration; -1 if unknown. */ + private final long max; + + /** Min duration; -1 if unknown. */ + private final long min; + + /** Mean duration -may be null. */ + private final MeanStatistic mean; + + /** + * Constructor. + * @param key Statistic key. + * @param success Are these success or failure statistics. + * @param count Count of operation invocations. + * @param max Max duration; -1 if unknown. + * @param min Min duration; -1 if unknown. + * @param mean Mean duration -may be null. (will be cloned) + */ + public DurationStatisticSummary(final String key, + final boolean success, + final long count, + final long max, + final long min, + @Nullable final MeanStatistic mean) { + this.key = key; + this.success = success; + this.count = count; + this.max = max; + this.min = min; + this.mean = mean == null ? null : mean.clone(); + } + + public String getKey() { + return key; + } + + public boolean isSuccess() { + return success; + } + + public long getCount() { + return count; + } + + public long getMax() { + return max; + } + + public long getMin() { + return min; + } + + public MeanStatistic getMean() { + return mean; + } + + @Override + public String toString() { + return "DurationStatisticSummary{" + + "key='" + key + '\'' + + ", success=" + success + + ", counter=" + count + + ", max=" + max + + ", mean=" + mean + + '}'; + } + + /** + * Fetch the duration timing summary of success or failure operations + * from an IO Statistics source. + * If the duration key is unknown, the summary will be incomplete. 
+ * @param source source of data + * @param key duration statistic key + * @param success fetch success statistics, or if false, failure stats. + * @return a summary of the statistics. + */ + public static DurationStatisticSummary fetchDurationSummary( + IOStatistics source, + String key, + boolean success) { + String fullkey = success ? key : key + SUFFIX_FAILURES; + return new DurationStatisticSummary(key, success, + source.counters().getOrDefault(fullkey, 0L), + source.maximums().getOrDefault(fullkey + SUFFIX_MAX, -1L), + source.minimums().getOrDefault(fullkey + SUFFIX_MIN, -1L), + source.meanStatistics() + .get(fullkey + SUFFIX_MEAN)); + } + + /** + * Fetch the duration timing summary from an IOStatistics source. + * If the duration key is unknown, the summary will be incomplete. + * @param source source of data + * @param key duration statistic key + * @return a summary of the statistics. + */ + public static DurationStatisticSummary fetchSuccessSummary( + IOStatistics source, + String key) { + return fetchDurationSummary(source, key, true); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTracker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTracker.java new file mode 100644 index 0000000000000..5a15c7ad66c4f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTracker.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.time.Duration; + +/** + * Interface to be implemented by objects which can track duration. + * It extends AutoCloseable to fit into a try-with-resources statement, + * but then strips out the {@code throws Exception} aspect of the signature + * so it doesn't force code to add extra handling for any failures. + * + * If a duration is declared as "failed()" then the failure counters + * will be updated. + */ +public interface DurationTracker extends AutoCloseable { + + /** + * The operation failed. Failure statistics will be updated. + */ + void failed(); + + /** + * Finish tracking: update the statistics with the timings. + */ + void close(); + + /** + * Get the duration of an operation as a java Duration + * instance. If the duration tracker hasn't completed, + * or its duration tracking doesn't actually measure duration, + * returns Duration.ZERO. + * @return a duration, value of ZERO until close(). 
+ */ + default Duration asDuration() { + return Duration.ZERO; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTrackerFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTrackerFactory.java new file mode 100644 index 0000000000000..641d7e8368bb1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/DurationTrackerFactory.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTracker; + +/** + * Interface for a source of duration tracking. + * + * This is intended for uses where it can be passed into classes + * which update operation durations, without tying those + * classes to internal implementation details. + */ +public interface DurationTrackerFactory { + + /** + * Initiate a duration tracking operation by creating/returning + * an object whose {@code close()} call will + * update the statistics. + * + * The statistics counter with the key name will be incremented + * by the given count. 
+ * + * The expected use is within a try-with-resources clause. + * + * The default implementation returns a stub duration tracker. + * @param key statistic key prefix + * @param count #of times to increment the matching counter in this + * operation. + * @return an object to close after an operation completes. + */ + default DurationTracker trackDuration(String key, long count) { + return stubDurationTracker(); + } + + /** + * Initiate a duration tracking operation by creating/returning + * an object whose {@code close()} call will + * update the statistics. + * The expected use is within a try-with-resources clause. + * @param key statistic key + * @return an object to close after an operation completes. + */ + default DurationTracker trackDuration(String key) { + return trackDuration(key, 1); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatistics.java new file mode 100644 index 0000000000000..75d9965128101 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatistics.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * IO Statistics. + *
+ * These are low-cost per-instance statistics provided by any Hadoop + * I/O class instance. + *
+ * Consult the filesystem specification document for the requirements
+ * of an implementation of this interface.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public interface IOStatistics {
+
+ /**
+ * Map of counters.
+ * @return the current map of counters.
+ */
+ Map
+ * Exceptions are caught and downgraded to debug logging.
+ * @param source source of statistics.
+ * @return a string for logging.
+ */
+ public static String ioStatisticsSourceToString(@Nullable Object source) {
+ try {
+ return ioStatisticsToString(retrieveIOStatistics(source));
+ } catch (RuntimeException e) {
+ LOG.debug("Ignoring", e);
+ return "";
+ }
+ }
+
+ /**
+ * Convert IOStatistics to a string form.
+ * @param statistics A statistics instance.
+ * @return string value or the empty string if null
+ */
+ public static String ioStatisticsToString(
+ @Nullable final IOStatistics statistics) {
+ if (statistics != null) {
+ StringBuilder sb = new StringBuilder();
+ mapToString(sb, "counters", statistics.counters(), " ");
+ mapToString(sb, "gauges", statistics.gauges(), " ");
+ mapToString(sb, "minimums", statistics.minimums(), " ");
+ mapToString(sb, "maximums", statistics.maximums(), " ");
+ mapToString(sb, "means", statistics.meanStatistics(), " ");
+
+ return sb.toString();
+ } else {
+ return "";
+ }
+ }
+
+ /**
+ * Convert IOStatistics to a string form, with all the metrics sorted
+ * and empty values stripped.
+ * This is more expensive than the simple conversion, so should only
+ * be used for logging/output where it's known/highly likely that the
+ * caller wants to see the values. Not for debug logging.
+ * @param statistics A statistics instance.
+ * @return string value or the empty string if null
+ */
+ public static String ioStatisticsToPrettyString(
+ @Nullable final IOStatistics statistics) {
+ if (statistics != null) {
+ StringBuilder sb = new StringBuilder();
+ mapToSortedString(sb, "counters", statistics.counters(),
+ p -> p == 0);
+ mapToSortedString(sb, "\ngauges", statistics.gauges(),
+ p -> p == 0);
+ mapToSortedString(sb, "\nminimums", statistics.minimums(),
+ p -> p < 0);
+ mapToSortedString(sb, "\nmaximums", statistics.maximums(),
+ p -> p < 0);
+ mapToSortedString(sb, "\nmeans", statistics.meanStatistics(),
+ MeanStatistic::isEmpty);
+
+ return sb.toString();
+ } else {
+ return "";
+ }
+ }
+
+ /**
+ * Given a map, add its entryset to the string.
+ * The entries are only sorted if the source entryset
+ * iterator is sorted, such as from a TreeMap.
+ * @param sb string buffer to append to
+ * @param type type (for output)
+ * @param map map to evaluate
+ * @param separator separator
+ * @param
+ * Whenever this object's toString() method is called, it evaluates the
+ * statistics.
+ *
+ * This is designed to be affordable to use in log statements.
+ * @param source source of statistics -may be null.
+ * @return an object whose toString() operation returns the current values.
+ */
+ public static Object demandStringifyIOStatisticsSource(
+ @Nullable IOStatisticsSource source) {
+ return new SourceToString(source);
+ }
+
+ /**
+ * On demand stringifier of an IOStatistics instance.
+ *
+ * Whenever this object's toString() method is called, it evaluates the
+ * statistics.
+ *
+ * This is for use in log statements: the cost of creating
+ * this object is low, so it is affordable to use in log statements.
+ * @param statistics statistics to stringify -may be null.
+ * @return an object whose toString() operation returns the current values.
+ */
+ public static Object demandStringifyIOStatistics(
+ @Nullable IOStatistics statistics) {
+ return new StatisticsToString(statistics);
+ }
+
+ /**
+ * Extract any statistics from the source and log at debug, if
+ * the log is set to log at debug.
+ * No-op if logging is not at debug or the source is null/of
+ * the wrong type/doesn't provide statistics.
+ * @param log log to log to
+ * @param message message for log -this must contain "{}" for the
+ * statistics report to actually get logged.
+ * @param source source object
+ */
+ public static void logIOStatisticsAtDebug(
+ Logger log,
+ String message,
+ Object source) {
+ if (log.isDebugEnabled()) {
+ // robust extract and convert to string
+ String stats = ioStatisticsSourceToString(source);
+ if (!stats.isEmpty()) {
+ log.debug(message, stats);
+ }
+ }
+ }
+
+ /**
+ * Extract any statistics from the source and log to
+ * this class's log at debug, if
+ * the log is set to log at debug.
+ * No-op if logging is not at debug or the source is null/of
+ * the wrong type/doesn't provide statistics.
+ * @param message message for log -this must contain "{}" for the
+ * statistics report to actually get logged.
+ * @param source source object
+ */
+ public static void logIOStatisticsAtDebug(
+ String message,
+ Object source) {
+ logIOStatisticsAtDebug(LOG, message, source);
+ }
+
+ /**
+ * On demand stringifier.
+ *
+ * Whenever this object's toString() method is called, it
+ * retrieves the latest statistics instance and re-evaluates it.
+ */
+ private static final class SourceToString {
+
+ private final IOStatisticsSource source;
+
+ private SourceToString(@Nullable IOStatisticsSource source) {
+ this.source = source;
+ }
+
+ @Override
+ public String toString() {
+ return source != null
+ ? ioStatisticsSourceToString(source)
+ : IOStatisticsBinding.NULL_SOURCE;
+ }
+ }
+
+ /**
+ * Stringifier of statistics: low cost to instantiate and every
+ * toString/logging will re-evaluate the statistics.
+ */
+ private static final class StatisticsToString {
+
+ private final IOStatistics statistics;
+
+ /**
+ * Constructor.
+ * @param statistics statistics
+ */
+ private StatisticsToString(@Nullable IOStatistics statistics) {
+ this.statistics = statistics;
+ }
+
+ /**
+ * Evaluate and stringify the statistics.
+ * @return a string value.
+ */
+ @Override
+ public String toString() {
+ return statistics != null
+ ? ioStatisticsToString(statistics)
+ : IOStatisticsBinding.NULL_SOURCE;
+ }
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSnapshot.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSnapshot.java
new file mode 100644
index 0000000000000..5b8b2e284cc11
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSnapshot.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding;
+import org.apache.hadoop.util.JsonSerialization;
+
+import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToString;
+import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.aggregateMaps;
+import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.snapshotMap;
+
+/**
+ * Snapshot of statistics from a different source.
+ *
+ * It is serializable so that frameworks which can use java serialization
+ * to propagate data (Spark, Flink...) can send the statistics
+ * back. For this reason, TreeMaps are explicitly used as field types,
+ * even though IDEs can recommend use of Map instead.
+ * For security reasons, untrusted java object streams should never be
+ * deserialized. If for some reason this is required, use
+ * {@link #requiredSerializationClasses()} to get the list of classes
+ * used when deserializing instances of this object.
+ *
+ *
+ * It is annotated for correct serializations with jackson2.
+ *
+ * These statistics MUST be instance specific, not thread local.
+ *
+ * It is not a requirement that the same instance is returned every time.
+ * {@link IOStatisticsSource}.
+ *
+ * If the object implementing this is Closeable, this method
+ * may return null if invoked on a closed object, even if
+ * it returns a valid instance when called earlier.
+ * @return an IOStatistics instance or null
+ */
+ default IOStatistics getIOStatistics() {
+ return null;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSupport.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSupport.java
new file mode 100644
index 0000000000000..75977047c0f2a
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsSupport.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.statistics.impl.StubDurationTracker;
+import org.apache.hadoop.fs.statistics.impl.StubDurationTrackerFactory;
+
+/**
+ * Support for working with IOStatistics.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public final class IOStatisticsSupport {
+
+ private IOStatisticsSupport() {
+ }
+
+ /**
+ * Take a snapshot of the current statistics state.
+ *
+ * This is not an atomic operation.
+ *
+ * The instance can be serialized, and its
+ * {@code toString()} method lists all the values.
+ * @param statistics statistics
+ * @return a snapshot of the current values.
+ */
+ public static IOStatisticsSnapshot
+ snapshotIOStatistics(IOStatistics statistics) {
+
+ return new IOStatisticsSnapshot(statistics);
+ }
+
+ /**
+ * Create a snapshot statistics instance ready to aggregate data.
+ *
+ * The instance can be serialized, and its
+ * {@code toString()} method lists all the values.
+ * @return an empty snapshot
+ */
+ public static IOStatisticsSnapshot
+ snapshotIOStatistics() {
+
+ return new IOStatisticsSnapshot();
+ }
+
+ /**
+ * Get the IOStatistics of the source, casting it
+ * if it is of the relevant type, otherwise,
+ * if it implements {@link IOStatisticsSource}
+ * extracting the value.
+ *
+ * Returns null if the source isn't of the right type
+ * or the return value of
+ * {@link IOStatisticsSource#getIOStatistics()} was null.
+ * @return an IOStatistics instance or null
+ */
+
+ public static IOStatistics retrieveIOStatistics(
+ final Object source) {
+ if (source instanceof IOStatistics) {
+ return (IOStatistics) source;
+ } else if (source instanceof IOStatisticsSource) {
+ return ((IOStatisticsSource) source).getIOStatistics();
+ } else {
+ // null source or interface not implemented
+ return null;
+ }
+ }
+
+ /**
+ * Return a stub duration tracker factory whose returned trackers
+ * are always no-ops.
+ *
+ * As singletons are returned, this is very low-cost to use.
+ * @return a duration tracker factory.
+ */
+ public static DurationTrackerFactory stubDurationTrackerFactory() {
+ return StubDurationTrackerFactory.STUB_DURATION_TRACKER_FACTORY;
+ }
+
+ /**
+ * Get a stub duration tracker.
+ * @return a stub tracker.
+ */
+ public static DurationTracker stubDurationTracker() {
+ return StubDurationTracker.STUB_DURATION_TRACKER;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/MeanStatistic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/MeanStatistic.java
new file mode 100644
index 0000000000000..d9ff0c25c6a21
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/MeanStatistic.java
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * A mean statistic represented as the sum and the sample count;
+ * the mean is calculated on demand.
+ *
+ * It can be used to accrue values so as to dynamically update
+ * the mean. If so, know that there is no synchronization
+ * on the methods.
+ *
+ * If a statistic has 0 samples then it is considered to be empty.
+ *
+ * All 'empty' statistics are equivalent, independent of the sum value.
+ *
+ * For non-empty statistics, sum and sample values must match
+ * for equality.
+ *
+ * It is serializable and annotated for correct serializations with jackson2.
+ *
+ * Thread safety. The operations to add/copy sample data are thread safe.
+ *
+ * So is the {@link #mean()} method. This ensures that when
+ * used to aggregate statistics, the aggregate value and sample
+ * count are set and evaluated consistently.
+ *
+ * Other methods are marked as synchronized because Findbugs overreacts
+ * to the idea that some operations to update sum and sample count
+ * are synchronized, but that things like equals are not.
+ *
+ * When adding new common statistic name constants, please make them unique.
+ * By convention:
+ *
+ * When adding new common statistic name constants, please make them unique.
+ * By convention, they are implicitly unique:
+ *
+ * No-op if the gauge is unknown.
+ *
+ * No-op if the maximum is unknown.
+ *
+ * No-op if the minimum is unknown.
+ *
+ * No-op if the minimum is unknown.
+ *
+ * No-op if the minimum is unknown.
+ *
+ * No-op if the key is unknown.
+ *
+ * No-op if the key is unknown.
+ *
+ * No-op if the key is unknown.
+ *
+ * A ConcurrentHashMap of each set of statistics is created;
+ * the AtomicLong/MeanStatistic entries are fetched as required.
+ * When the statistics are updated, the referenced objects
+ * are updated rather than new values set in the map.
+ *
+ * The update is non-atomic, even though each individual statistic
+ * is updated thread-safely. If two threads update the values
+ * simultaneously, at the end of each operation the state will
+ * be correct. It is only during the sequence that the statistics
+ * may be observably inconsistent.
+ *
+ * This package defines two interfaces:
+ *
+ * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource}:
+ * a source of statistic data, which can be retrieved
+ * through a call to
+ * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource#getIOStatistics()} .
+ *
+ * {@link org.apache.hadoop.fs.statistics.IOStatistics} the statistics retrieved
+ * from a statistics source.
+ *
+ * The retrieved statistics may be an immutable snapshot, in which case to get
+ * updated statistics another call to
+ * {@link org.apache.hadoop.fs.statistics.IOStatisticsSource#getIOStatistics()}
+ * must be made. Or they may be dynamic, in which case every time a specific
+ * statistic is retrieved, the latest version is returned. Callers should assume
+ * that if a statistics instance is dynamic, there is no atomicity when querying
+ * multiple statistics. If the statistics source was a closeable object (e.g. a
+ * stream), the statistics MUST remain valid after the stream is closed.
+ *
+ * Use pattern:
+ *
+ * An application probes an object (filesystem, stream etc) to see if it
+ * implements {@code IOStatisticsSource}, and, if it does,
+ * calls {@code getIOStatistics()} to get its statistics.
+ * If this is non-null, the client has the statistics describing the
+ * current state of that object.
+ *
+ * The expectation is that a statistics source is dynamic: when a value is
+ * looked up the most recent values are returned.
+ * When iterating through the set, the values of the iterator SHOULD
+ * be frozen at the time the iterator was requested.
+ *
+ * These statistics can be used to: log operations, profile applications,
+ * and make assertions about the state of the output.
+ *
+ * The names of statistics are a matter of choice of the specific source.
+ * However, {@link org.apache.hadoop.fs.statistics.StoreStatisticNames}
+ * contains a
+ * set of names recommended for object store operations.
+ * {@link org.apache.hadoop.fs.statistics.StreamStatisticNames} declares
+ * recommended names for statistics provided for
+ * input and output streams.
+ *
+ * Utility classes:
+ *
+ * Implementors notes:
+ *
- * if the trust certificates keystore file changes, the {@link TrustManager}
- * is refreshed with the new trust certificate entries (using a
- * {@link ReloadingX509TrustManager} trustmanager).
+ * If either the truststore or the keystore certificates file changes, it
+ * would be refreshed under the corresponding wrapper implementation -
+ * {@link ReloadingX509KeystoreManager} or {@link ReloadingX509TrustManager}.
+ *
+ *
+ *
+ *
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public final class StoreStatisticNames {
+
+ /** {@value}. */
+ public static final String OP_ABORT = "op_abort";
+
+ /** {@value}. */
+ public static final String OP_APPEND = "op_append";
+
+ /** {@value}. */
+ public static final String OP_COPY_FROM_LOCAL_FILE =
+ "op_copy_from_local_file";
+
+ /** {@value}. */
+ public static final String OP_CREATE = "op_create";
+
+ /** {@value}. */
+ public static final String OP_CREATE_NON_RECURSIVE =
+ "op_create_non_recursive";
+
+ /** {@value}. */
+ public static final String OP_DELETE = "op_delete";
+
+ /** {@value}. */
+ public static final String OP_EXISTS = "op_exists";
+
+ /** {@value}. */
+ public static final String OP_GET_CONTENT_SUMMARY =
+ "op_get_content_summary";
+
+ /** {@value}. */
+ public static final String OP_GET_DELEGATION_TOKEN =
+ "op_get_delegation_token";
+
+ /** {@value}. */
+ public static final String OP_GET_FILE_CHECKSUM =
+ "op_get_file_checksum";
+
+ /** {@value}. */
+ public static final String OP_GET_FILE_STATUS = "op_get_file_status";
+
+ /** {@value}. */
+ public static final String OP_GET_STATUS = "op_get_status";
+
+ /** {@value}. */
+ public static final String OP_GLOB_STATUS = "op_glob_status";
+
+ /** {@value}. */
+ public static final String OP_IS_FILE = "op_is_file";
+
+ /** {@value}. */
+ public static final String OP_IS_DIRECTORY = "op_is_directory";
+
+ /** {@value}. */
+ public static final String OP_LIST_FILES = "op_list_files";
+
+ /** {@value}. */
+ public static final String OP_LIST_LOCATED_STATUS =
+ "op_list_located_status";
+
+ /** {@value}. */
+ public static final String OP_LIST_STATUS = "op_list_status";
+
+ /** {@value}. */
+ public static final String OP_MKDIRS = "op_mkdirs";
+
+ /** {@value}. */
+ public static final String OP_MODIFY_ACL_ENTRIES = "op_modify_acl_entries";
+
+ /** {@value}. */
+ public static final String OP_OPEN = "op_open";
+
+ /** {@value}. */
+ public static final String OP_REMOVE_ACL = "op_remove_acl";
+
+ /** {@value}. */
+ public static final String OP_REMOVE_ACL_ENTRIES = "op_remove_acl_entries";
+
+ /** {@value}. */
+ public static final String OP_REMOVE_DEFAULT_ACL = "op_remove_default_acl";
+
+ /** {@value}. */
+ public static final String OP_RENAME = "op_rename";
+
+ /** {@value}. */
+ public static final String OP_SET_ACL = "op_set_acl";
+
+ /** {@value}. */
+ public static final String OP_SET_OWNER = "op_set_owner";
+
+ /** {@value}. */
+ public static final String OP_SET_PERMISSION = "op_set_permission";
+
+ /** {@value}. */
+ public static final String OP_SET_TIMES = "op_set_times";
+
+ /** {@value}. */
+ public static final String OP_TRUNCATE = "op_truncate";
+
+ /* The XAttr API */
+
+ /** Invoke {@code getXAttrs(Path path)}: {@value}. */
+ public static final String OP_XATTR_GET_MAP = "op_xattr_get_map";
+
+ /** Invoke {@code getXAttr(Path, String)}: {@value}. */
+ public static final String OP_XATTR_GET_NAMED = "op_xattr_get_named";
+
+ /**
+ * Invoke {@code getXAttrs(Path path, List
+ *
+ */
+/**
+ * Names of statistics recommended for input and output streams.
+ *
+ * The class only declares string constants and cannot be instantiated.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public final class StreamStatisticNames {
+
+ /**
+ * Count of times the TCP stream was aborted.
+ * Note: unlike the other {@code STREAM_READ_} constants, the value
+ * does not start with {@code stream_read_}.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_ABORTED = "stream_aborted";
+
+ /**
+ * Bytes read from an input stream in read() calls.
+ * Does not include bytes read and then discarded in seek/close etc.
+ * These are the bytes returned to the caller.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_BYTES
+ = "stream_read_bytes";
+
+ /**
+ * Count of bytes discarded by aborting an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_BYTES_DISCARDED_ABORT
+ = "stream_read_bytes_discarded_in_abort";
+
+ /**
+ * Count of bytes read and discarded when closing an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_BYTES_DISCARDED_CLOSE
+ = "stream_read_bytes_discarded_in_close";
+
+ /**
+ * Count of times the TCP stream was closed.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_CLOSED = "stream_read_closed";
+
+ /**
+ * Total count of times an attempt to close an input stream was made.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_CLOSE_OPERATIONS
+ = "stream_read_close_operations";
+
+ /**
+ * Total count of times an input stream was opened.
+ * For object stores, this is the count of GET requests initiated.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_OPENED = "stream_read_opened";
+
+ /**
+ * Count of exceptions raised during input stream reads.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_EXCEPTIONS =
+ "stream_read_exceptions";
+
+ /**
+ * Count of readFully() operations in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_FULLY_OPERATIONS
+ = "stream_read_fully_operations";
+
+ /**
+ * Count of read() operations in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_OPERATIONS =
+ "stream_read_operations";
+
+ /**
+ * Count of incomplete read() operations in an input stream,
+ * that is, when the bytes returned were less than that requested.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_OPERATIONS_INCOMPLETE
+ = "stream_read_operations_incomplete";
+
+ /**
+ * Count of version mismatches encountered while reading an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_VERSION_MISMATCHES
+ = "stream_read_version_mismatches";
+
+ /**
+ * Count of executed seek operations which went backwards in a stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SEEK_BACKWARD_OPERATIONS =
+ "stream_read_seek_backward_operations";
+
+ /**
+ * Count of bytes moved backwards during seek operations
+ * in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SEEK_BYTES_BACKWARDS
+ = "stream_read_bytes_backwards_on_seek";
+
+ /**
+ * Count of bytes read and discarded during seek() in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SEEK_BYTES_DISCARDED =
+ "stream_read_seek_bytes_discarded";
+
+ /**
+ * Count of bytes skipped during forward seek operations.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SEEK_BYTES_SKIPPED
+ = "stream_read_seek_bytes_skipped";
+
+ /**
+ * Count of executed seek operations which went forward in
+ * an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SEEK_FORWARD_OPERATIONS
+ = "stream_read_seek_forward_operations";
+
+ /**
+ * Count of times the seek policy was dynamically changed
+ * in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SEEK_POLICY_CHANGED =
+ "stream_read_seek_policy_changed";
+
+ /**
+ * Count of seek operations in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SEEK_OPERATIONS =
+ "stream_read_seek_operations";
+
+ /**
+ * Count of {@code InputStream.skip()} calls.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SKIP_OPERATIONS =
+ "stream_read_skip_operations";
+
+ /**
+ * Count of bytes skipped in {@code InputStream.skip()} calls.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_SKIP_BYTES =
+ "stream_read_skip_bytes";
+
+ /**
+ * Total count of bytes read from an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_TOTAL_BYTES
+ = "stream_read_total_bytes";
+
+ /**
+ * Count of calls of {@code CanUnbuffer.unbuffer()}.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_UNBUFFERED
+ = "stream_read_unbuffered";
+
+ /**
+ * Count of stream write failures reported.
+ * Value: {@value}.
+ */
+ public static final String STREAM_WRITE_EXCEPTIONS =
+ "stream_write_exceptions";
+
+ /**
+ * Count of failures when finalizing a multipart upload:
+ * {@value}.
+ */
+ public static final String STREAM_WRITE_EXCEPTIONS_COMPLETING_UPLOADS =
+ "stream_write_exceptions_completing_upload";
+
+ /**
+ * Count of block/partition uploads complete.
+ * Value: {@value}.
+ */
+ public static final String STREAM_WRITE_BLOCK_UPLOADS
+ = "stream_write_block_uploads";
+
+ /**
+ * Count of number of block uploads aborted.
+ * Value: {@value}.
+ */
+ public static final String STREAM_WRITE_BLOCK_UPLOADS_ABORTED
+ = "stream_write_block_uploads_aborted";
+
+ /**
+ * Count of block/partition uploads active.
+ * Value: {@value}.
+ */
+ public static final String STREAM_WRITE_BLOCK_UPLOADS_ACTIVE
+ = "stream_write_block_uploads_active";
+
+ /**
+ * Gauge of data queued to be written.
+ * Value: {@value}.
+ */
+ public static final String STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING =
+ "stream_write_block_uploads_data_pending";
+
+ /**
+ * Count of number of block uploads committed.
+ * Value: {@value}.
+ */
+ public static final String STREAM_WRITE_BLOCK_UPLOADS_COMMITTED
+ = "stream_write_block_uploads_committed";
+
+ /**
+ * Gauge of block/partitions uploads queued to be written.
+ * Value: {@value}.
+ */
+ public static final String STREAM_WRITE_BLOCK_UPLOADS_PENDING
+ = "stream_write_block_uploads_pending";
+
+
+ /**
+ * Count of bytes written to output stream including all not yet uploaded.
+ * {@value}.
+ */
+ public static final String STREAM_WRITE_BYTES
+ = "stream_write_bytes";
+
+ /**
+ * Count of total time taken for uploads to complete.
+ * {@value}.
+ */
+ public static final String STREAM_WRITE_TOTAL_TIME
+ = "stream_write_total_time";
+
+ /**
+ * Total queue duration of all block uploads.
+ * {@value}.
+ */
+ public static final String STREAM_WRITE_QUEUE_DURATION
+ = "stream_write_queue_duration";
+
+ /**
+ * Statistic name: {@value}.
+ * NOTE(review): presumably the total data written to a stream; confirm.
+ */
+ public static final String STREAM_WRITE_TOTAL_DATA
+ = "stream_write_total_data";
+
+ /**
+ * Number of bytes to upload from an OutputStream.
+ */
+ public static final String BYTES_TO_UPLOAD
+ = "bytes_upload";
+
+ /**
+ * Number of bytes uploaded successfully to the object store.
+ */
+ public static final String BYTES_UPLOAD_SUCCESSFUL
+ = "bytes_upload_successfully";
+
+ /**
+ * Number of bytes failed to upload to the object store.
+ */
+ public static final String BYTES_UPLOAD_FAILED
+ = "bytes_upload_failed";
+
+ /**
+ * Total time spent on waiting for a task to complete.
+ */
+ public static final String TIME_SPENT_ON_TASK_WAIT
+ = "time_spent_task_wait";
+
+ /**
+ * Number of task queue shrunk operations.
+ */
+ public static final String QUEUE_SHRUNK_OPS
+ = "queue_shrunk_ops";
+
+ /**
+ * Number of times current buffer is written to the service.
+ */
+ public static final String WRITE_CURRENT_BUFFER_OPERATIONS
+ = "write_current_buffer_ops";
+
+ /**
+ * Total time spent on completing a PUT request.
+ */
+ public static final String TIME_SPENT_ON_PUT_REQUEST
+ = "time_spent_on_put_request";
+
+ /**
+ * Number of seeks in buffer.
+ */
+ public static final String SEEK_IN_BUFFER
+ = "seek_in_buffer";
+
+ /**
+ * Number of bytes read from the buffer.
+ */
+ public static final String BYTES_READ_BUFFER
+ = "bytes_read_buffer";
+
+ /**
+ * Total number of remote read operations performed.
+ */
+ public static final String REMOTE_READ_OP
+ = "remote_read_op";
+
+ /**
+ * Total number of bytes read from readAhead.
+ */
+ public static final String READ_AHEAD_BYTES_READ
+ = "read_ahead_bytes_read";
+
+ /**
+ * Total number of bytes read from remote operations.
+ */
+ public static final String REMOTE_BYTES_READ
+ = "remote_bytes_read";
+
+ /** Private constructor: this class only holds constants. */
+ private StreamStatisticNames() {
+ }
+
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/AbstractIOStatisticsImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/AbstractIOStatisticsImpl.java
new file mode 100644
index 0000000000000..c701a509d8951
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/AbstractIOStatisticsImpl.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics.impl;
+
+import org.apache.hadoop.fs.statistics.IOStatistics;
+
+/**
+ * Empty abstract base class for {@link IOStatistics} implementations:
+ * it exists so that common methods/fields can be added in future.
+ */
+public abstract class AbstractIOStatisticsImpl implements IOStatistics {
+
+
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatistics.java
new file mode 100644
index 0000000000000..50c2625c3513d
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/DynamicIOStatistics.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics.impl;
+
+import java.util.Collections;
+import java.util.Map;
+import java.util.function.Function;
+
+import org.apache.hadoop.fs.statistics.MeanStatistic;
+
+/**
+ * These statistics are dynamically evaluated by the supplied
+ * String -> type functions.
+ *
+ * This allows statistic sources to supply a list of callbacks used to
+ * generate the statistics on demand; similar to some of the Coda Hale metrics.
+ *
+ * The evaluation actually takes place during the iteration's {@code next()}
+ * call.
+ */
+final class DynamicIOStatistics
+ extends AbstractIOStatisticsImpl {
+
+ /**
+ * Counter evaluators.
+ */
+ private final EvaluatingStatisticsMap
+ *
+ *
+ *
+ *
+ */
+
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+package org.apache.hadoop.fs.statistics;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java
index fd7b5619b274a..79c323aa35a2b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java
@@ -630,10 +630,9 @@ protected InodeTree(final Configuration config, final String viewName,
.append(theUri.getScheme()).append("://").append(mountTableName)
.append("/").toString());
}
- StringBuilder msg =
- new StringBuilder("Empty mount table detected for ").append(theUri)
- .append(" and considering itself as a linkFallback.");
- FileSystem.LOG.info(msg.toString());
+ FileSystem.LOG
+ .info("Empty mount table detected for {} and considering itself "
+ + "as a linkFallback.", theUri);
rootFallbackLink =
new INodeLinkutf8 is
+ * zero, actually clear {@link #bytes} and any existing
+ * data is lost.
*/
public void set(byte[] utf8) {
- set(utf8, 0, utf8.length);
+ if (utf8.length == 0) {
+ bytes = EMPTY_BYTES;
+ length = 0;
+ textLength = -1;
+ } else {
+ set(utf8, 0, utf8.length);
+ }
}
/**
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java
index 2dfa30bf76ec4..55bb132e9c87c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionInputStream.java
@@ -25,6 +25,10 @@
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsSource;
+import org.apache.hadoop.fs.statistics.IOStatisticsSupport;
+
/**
* A compression input stream.
*
@@ -34,7 +38,8 @@
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
-public abstract class CompressionInputStream extends InputStream implements Seekable {
+public abstract class CompressionInputStream extends InputStream
+ implements Seekable, IOStatisticsSource {
/**
* The input stream to be compressed.
*/
@@ -68,7 +73,16 @@ public void close() throws IOException {
}
}
}
-
+
+ /**
+ * Return any IOStatistics provided by the underlying stream, {@code in}.
+ * @return IO stats from the inner stream; null if it provides none.
+ */
+ @Override
+ public IOStatistics getIOStatistics() {
+ return IOStatisticsSupport.retrieveIOStatistics(in);
+ }
+
/**
* Read bytes from the stream.
* Made abstract to prevent leakage to underlying stream.
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java
index 71c7f32e665e5..2a11ace81702c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionOutputStream.java
@@ -23,13 +23,17 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsSource;
+import org.apache.hadoop.fs.statistics.IOStatisticsSupport;
/**
* A compression output stream.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
-public abstract class CompressionOutputStream extends OutputStream {
+public abstract class CompressionOutputStream extends OutputStream
+ implements IOStatisticsSource {
/**
* The output stream to be compressed.
*/
@@ -94,4 +98,12 @@ public void flush() throws IOException {
*/
public abstract void resetState() throws IOException;
+ /**
+ * Return any IOStatistics provided by the underlying stream, {@code out}.
+ * @return IO stats from the inner stream; null if it provides none.
+ */
+ @Override
+ public IOStatistics getIOStatistics() {
+ return IOStatisticsSupport.retrieveIOStatistics(out);
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/DecodingValidator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/DecodingValidator.java
new file mode 100644
index 0000000000000..9597058b6fe74
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/DecodingValidator.java
@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.io.erasurecode.ECChunk;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Checks the result of a decode operation: an input which the decoder used
+ * is decoded again from the decoded outputs and compared with the original.
+ */
+@InterfaceAudience.Private
+public class DecodingValidator {
+
+  private final RawErasureDecoder decoder;
+  private ByteBuffer buffer;
+  private int[] newValidIndexes;
+  private int newErasedIndex;
+
+  public DecodingValidator(RawErasureDecoder decoder) {
+    this.decoder = decoder;
+  }
+
+  /**
+   * Validate decoded outputs by decoding one of the original inputs again
+   * from them and checking that it matches the original.
+   *
+   * Example for RS (6, 3) with sources (d0, d1, d2, d3, d4, d5) and
+   * parities (p0, p1, p2), given
+   *  inputs = [d0, null (d1), d2, d3, d4, d5, null (p0), p1, null (p2)];
+   *  erasedIndexes = [1, 6];
+   *  outputs = [d1, p0].
+   * The validation
+   *  1. builds a new decoding problem whose inputs contain the decoded
+   *     outputs, and decodes it:
+   *      newInputs = [d1, d2, d3, d4, d5, p0]
+   *      newErasedIndexes = [0]
+   *      newOutputs = [d0']
+   *  2. compares d0 with d0'. If the initial outputs were wrong, this
+   *     comparison fails with high probability.
+   *
+   * The input buffers must be positioned at the start of the data to read:
+   * buffers which have already been through a decoder must have their
+   * positions reset before they are passed in.
+   *
+   * Neither outputs nor erasedIndexes are changed by this method.
+   *
+   * @param inputs input buffers used for decoding; on return their
+   *               positions have been moved to their limits
+   * @param erasedIndexes indexes of erased units used for decoding
+   * @param outputs decoded output buffers; their positions are restored
+   *                before returning, so they are ready to be read
+   * @throws IOException if decoding fails or the outputs are found invalid
+   */
+  public void validate(ByteBuffer[] inputs, int[] erasedIndexes,
+      ByteBuffer[] outputs) throws IOException {
+    markBuffers(outputs);
+
+    try {
+      ByteBuffer firstValid = CoderUtil.findFirstValidInput(inputs);
+      boolean direct = firstValid.isDirect();
+      int capacity = firstValid.capacity();
+      int remaining = firstValid.remaining();
+
+      // the scratch buffer is only replaced when it cannot be reused
+      boolean reusable = buffer != null && buffer.isDirect() == direct
+          && buffer.capacity() >= remaining;
+      if (!reusable) {
+        buffer = allocateBuffer(direct, capacity);
+      }
+      buffer.clear().limit(remaining);
+
+      ByteBuffer[] newInputs = buildNewInputs(inputs, erasedIndexes, outputs);
+
+      // Keep it for testing
+      newValidIndexes = CoderUtil.getValidIndexes(newInputs);
+
+      decoder.decode(newInputs, new int[]{newErasedIndex},
+          new ByteBuffer[]{buffer});
+
+      if (!buffer.equals(inputs[newErasedIndex])) {
+        throw new InvalidDecodingException("Failed to validate decoding");
+      }
+    } finally {
+      toLimits(inputs);
+      resetBuffers(outputs);
+    }
+  }
+
+  /** Build the inputs of the validation decode; sets newErasedIndex. */
+  private ByteBuffer[] buildNewInputs(ByteBuffer[] inputs,
+      int[] erasedIndexes, ByteBuffer[] outputs) {
+    ByteBuffer[] candidates = new ByteBuffer[inputs.length];
+    // each decoded output fills the slot of the unit it restored
+    for (int k = 0; k < erasedIndexes.length; k++) {
+      candidates[erasedIndexes[k]] = outputs[k];
+    }
+    int used = erasedIndexes.length;
+    newErasedIndex = -1;
+    int validCount = CoderUtil.getValidIndexes(inputs).length;
+    for (int i = 0; i < candidates.length && used != validCount; i++) {
+      if (newErasedIndex < 0 && inputs[i] != null) {
+        // the first available input becomes the unit to decode again
+        newErasedIndex = i;
+        candidates[i] = null;
+      } else if (candidates[i] == null) {
+        candidates[i] = inputs[i];
+        if (inputs[i] != null) {
+          used++;
+        }
+      }
+    }
+    return candidates;
+  }
+
+  /**
+   * Chunk-based variant: the chunks are converted to buffers and then
+   * validated by {@link #validate(ByteBuffer[], int[], ByteBuffer[])}.
+   * @param inputs input chunks used for decoding
+   * @param erasedIndexes indexes of erased units used for decoding
+   * @param outputs decoded output chunks
+   * @throws IOException if the buffer-based validation raises one
+   */
+  public void validate(ECChunk[] inputs, int[] erasedIndexes, ECChunk[] outputs)
+      throws IOException {
+    validate(CoderUtil.toBuffers(inputs), erasedIndexes,
+        CoderUtil.toBuffers(outputs));
+  }
+
+  private static ByteBuffer allocateBuffer(boolean direct, int capacity) {
+    return direct
+        ? ByteBuffer.allocateDirect(capacity) : ByteBuffer.allocate(capacity);
+  }
+
+  private static void markBuffers(ByteBuffer[] buffers) {
+    for (ByteBuffer each : buffers) {
+      if (each != null) {
+        each.mark();
+      }
+    }
+  }
+
+  private static void resetBuffers(ByteBuffer[] buffers) {
+    for (ByteBuffer each : buffers) {
+      if (each != null) {
+        each.reset();
+      }
+    }
+  }
+
+  private static void toLimits(ByteBuffer[] buffers) {
+    for (ByteBuffer each : buffers) {
+      if (each != null) {
+        each.position(each.limit());
+      }
+    }
+  }
+
+  @VisibleForTesting
+  protected int[] getNewValidIndexes() {
+    return newValidIndexes;
+  }
+
+  @VisibleForTesting
+  protected int getNewErasedIndex() {
+    return newErasedIndex;
+  }
+}
\ No newline at end of file
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/InvalidDecodingException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/InvalidDecodingException.java
new file mode 100644
index 0000000000000..37869f8eeded0
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/InvalidDecodingException.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+import java.io.IOException;
+
+/**
+ * Checked exception signalling that a decoding was invalid.
+ */
+@InterfaceAudience.Private
+public class InvalidDecodingException extends IOException {
+  private static final long serialVersionUID = 0L;
+
+  /** @param description detail message handed to the IOException constructor */
+  public InvalidDecodingException(String description) {
+    super(description);
+  }
+}
\ No newline at end of file
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index 4a0b5aec40481..abdc81bb87ad5 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -54,8 +54,8 @@
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.concurrent.AsyncGet;
-import org.apache.htrace.core.Span;
-import org.apache.htrace.core.Tracer;
+import org.apache.hadoop.tracing.Span;
+import org.apache.hadoop.tracing.Tracer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
index b7b7ad4db65cd..a1500d52a7444 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
@@ -39,8 +39,8 @@
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.concurrent.AsyncGet;
-import org.apache.htrace.core.TraceScope;
-import org.apache.htrace.core.Tracer;
+import org.apache.hadoop.tracing.TraceScope;
+import org.apache.hadoop.tracing.Tracer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java
index 5043051ce0a2c..310f44eebe213 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java
@@ -35,8 +35,8 @@
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.concurrent.AsyncGet;
-import org.apache.htrace.core.TraceScope;
-import org.apache.htrace.core.Tracer;
+import org.apache.hadoop.tracing.Tracer;
+import org.apache.hadoop.tracing.TraceScope;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
index 9be4ff2e930e7..d37e4a1b24b6a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
@@ -99,6 +99,7 @@
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcSaslProto;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcSaslProto.SaslAuth;
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RpcSaslProto.SaslState;
+import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RPCTraceInfoProto;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.SaslPropertiesResolver;
@@ -118,10 +119,11 @@
import org.apache.hadoop.util.ProtoUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
-import org.apache.htrace.core.SpanId;
-import org.apache.htrace.core.TraceScope;
-import org.apache.htrace.core.Tracer;
-
+import org.apache.hadoop.tracing.Span;
+import org.apache.hadoop.tracing.SpanContext;
+import org.apache.hadoop.tracing.TraceScope;
+import org.apache.hadoop.tracing.Tracer;
+import org.apache.hadoop.tracing.TraceUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.protobuf.ByteString;
@@ -783,7 +785,7 @@ public static class Call implements Schedulable,
private AtomicInteger responseWaitCount = new AtomicInteger(1);
final RPC.RpcKind rpcKind;
final byte[] clientId;
- private final TraceScope traceScope; // the HTrace scope on the server side
+ private final Span span; // the trace span on the server side
private final CallerContext callerContext; // the call context
private boolean deferredResponse = false;
private int priorityLevel;
@@ -798,7 +800,7 @@ public static class Call implements Schedulable,
Call(Call call) {
this(call.callId, call.retryCount, call.rpcKind, call.clientId,
- call.traceScope, call.callerContext);
+ call.span, call.callerContext);
}
Call(int id, int retryCount, RPC.RpcKind kind, byte[] clientId) {
@@ -812,14 +814,14 @@ public Call(int id, int retryCount, Void ignore1, Void ignore2,
}
Call(int id, int retryCount, RPC.RpcKind kind, byte[] clientId,
- TraceScope traceScope, CallerContext callerContext) {
+ Span span, CallerContext callerContext) {
this.callId = id;
this.retryCount = retryCount;
this.timestampNanos = Time.monotonicNowNanos();
this.responseTimestampNanos = timestampNanos;
this.rpcKind = kind;
this.clientId = clientId;
- this.traceScope = traceScope;
+ this.span = span;
this.callerContext = callerContext;
this.clientStateId = Long.MIN_VALUE;
this.isCallCoordinated = false;
@@ -988,8 +990,8 @@ private class RpcCall extends Call {
RpcCall(Connection connection, int id, int retryCount,
Writable param, RPC.RpcKind kind, byte[] clientId,
- TraceScope traceScope, CallerContext context) {
- super(id, retryCount, kind, clientId, traceScope, context);
+ Span span, CallerContext context) {
+ super(id, retryCount, kind, clientId, span, context);
this.connection = connection;
this.rpcRequest = param;
}
@@ -2672,19 +2674,24 @@ private void processRpcRequest(RpcRequestHeaderProto header,
throw new FatalRpcServerException(
RpcErrorCodeProto.FATAL_DESERIALIZING_REQUEST, err);
}
-
- TraceScope traceScope = null;
+
+ Span span = null;
if (header.hasTraceInfo()) {
- if (tracer != null) {
- // If the incoming RPC included tracing info, always continue the
- // trace
- SpanId parentSpanId = new SpanId(
- header.getTraceInfo().getTraceId(),
- header.getTraceInfo().getParentId());
- traceScope = tracer.newScope(
- RpcClientUtil.toTraceName(rpcRequest.toString()),
- parentSpanId);
- traceScope.detach();
+ RPCTraceInfoProto traceInfoProto = header.getTraceInfo();
+ if (traceInfoProto.hasSpanContext()) {
+ if (tracer == null) {
+ setTracer(Tracer.curThreadTracer());
+ }
+ if (tracer != null) {
+ // If the incoming RPC included tracing info, always continue the
+ // trace
+ SpanContext spanCtx = TraceUtils.byteStringToSpanContext(
+ traceInfoProto.getSpanContext());
+ if (spanCtx != null) {
+ span = tracer.newSpan(
+ RpcClientUtil.toTraceName(rpcRequest.toString()), spanCtx);
+ }
+ }
}
}
@@ -2700,7 +2707,7 @@ private void processRpcRequest(RpcRequestHeaderProto header,
RpcCall call = new RpcCall(this, header.getCallId(),
header.getRetryCount(), rpcRequest,
ProtoUtil.convert(header.getRpcKind()),
- header.getClientId().toByteArray(), traceScope, callerContext);
+ header.getClientId().toByteArray(), span, callerContext);
// Save the priority level assignment by the scheduler
call.setPriorityLevel(callQueue.getPriorityLevel(call));
@@ -2953,10 +2960,9 @@ public void run() {
LOG.debug(Thread.currentThread().getName() + ": " + call + " for RpcKind " + call.rpcKind);
}
CurCall.set(call);
- if (call.traceScope != null) {
- call.traceScope.reattach();
- traceScope = call.traceScope;
- traceScope.getSpan().addTimelineAnnotation("called");
+ if (call.span != null) {
+ traceScope = tracer.activateSpan(call.span);
+ call.span.addTimelineAnnotation("called");
}
// always update the current call context
CallerContext.setCurrent(call.callerContext);
@@ -2971,14 +2977,14 @@ public void run() {
if (running) { // unexpected -- log it
LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e);
if (traceScope != null) {
- traceScope.getSpan().addTimelineAnnotation("unexpectedly interrupted: " +
+ traceScope.addTimelineAnnotation("unexpectedly interrupted: " +
StringUtils.stringifyException(e));
}
}
} catch (Exception e) {
LOG.info(Thread.currentThread().getName() + " caught an exception", e);
if (traceScope != null) {
- traceScope.getSpan().addTimelineAnnotation("Exception: " +
+ traceScope.addTimelineAnnotation("Exception: " +
StringUtils.stringifyException(e));
}
} finally {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java
index b303f8494b63c..d790e49f5dcf2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java
@@ -39,8 +39,8 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.*;
-import org.apache.htrace.core.TraceScope;
-import org.apache.htrace.core.Tracer;
+import org.apache.hadoop.tracing.TraceScope;
+import org.apache.hadoop.tracing.Tracer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetric.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetric.java
index 58d79cbdee801..ed41ccac7278d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetric.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetric.java
@@ -56,7 +56,7 @@ public void snapshot(MetricsRecordBuilder builder) {
protected void clearChanged() { changed = false; }
/**
- * @return true if metric is changed since last snapshot/snapshot
+ * @return true if metric is changed since last snapshot
*/
public boolean changed() { return changed; }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java
index 207916589f694..7795343de3c20 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java
@@ -163,6 +163,7 @@ private synchronized MutableRate addMetricIfNotExists(String name) {
MutableRate metric = globalMetrics.get(name);
if (metric == null) {
metric = new MutableRate(name + typePrefix, name + typePrefix, false);
+ metric.setUpdateTimeStamp(true);
globalMetrics.put(name, metric);
}
return metric;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java
index e6111e36bb76d..17233629c7d26 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRollingAverages.java
@@ -41,6 +41,7 @@
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.util.Time;
import javax.annotation.Nullable;
@@ -77,13 +78,26 @@ public class MutableRollingAverages extends MutableMetric implements Closeable {
private final String avgInfoDescTemplate;
private int numWindows;
+ /**
+ * This class maintains sub-sum and sub-total of SampleStat.
+ */
private static class SumAndCount {
private final double sum;
private final long count;
-
- SumAndCount(final double sum, final long count) {
+ private final long snapshotTimeStamp;
+
+ /**
+ * Constructor for {@link SumAndCount}.
+ *
+ * @param sum sub-sum in sliding windows
+ * @param count sub-total in sliding windows
+ * @param snapshotTimeStamp time at which the SampleStat snapshot was taken.
+ */
+ SumAndCount(final double sum, final long count,
+ final long snapshotTimeStamp) {
this.sum = sum;
this.count = count;
+ this.snapshotTimeStamp = snapshotTimeStamp;
}
public double getSum() {
@@ -93,6 +107,10 @@ public double getSum() {
public long getCount() {
return count;
}
+
+ public long getSnapshotTimeStamp() {
+ return snapshotTimeStamp;
+ }
}
/**
@@ -110,6 +128,16 @@ public long getCount() {
private static final long WINDOW_SIZE_MS_DEFAULT = 300_000;
private static final int NUM_WINDOWS_DEFAULT = 36;
+ /**
+ * Time duration after which a record is considered stale.
+ * {@link MutableRollingAverages} should be time-sensitive, so it uses the
+ * total window length (i.e. NUM_WINDOWS_DEFAULT * WINDOW_SIZE_MS_DEFAULT) as
+ * the validity period, so that records which are too old are not used to
+ * compute the average.
+ */
+ private long recordValidityMs =
+ NUM_WINDOWS_DEFAULT * WINDOW_SIZE_MS_DEFAULT;
+
/**
* Constructor for {@link MutableRollingAverages}.
* @param metricValueName
@@ -231,7 +259,8 @@ public LinkedBlockingDeque> secondarySort,
boolean nonDataNodeReader) {
/** Sort weights for the nodes array */
- int[] weights = new int[activeLen];
- for (int i=0; i
X509KeyManager that exposes a method,
+ * {@link #loadFrom(Path)} to reload its configuration. Note that it is necessary
+ * to implement the X509ExtendedKeyManager to properly delegate
+ * the additional methods, otherwise the SSL handshake will fail.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class ReloadingX509KeystoreManager extends X509ExtendedKeyManager {
+
+ // Named after this class so its messages are not attributed to
+ // ReloadingX509TrustManager, which the original declaration used.
+ private static final Logger LOG =
+     LoggerFactory.getLogger(ReloadingX509KeystoreManager.class);
+
+ static final String RELOAD_ERROR_MESSAGE =
+ "Could not load keystore (keep using existing one) : ";
+
+ final private String type;
+ final private String storePassword;
+ final private String keyPassword;
+ private AtomicReferenceReloading509KeystoreManager
+ *
+ * @param type type of keystore file, typically 'jks'.
+ * @param location local path to the keystore file.
+ * @param storePassword password of the keystore file.
+ * @param keyPassword The password of the key.
+ * @throws IOException
+ * @throws GeneralSecurityException
+ */
+ public ReloadingX509KeystoreManager(String type, String location,
+ String storePassword, String keyPassword)
+ throws IOException, GeneralSecurityException {
+ this.type = type;
+ this.storePassword = storePassword;
+ this.keyPassword = keyPassword;
+ keyManagerRef = new AtomicReference