diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index c2d5b510de2a3..4738ccf2bba24 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -24,8 +24,8 @@ pool:
variables:
MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository
MAVEN_OPTS: '-Dmaven.repo.local=$(MAVEN_CACHE_FOLDER) -Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true'
- SPARK_VERSION: '2.4.4'
- HADOOP_VERSION: '2.7'
+ SPARK_VERSION: '3.2.1'
+ HADOOP_VERSION: '3.2'
SPARK_ARCHIVE: spark-$(SPARK_VERSION)-bin-hadoop$(HADOOP_VERSION)
stages:
@@ -48,7 +48,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'install'
- options: -T 2.5C -DskipTests
+ options: -T 2.5C -Dspark3 -DskipTests
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -57,7 +57,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: -Punit-tests -pl hudi-common,hudi-flink,hudi-client/hudi-spark-client
+ options: -Punit-tests,spark3 -pl hudi-common,hudi-flink,hudi-client/hudi-spark-client
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -66,7 +66,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: -Pfunctional-tests -pl hudi-common,hudi-flink
+ options: -Pfunctional-tests,spark3 -pl hudi-common,hudi-flink
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -87,7 +87,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'install'
- options: -T 2.5C -DskipTests
+ options: -T 2.5C -Dspark3 -DskipTests
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -96,7 +96,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: -Pfunctional-tests -pl hudi-client/hudi-spark-client
+ options: -Pfunctional-tests,spark3 -pl hudi-client/hudi-spark-client
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -117,7 +117,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'install'
- options: -T 2.5C -DskipTests
+ options: -T 2.5C -Dspark3 -DskipTests
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -126,7 +126,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: -Punit-tests -pl hudi-client/hudi-client-common,hudi-client/hudi-flink-client,hudi-client/hudi-java-client,hudi-cli,hudi-utilities,hudi-sync/hudi-hive-sync
+ options: -Punit-tests,spark3 -pl hudi-client/hudi-client-common,hudi-client/hudi-flink-client,hudi-client/hudi-java-client,hudi-cli,hudi-utilities,hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -135,7 +135,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: -Pfunctional-tests -pl hudi-client/hudi-client-common,hudi-client/hudi-flink-client,hudi-client/hudi-java-client,hudi-cli,hudi-utilities,hudi-sync/hudi-hive-sync
+ options: -Pfunctional-tests,spark3 -pl hudi-client/hudi-client-common,hudi-client/hudi-flink-client,hudi-client/hudi-java-client,hudi-cli,hudi-utilities,hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -156,7 +156,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'install'
- options: -T 2.5C -DskipTests
+ options: -T 2.5C -Dspark3 -DskipTests
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -165,7 +165,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: -Punit-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
+ options: -Punit-tests,spark3 -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -174,7 +174,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
- options: -Pfunctional-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
+ options: -Pfunctional-tests,spark3 -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
@@ -194,5 +194,5 @@ stages:
tar -xvf $(Pipeline.Workspace)/$(SPARK_ARCHIVE).tgz -C $(Pipeline.Workspace)/
mkdir /tmp/spark-events/
- script: |
- mvn $(MAVEN_OPTS) -Pintegration-tests verify
+ mvn $(MAVEN_OPTS) -Pintegration-tests,spark3 verify
displayName: IT
diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml
index 3c1acbdfe7714..086004f121e97 100644
--- a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml
+++ b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml
@@ -184,7 +184,7 @@ services:
presto-coordinator-1:
container_name: presto-coordinator-1
hostname: presto-coordinator-1
- image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.268:latest
+ image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
ports:
- '8090:8090'
environment:
@@ -201,25 +201,25 @@ services:
command: coordinator
presto-worker-1:
- container_name: presto-worker-1
- hostname: presto-worker-1
- image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.268:latest
- depends_on: ["presto-coordinator-1"]
- environment:
- - PRESTO_JVM_MAX_HEAP=512M
- - PRESTO_QUERY_MAX_MEMORY=1GB
- - PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
- - PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
- - PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
- - TERM=xterm
- links:
- - "hivemetastore"
- - "hiveserver"
- - "hive-metastore-postgresql"
- - "namenode"
- volumes:
- - ${HUDI_WS}:/var/hoodie/ws
- command: worker
+ container_name: presto-worker-1
+ hostname: presto-worker-1
+ image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
+ depends_on: [ "presto-coordinator-1" ]
+ environment:
+ - PRESTO_JVM_MAX_HEAP=512M
+ - PRESTO_QUERY_MAX_MEMORY=1GB
+ - PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
+ - PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
+ - PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
+ - TERM=xterm
+ links:
+ - "hivemetastore"
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+ command: worker
trino-coordinator-1:
container_name: trino-coordinator-1
diff --git a/docker/compose/docker-compose_hadoop310_hive312_spark321.yml b/docker/compose/docker-compose_hadoop310_hive312_spark321.yml
new file mode 100644
index 0000000000000..c7a6e6d966f7e
--- /dev/null
+++ b/docker/compose/docker-compose_hadoop310_hive312_spark321.yml
@@ -0,0 +1,310 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+version: "3.3"
+
+services:
+
+ namenode:
+ image: apachehudi/hudi-hadoop_3.1.0-namenode:latest
+ hostname: namenode
+ container_name: namenode
+ environment:
+ - CLUSTER_NAME=hudi_hadoop310_hive312_spark321
+ ports:
+ - "9870:9870"
+ - "8020:8020"
+ env_file:
+ - ./hadoop.env
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://namenode:9870"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+
+ datanode1:
+ image: apachehudi/hudi-hadoop_3.1.0-datanode:latest
+ container_name: datanode1
+ hostname: datanode1
+ environment:
+ - CLUSTER_NAME=hudi_hadoop310_hive312_spark321
+ env_file:
+ - ./hadoop.env
+ ports:
+ - "50075:50075"
+ - "50010:50010"
+ links:
+ - "namenode"
+ - "historyserver"
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://datanode1:50075"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ depends_on:
+ - namenode
+
+ historyserver:
+ image: apachehudi/hudi-hadoop_3.1.0-history:latest
+ hostname: historyserver
+ container_name: historyserver
+ environment:
+ - CLUSTER_NAME=hudi_hadoop310_hive312_spark321
+ depends_on:
+ - "namenode"
+ links:
+ - "namenode"
+ ports:
+ - "58188:8188"
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://historyserver:8188"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ env_file:
+ - ./hadoop.env
+ volumes:
+ - historyserver:/hadoop/yarn/timeline
+
+ hive-metastore-postgresql:
+ image: bde2020/hive-metastore-postgresql:3.1.0
+ volumes:
+ - hive-metastore-postgresql:/var/lib/postgresql
+ hostname: hive-metastore-postgresql
+ container_name: hive-metastore-postgresql
+
+ hivemetastore:
+ image: apachehudi/hudi-hadoop_3.1.0-hive_3.1.2:latest
+ hostname: hivemetastore
+ container_name: hivemetastore
+ links:
+ - "hive-metastore-postgresql"
+ - "namenode"
+ env_file:
+ - ./hadoop.env
+ command: /opt/hive/bin/hive --service metastore
+ environment:
+ SERVICE_PRECONDITION: "namenode:9870 hive-metastore-postgresql:5432"
+ ports:
+ - "9083:9083"
+ healthcheck:
+ test: ["CMD", "nc", "-z", "hivemetastore", "9083"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ depends_on:
+ - "hive-metastore-postgresql"
+ - "namenode"
+
+ hiveserver:
+ image: apachehudi/hudi-hadoop_3.1.0-hive_3.1.2:latest
+ hostname: hiveserver
+ container_name: hiveserver
+ env_file:
+ - ./hadoop.env
+ environment:
+ SERVICE_PRECONDITION: "hivemetastore:9083"
+ ports:
+ - "10000:10000"
+ depends_on:
+ - "hivemetastore"
+ links:
+ - "hivemetastore"
+ - "hive-metastore-postgresql"
+ - "namenode"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+
+ sparkmaster:
+ image: apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkmaster_3.2.1:latest
+ hostname: sparkmaster
+ container_name: sparkmaster
+ env_file:
+ - ./hadoop.env
+ ports:
+ - "8080:8080"
+ - "7077:7077"
+ environment:
+ - INIT_DAEMON_STEP=setup_spark
+ links:
+ - "hivemetastore"
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+
+ spark-worker-1:
+ image: apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkworker_3.2.1:latest
+ hostname: spark-worker-1
+ container_name: spark-worker-1
+ env_file:
+ - ./hadoop.env
+ depends_on:
+ - sparkmaster
+ ports:
+ - "8081:8081"
+ environment:
+ - "SPARK_MASTER=spark://sparkmaster:7077"
+ links:
+ - "hivemetastore"
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+
+ zookeeper:
+ image: 'bitnami/zookeeper:3.4.12-r68'
+ hostname: zookeeper
+ container_name: zookeeper
+ ports:
+ - '2181:2181'
+ environment:
+ - ALLOW_ANONYMOUS_LOGIN=yes
+
+ kafka:
+ image: 'bitnami/kafka:2.0.0'
+ hostname: kafkabroker
+ container_name: kafkabroker
+ ports:
+ - '9092:9092'
+ environment:
+ - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+ - ALLOW_PLAINTEXT_LISTENER=yes
+
+ presto-coordinator-1:
+ container_name: presto-coordinator-1
+ hostname: presto-coordinator-1
+ image: apachehudi/hudi-hadoop_3.1.0-prestobase_0.271:latest
+ ports:
+ - '8090:8090'
+ environment:
+ - PRESTO_JVM_MAX_HEAP=512M
+ - PRESTO_QUERY_MAX_MEMORY=1GB
+ - PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
+ - PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
+ - PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
+ - TERM=xterm
+ links:
+ - "hivemetastore"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+ command: coordinator
+
+ presto-worker-1:
+ container_name: presto-worker-1
+ hostname: presto-worker-1
+ image: apachehudi/hudi-hadoop_3.1.0-prestobase_0.271:latest
+ depends_on: ["presto-coordinator-1"]
+ environment:
+ - PRESTO_JVM_MAX_HEAP=512M
+ - PRESTO_QUERY_MAX_MEMORY=1GB
+ - PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
+ - PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
+ - PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
+ - TERM=xterm
+ links:
+ - "hivemetastore"
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+ command: worker
+
+ trino-coordinator-1:
+ container_name: trino-coordinator-1
+ hostname: trino-coordinator-1
+ image: apachehudi/hudi-hadoop_3.1.0-trinocoordinator_368:latest
+ ports:
+ - '8091:8091'
+ links:
+ - "hivemetastore"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+ command: http://trino-coordinator-1:8091 trino-coordinator-1
+
+ trino-worker-1:
+ container_name: trino-worker-1
+ hostname: trino-worker-1
+ image: apachehudi/hudi-hadoop_3.1.0-trinoworker_368:latest
+ depends_on: [ "trino-coordinator-1" ]
+ ports:
+ - '8092:8092'
+ links:
+ - "hivemetastore"
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+ command: http://trino-coordinator-1:8091 trino-worker-1
+
+ graphite:
+ container_name: graphite
+ hostname: graphite
+ image: graphiteapp/graphite-statsd
+ ports:
+ - 80:80
+ - 2003-2004:2003-2004
+ - 8126:8126
+
+ adhoc-1:
+ image: apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkadhoc_3.2.1:latest
+ hostname: adhoc-1
+ container_name: adhoc-1
+ env_file:
+ - ./hadoop.env
+ depends_on:
+ - sparkmaster
+ ports:
+ - '4040:4040'
+ environment:
+ - "SPARK_MASTER=spark://sparkmaster:7077"
+ links:
+ - "hivemetastore"
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+ - "presto-coordinator-1"
+ - "trino-coordinator-1"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+
+ adhoc-2:
+ image: apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkadhoc_3.2.1:latest
+ hostname: adhoc-2
+ container_name: adhoc-2
+ env_file:
+ - ./hadoop.env
+ depends_on:
+ - sparkmaster
+ environment:
+ - "SPARK_MASTER=spark://sparkmaster:7077"
+ links:
+ - "hivemetastore"
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+ - "presto-coordinator-1"
+ - "trino-coordinator-1"
+ volumes:
+ - ${HUDI_WS}:/var/hoodie/ws
+
+volumes:
+ namenode:
+ historyserver:
+ hive-metastore-postgresql:
+
+networks:
+ default:
+ name: rahil-test
diff --git a/docker/compose/hadoop.env b/docker/compose/hadoop.env
index 4e8a94246baa7..499b863c0cef5 100644
--- a/docker/compose/hadoop.env
+++ b/docker/compose/hadoop.env
@@ -21,6 +21,15 @@ HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hivemetastore:9083
+HIVE_SITE_CONF_hive_metastore_uri_resolver=org.apache.hudi.hadoop.hive.NoOpMetastoreUriResolverHook
+HIVE_SITE_CONF_hive_metastore_event_db_notification_api_auth=false
+HIVE_SITE_CONF_hive_execution_engine=mr
+HIVE_SITE_CONF_hive_metastore_schema_verification=false
+HIVE_SITE_CONF_hive_metastore_schema_verification_record_version=false
+HIVE_SITE_CONF_hive_vectorized_execution_enabled=false
+
+MAPRED_CONF_mapreduce_map_java_opts=-Xmx1024M
+MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx2048M
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
HDFS_CONF_dfs_webhdfs_enabled=true
diff --git a/docker/demo/config/log4j.properties b/docker/demo/config/log4j.properties
index df8ad3d15e07e..46b6bf5ecf0c6 100644
--- a/docker/demo/config/log4j.properties
+++ b/docker/demo/config/log4j.properties
@@ -25,8 +25,10 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=WARN
-# Set logging of integration testsuite to INFO level
+# Adjust Hudi internal logging levels
+log4j.logger.org.apache.hudi=DEBUG
log4j.logger.org.apache.hudi.integ.testsuite=INFO
+log4j.logger.org.apache.hudi.org.eclipse.jetty=ERROR
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
diff --git a/docker/hoodie/hadoop/base/Dockerfile b/docker/hoodie/hadoop/base/Dockerfile
index 2c98ce6242fb1..ebfb847c91ff0 100644
--- a/docker/hoodie/hadoop/base/Dockerfile
+++ b/docker/hoodie/hadoop/base/Dockerfile
@@ -22,7 +22,7 @@ USER root
# Default to UTF-8 file.encoding
ENV LANG C.UTF-8
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ENV HADOOP_VERSION ${HADOOP_VERSION}
ENV HADOOP_URL ${HADOOP_URL}
@@ -36,7 +36,6 @@ RUN set -x \
&& tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
&& rm /tmp/hadoop.tar.gz* \
&& ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
- && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
&& mkdir /hadoop-data
ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
diff --git a/docker/hoodie/hadoop/base/entrypoint.sh b/docker/hoodie/hadoop/base/entrypoint.sh
index 7c26f29f66886..7a00ddfb9ddab 100644
--- a/docker/hoodie/hadoop/base/entrypoint.sh
+++ b/docker/hoodie/hadoop/base/entrypoint.sh
@@ -59,6 +59,7 @@ configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
configure /etc/hadoop/kms-site.xml kms KMS_CONF
+configure /etc/hadoop/mapred-site.xml mapred MAPRED_CONF
if [ "$MULTIHOMED_NETWORK" = "1" ]; then
echo "Configuring for multihomed network"
diff --git a/docker/hoodie/hadoop/base_java11/Dockerfile b/docker/hoodie/hadoop/base_java11/Dockerfile
index 8052eae6add84..c363c00d9569e 100644
--- a/docker/hoodie/hadoop/base_java11/Dockerfile
+++ b/docker/hoodie/hadoop/base_java11/Dockerfile
@@ -22,7 +22,7 @@ USER root
# Default to UTF-8 file.encoding
ENV LANG C.UTF-8
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ENV HADOOP_VERSION ${HADOOP_VERSION}
ENV HADOOP_URL ${HADOOP_URL}
@@ -36,7 +36,6 @@ RUN set -x \
&& tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
&& rm /tmp/hadoop.tar.gz* \
&& ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
- && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
&& mkdir /hadoop-data
ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
diff --git a/docker/hoodie/hadoop/base_java11/entrypoint.sh b/docker/hoodie/hadoop/base_java11/entrypoint.sh
index 7c26f29f66886..7a00ddfb9ddab 100644
--- a/docker/hoodie/hadoop/base_java11/entrypoint.sh
+++ b/docker/hoodie/hadoop/base_java11/entrypoint.sh
@@ -59,6 +59,7 @@ configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
configure /etc/hadoop/kms-site.xml kms KMS_CONF
+configure /etc/hadoop/mapred-site.xml mapred MAPRED_CONF
if [ "$MULTIHOMED_NETWORK" = "1" ]; then
echo "Configuring for multihomed network"
diff --git a/docker/hoodie/hadoop/datanode/Dockerfile b/docker/hoodie/hadoop/datanode/Dockerfile
index 79dd798f78d95..ce66ae1b92f5a 100644
--- a/docker/hoodie/hadoop/datanode/Dockerfile
+++ b/docker/hoodie/hadoop/datanode/Dockerfile
@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
ARG HADOOP_DN_PORT=50075
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
diff --git a/docker/hoodie/hadoop/historyserver/Dockerfile b/docker/hoodie/hadoop/historyserver/Dockerfile
index e08adbb05411d..5af0a31960889 100644
--- a/docker/hoodie/hadoop/historyserver/Dockerfile
+++ b/docker/hoodie/hadoop/historyserver/Dockerfile
@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
ARG HADOOP_HISTORY_PORT=8188
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
diff --git a/docker/hoodie/hadoop/hive_base/Dockerfile b/docker/hoodie/hadoop/hive_base/Dockerfile
index 7d04d94fc60cc..a91f122beb262 100644
--- a/docker/hoodie/hadoop/hive_base/Dockerfile
+++ b/docker/hoodie/hadoop/hive_base/Dockerfile
@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
ENV HIVE_HOME /opt/hive
@@ -24,22 +24,22 @@ ENV HADOOP_HOME /opt/hadoop-$HADOOP_VERSION
WORKDIR /opt
-ARG HIVE_VERSION=2.3.3
+ARG HIVE_VERSION=3.1.2
ARG HIVE_URL=https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz
ENV HIVE_VERSION ${HIVE_VERSION}
ENV HIVE_URL ${HIVE_URL}
-#Install Hive MySQL, PostgreSQL JDBC
-RUN echo "Hive URL is :${HIVE_URL}" && wget ${HIVE_URL} -O hive.tar.gz && \
+# Install Hive MySQL, PostgreSQL JDBC
+RUN echo "Hive URL is: ${HIVE_URL}" && wget ${HIVE_URL} -O hive.tar.gz && \
tar -xzvf hive.tar.gz && mv *hive*-bin hive && \
ln -s /usr/share/java/mysql-connector-java.jar $HIVE_HOME/lib/mysql-connector-java.jar && \
wget https://jdbc.postgresql.org/download/postgresql-9.4.1212.jar -O $HIVE_HOME/lib/postgresql-jdbc.jar && \
rm hive.tar.gz && mkdir -p /var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/
-#Spark should be compiled with Hive to be able to use it
+# Spark should be compiled with Hive to be able to use it
#hive-site.xml should be copied to $SPARK_HOME/conf folder
-#Custom configuration goes here
+# Custom configuration goes here
ADD conf/hive-site.xml $HADOOP_CONF_DIR
ADD conf/beeline-log4j2.properties $HIVE_HOME/conf
ADD conf/hive-env.sh $HIVE_HOME/conf
diff --git a/docker/hoodie/hadoop/hive_base/conf/hive-env.sh b/docker/hoodie/hadoop/hive_base/conf/hive-env.sh
index f22407c0c371c..f063beee9ef2e 100644
--- a/docker/hoodie/hadoop/hive_base/conf/hive-env.sh
+++ b/docker/hoodie/hadoop/hive_base/conf/hive-env.sh
@@ -38,8 +38,7 @@
# The heap size of the jvm stared by hive shell script can be controlled via:
#
-# export HADOOP_HEAPSIZE=1024
-#
+export HADOOP_HEAPSIZE=4096
# Larger heap size may be required when running queries over large number of files or partitions.
# By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be
# appropriate for hive server (hwi etc).
diff --git a/docker/hoodie/hadoop/hive_base/conf/mapred-site.xml b/docker/hoodie/hadoop/hive_base/conf/mapred-site.xml
new file mode 100644
index 0000000000000..60f393591bab5
--- /dev/null
+++ b/docker/hoodie/hadoop/hive_base/conf/mapred-site.xml
@@ -0,0 +1,18 @@
+
+
+
diff --git a/docker/hoodie/hadoop/hive_base/conf/tez-site.xml b/docker/hoodie/hadoop/hive_base/conf/tez-site.xml
new file mode 100644
index 0000000000000..f4ba9ea9fdb74
--- /dev/null
+++ b/docker/hoodie/hadoop/hive_base/conf/tez-site.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+  <property>
+    <name>tez.lib.uris</name>
+    <value>${fs.defaultFS}/apps/tez-${TEZ_VERSION}/tez.tar.gz</value>
+  </property>
+</configuration>
diff --git a/docker/hoodie/hadoop/hive_base/startup.sh b/docker/hoodie/hadoop/hive_base/startup.sh
index 3453d96dec635..1a6a37220fafb 100644
--- a/docker/hoodie/hadoop/hive_base/startup.sh
+++ b/docker/hoodie/hadoop/hive_base/startup.sh
@@ -22,5 +22,4 @@ hadoop fs -chmod g+w /tmp
hadoop fs -chmod g+w /user/hive/warehouse
cd $HIVE_HOME/bin
-export AUX_CLASSPATH=file://${HUDI_HADOOP_BUNDLE}
-./hiveserver2 --hiveconf hive.server2.enable.doAs=false --hiveconf hive.aux.jars.path=file://${HUDI_HADOOP_BUNDLE}
+./hiveserver2 --hiveconf hive.execution.engine=mr --hiveconf hive.server2.enable.doAs=false --hiveconf hive.aux.jars.path=file://${HUDI_HADOOP_BUNDLE}
diff --git a/docker/hoodie/hadoop/namenode/Dockerfile b/docker/hoodie/hadoop/namenode/Dockerfile
index d89c30eff34e3..488e34b02454b 100644
--- a/docker/hoodie/hadoop/namenode/Dockerfile
+++ b/docker/hoodie/hadoop/namenode/Dockerfile
@@ -15,7 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
ARG HADOOP_WEBHDFS_PORT=50070
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml
index de3bd3d57832d..4d7d65190f909 100644
--- a/docker/hoodie/hadoop/pom.xml
+++ b/docker/hoodie/hadoop/pom.xml
@@ -54,10 +54,10 @@
falsetrue
- 2.4.4
- 2.3.3
- 2.8.4
- 0.268
+ 3.2.1
+ 3.1.2
+ 3.1.0
+ 0.2713681.4.13true
diff --git a/docker/hoodie/hadoop/prestobase/Dockerfile b/docker/hoodie/hadoop/prestobase/Dockerfile
index 12b644aa06314..e5124fa0e5bb3 100644
--- a/docker/hoodie/hadoop/prestobase/Dockerfile
+++ b/docker/hoodie/hadoop/prestobase/Dockerfile
@@ -18,11 +18,11 @@
## Presto docker setup is based on https://github.com/smizy/docker-presto
-ARG HADOOP_VERSION=2.8.4
-ARG HIVE_VERSION=2.3.3
+ARG HADOOP_VERSION=3.1.0
+ARG HIVE_VERSION=3.1.2
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest as hadoop-base
-ARG PRESTO_VERSION=0.268
+ARG PRESTO_VERSION=0.271
ENV PRESTO_VERSION ${PRESTO_VERSION}
ENV PRESTO_HOME /opt/presto-server-${PRESTO_VERSION}
@@ -79,6 +79,13 @@ RUN chmod +x /usr/local/bin/entrypoint.sh
ADD target/ /var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/
ENV HUDI_PRESTO_BUNDLE /var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/hudi-presto-bundle.jar
RUN cp ${HUDI_PRESTO_BUNDLE} ${PRESTO_HOME}/plugin/hive-hadoop2/
+# TODO: the latest Presto master relies on hudi-presto-bundle instead of hudi-common and
+# hudi-hadoop-mr. To avoid conflicts with the older Hudi jars shipped with Presto, those jars
+# are removed below for integration tests, so that only the hudi-presto-bundle build is used.
+# Remove this temporary logic once Presto publishes a release that depends on
+# hudi-presto-bundle and the docker setup is upgraded to that release.
+RUN rm ${PRESTO_HOME}/plugin/hive-hadoop2/hudi-common-*
+RUN rm ${PRESTO_HOME}/plugin/hive-hadoop2/hudi-hadoop-mr-*
VOLUME ["${PRESTO_LOG_DIR}"]
diff --git a/docker/hoodie/hadoop/rahil.sh b/docker/hoodie/hadoop/rahil.sh
new file mode 100644
index 0000000000000..d46fd379a8470
--- /dev/null
+++ b/docker/hoodie/hadoop/rahil.sh
@@ -0,0 +1,19 @@
+docker build base -t apachehudi/hudi-hadoop_3.1.0-base
+docker build namenode -t apachehudi/hudi-hadoop_3.1.0-namenode
+docker build datanode -t apachehudi/hudi-hadoop_3.1.0-datanode
+docker build historyserver -t apachehudi/hudi-hadoop_3.1.0-history
+
+docker build hive_base -t apachehudi/hudi-hadoop_3.1.0-hive_3.1.2
+
+docker build spark_base -t apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkbase_3.2.1
+docker build sparkmaster -t apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkmaster_3.2.1
+docker build sparkadhoc -t apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkadhoc_3.2.1
+docker build sparkworker -t apachehudi/hudi-hadoop_3.1.0-hive_3.1.2-sparkworker_3.2.1
+
+
+docker build prestobase -t apachehudi/hudi-hadoop_3.1.0-prestobase_0.271
+
+docker build base_java11 -t apachehudi/hudi-hadoop_3.1.0-base-java11
+docker build trinobase -t apachehudi/hudi-hadoop_3.1.0-trinobase_368
+docker build trinocoordinator -t apachehudi/hudi-hadoop_3.1.0-trinocoordinator_368
+docker build trinoworker -t apachehudi/hudi-hadoop_3.1.0-trinoworker_368
diff --git a/docker/hoodie/hadoop/spark_base/Dockerfile b/docker/hoodie/hadoop/spark_base/Dockerfile
index 7eeab093a930d..25f55a55a50bc 100644
--- a/docker/hoodie/hadoop/spark_base/Dockerfile
+++ b/docker/hoodie/hadoop/spark_base/Dockerfile
@@ -15,16 +15,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
-ARG HIVE_VERSION=2.3.3
+ARG HADOOP_VERSION=3.1.0
+ARG HIVE_VERSION=3.1.2
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}
ENV ENABLE_INIT_DAEMON true
ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon
ENV INIT_DAEMON_STEP spark_master_init
-ARG SPARK_VERSION=2.4.4
-ARG SPARK_HADOOP_VERSION=2.7
+ARG SPARK_VERSION=3.2.1
+ARG SPARK_HADOOP_VERSION=3.2
ENV SPARK_VERSION ${SPARK_VERSION}
ENV HADOOP_VERSION ${SPARK_HADOOP_VERSION}
@@ -34,7 +34,7 @@ COPY execute-step.sh /
COPY finish-step.sh /
RUN echo "Installing Spark-version (${SPARK_VERSION})" \
- && wget http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
+ && wget http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
&& tar -xvzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
&& mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /opt/spark \
&& rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
diff --git a/docker/hoodie/hadoop/sparkadhoc/Dockerfile b/docker/hoodie/hadoop/sparkadhoc/Dockerfile
index 9e5a4cb68332b..6e8d369668b4e 100644
--- a/docker/hoodie/hadoop/sparkadhoc/Dockerfile
+++ b/docker/hoodie/hadoop/sparkadhoc/Dockerfile
@@ -15,9 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
-ARG HIVE_VERSION=2.3.3
-ARG SPARK_VERSION=2.4.4
+ARG HADOOP_VERSION=3.1.0
+ARG HIVE_VERSION=3.1.2
+ARG SPARK_VERSION=3.2.1
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}
ARG PRESTO_VERSION=0.268
diff --git a/docker/hoodie/hadoop/sparkmaster/Dockerfile b/docker/hoodie/hadoop/sparkmaster/Dockerfile
index aaeb03f39d09b..fddf1082cfefb 100644
--- a/docker/hoodie/hadoop/sparkmaster/Dockerfile
+++ b/docker/hoodie/hadoop/sparkmaster/Dockerfile
@@ -15,9 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
-ARG HIVE_VERSION=2.3.3
-ARG SPARK_VERSION=2.4.4
+ARG HADOOP_VERSION=3.1.0
+ARG HIVE_VERSION=3.1.2
+ARG SPARK_VERSION=3.2.1
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}
COPY master.sh /opt/spark
diff --git a/docker/hoodie/hadoop/sparkworker/Dockerfile b/docker/hoodie/hadoop/sparkworker/Dockerfile
index ba867f2d32924..4bfe202c0e4b9 100644
--- a/docker/hoodie/hadoop/sparkworker/Dockerfile
+++ b/docker/hoodie/hadoop/sparkworker/Dockerfile
@@ -15,9 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=2.8.4
-ARG HIVE_VERSION=2.3.3
-ARG SPARK_VERSION=2.4.4
+ARG HADOOP_VERSION=3.1.0
+ARG HIVE_VERSION=3.1.2
+ARG SPARK_VERSION=3.2.1
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}
COPY worker.sh /opt/spark
diff --git a/docker/hoodie/hadoop/trinobase/Dockerfile b/docker/hoodie/hadoop/trinobase/Dockerfile
index 9d7c23010fbb8..c1f57f15d2179 100644
--- a/docker/hoodie/hadoop/trinobase/Dockerfile
+++ b/docker/hoodie/hadoop/trinobase/Dockerfile
@@ -18,8 +18,8 @@
#
# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
-ARG HADOOP_VERSION=2.8.4
-ARG HIVE_VERSION=2.3.3
+ARG HADOOP_VERSION=3.1.0
+ARG HIVE_VERSION=3.1.2
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest as hadoop-base
ENV TRINO_VERSION=368
diff --git a/docker/hoodie/hadoop/trinocoordinator/Dockerfile b/docker/hoodie/hadoop/trinocoordinator/Dockerfile
index 67a31448d7a65..111bf8a85697d 100644
--- a/docker/hoodie/hadoop/trinocoordinator/Dockerfile
+++ b/docker/hoodie/hadoop/trinocoordinator/Dockerfile
@@ -18,7 +18,7 @@
#
# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
ARG TRINO_VERSION=368
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base
diff --git a/docker/hoodie/hadoop/trinoworker/Dockerfile b/docker/hoodie/hadoop/trinoworker/Dockerfile
index ae5b2766dc9d9..81b94f63315f6 100644
--- a/docker/hoodie/hadoop/trinoworker/Dockerfile
+++ b/docker/hoodie/hadoop/trinoworker/Dockerfile
@@ -18,7 +18,7 @@
#
# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
-ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_VERSION=3.1.0
ARG TRINO_VERSION=368
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base
diff --git a/docker/setup_demo.sh b/docker/setup_demo.sh
index 9f0a100da6122..d80510c25f8c4 100755
--- a/docker/setup_demo.sh
+++ b/docker/setup_demo.sh
@@ -16,17 +16,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+set -e -x -o pipefail
+
SCRIPT_PATH=$(cd `dirname $0`; pwd)
HUDI_DEMO_ENV=$1
WS_ROOT=`dirname $SCRIPT_PATH`
# restart cluster
-HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml down
+#HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml down
+HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop310_hive312_spark321.yml down
if [ "$HUDI_DEMO_ENV" != "dev" ]; then
echo "Pulling docker demo images ..."
- HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml pull
+ HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop310_hive312_spark321.yml pull
fi
sleep 5
-HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml up -d
+#HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml up -d
+HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop310_hive312_spark321.yml up -d
sleep 15
docker exec -it adhoc-1 /bin/bash /var/hoodie/ws/docker/demo/setup_demo_container.sh
diff --git a/docker/stop_demo.sh b/docker/stop_demo.sh
index 83b8a2c1ef5c0..ccd2e2c16dad9 100755
--- a/docker/stop_demo.sh
+++ b/docker/stop_demo.sh
@@ -20,7 +20,7 @@ SCRIPT_PATH=$(cd `dirname $0`; pwd)
# set up root directory
WS_ROOT=`dirname $SCRIPT_PATH`
# shut down cluster
-HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml down
+HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop310_hive312_spark321.yml down
# remove host mount directory
rm -rf /tmp/hadoop_data
diff --git a/hudi-aws/pom.xml b/hudi-aws/pom.xml
index d44a389a61f66..7f0b963ab3c96 100644
--- a/hudi-aws/pom.xml
+++ b/hudi-aws/pom.xml
@@ -66,6 +66,10 @@
          <exclusion>
            <groupId>javax.servlet</groupId>
            <artifactId>*</artifactId>
          </exclusion>
+         <exclusion>
+           <groupId>org.eclipse.jetty</groupId>
+           <artifactId>*</artifactId>
+         </exclusion>
diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml
index 29bdf85ab08c5..4f8401fccb251 100644
--- a/hudi-cli/pom.xml
+++ b/hudi-cli/pom.xml
@@ -190,6 +190,14 @@
          <exclusion>
            <groupId>org.apache.parquet</groupId>
            <artifactId>parquet-hadoop-bundle</artifactId>
          </exclusion>
+         <exclusion>
+           <groupId>org.eclipse.jetty.aggregate</groupId>
+           <artifactId>*</artifactId>
+         </exclusion>
+         <exclusion>
+           <groupId>org.eclipse.jetty</groupId>
+           <artifactId>*</artifactId>
+         </exclusion>
@@ -257,10 +265,22 @@
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
+         <exclusions>
+           <exclusion>
+             <groupId>org.eclipse.jetty</groupId>
+             <artifactId>*</artifactId>
+           </exclusion>
+         </exclusions>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
+         <exclusions>
+           <exclusion>
+             <groupId>org.eclipse.jetty</groupId>
+             <artifactId>*</artifactId>
+           </exclusion>
+         </exclusions>
        </dependency>
diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml
index a55a136652728..ac0b12bada130 100644
--- a/hudi-client/hudi-client-common/pom.xml
+++ b/hudi-client/hudi-client-common/pom.xml
@@ -30,6 +30,13 @@
  <packaging>jar</packaging>

  <dependencies>
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-library</artifactId>
+      <version>${scala.version}</version>
+    </dependency>
+
    <dependency>
      <groupId>org.apache.hudi</groupId>
@@ -136,6 +143,10 @@
          <exclusion>
            <groupId>javax.servlet</groupId>
            <artifactId>*</artifactId>
          </exclusion>
+         <exclusion>
+           <groupId>org.eclipse.jetty</groupId>
+           <artifactId>*</artifactId>
+         </exclusion>
@@ -156,6 +167,10 @@
          <exclusion>
            <groupId>javax.servlet</groupId>
            <artifactId>*</artifactId>
          </exclusion>
+         <exclusion>
+           <groupId>org.eclipse.jetty</groupId>
+           <artifactId>*</artifactId>
+         </exclusion>
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java
index 1079566b782f1..5ce377901a4ba 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java
@@ -21,14 +21,14 @@
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HColumnDescriptor;
-import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
public class HoodieHFileConfig {
- public static final KeyValue.KVComparator HFILE_COMPARATOR = new HoodieHBaseKVComparator();
+ public static final CellComparator HFILE_COMPARATOR = new HoodieHBaseKVComparator();
public static final boolean PREFETCH_ON_OPEN = CacheConfig.DEFAULT_PREFETCH_ON_OPEN;
public static final boolean CACHE_DATA_IN_L1 = HColumnDescriptor.DEFAULT_CACHE_DATA_IN_L1;
// This is private in CacheConfig so have been copied here.
@@ -42,12 +42,12 @@ public class HoodieHFileConfig {
private final boolean dropBehindCacheCompaction;
private final Configuration hadoopConf;
private final BloomFilter bloomFilter;
- private final KeyValue.KVComparator hfileComparator;
+ private final CellComparator hfileComparator;
private final String keyFieldName;
public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compressionAlgorithm, int blockSize,
long maxFileSize, String keyFieldName, boolean prefetchBlocksOnOpen, boolean cacheDataInL1,
- boolean dropBehindCacheCompaction, BloomFilter bloomFilter, KeyValue.KVComparator hfileComparator) {
+ boolean dropBehindCacheCompaction, BloomFilter bloomFilter, CellComparator hfileComparator) {
this.hadoopConf = hadoopConf;
this.compressionAlgorithm = compressionAlgorithm;
this.blockSize = blockSize;
@@ -96,7 +96,7 @@ public BloomFilter getBloomFilter() {
return bloomFilter;
}
- public KeyValue.KVComparator getHfileComparator() {
+ public CellComparator getHFileComparator() {
return hfileComparator;
}
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java
index 2ad6d7f9220b0..5dcd2e0a32e51 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java
@@ -25,6 +25,8 @@
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
@@ -38,8 +40,6 @@
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.io.Writable;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.StringUtils;
import java.io.DataInput;
import java.io.DataOutput;
@@ -95,6 +95,7 @@ public HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileC
HFileContext context = new HFileContextBuilder().withBlockSize(hfileConfig.getBlockSize())
.withCompression(hfileConfig.getCompressionAlgorithm())
+ .withCellComparator(hfileConfig.getHFileComparator())
.build();
conf.set(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, String.valueOf(hfileConfig.shouldPrefetchBlocksOnOpen()));
@@ -104,7 +105,6 @@ public HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileC
this.writer = HFile.getWriterFactory(conf, cacheConfig)
.withPath(this.fs, this.file)
.withFileContext(context)
- .withComparator(hfileConfig.getHfileComparator())
.create();
writer.appendFileInfo(HoodieHFileReader.KEY_SCHEMA.getBytes(), schema.toString().getBytes());
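
A quick illustration of the writer-side API shift above: with the HBase 2.x client that comes with the Hadoop 3 stack, the key comparator is supplied through HFileContextBuilder.withCellComparator rather than through the writer factory. The sketch below is not Hudi code; the class name, block size, and method shape are illustrative only, assuming an HBase 2.4-style classpath.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;

import java.io.IOException;

// Sketch only: shows where the comparator goes in the HBase 2.x writer API.
public class HFileWriterSketch {
  public static HFile.Writer openWriter(Configuration conf, FileSystem fs, Path file,
                                        CellComparator comparator) throws IOException {
    // The comparator now travels inside the HFileContext ...
    HFileContext context = new HFileContextBuilder()
        .withBlockSize(1024 * 1024)          // illustrative value, not Hudi's configured block size
        .withCellComparator(comparator)      // replaces writerFactory.withComparator(...)
        .build();
    // ... and the writer factory no longer accepts a comparator at all.
    return HFile.getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, file)
        .withFileContext(context)
        .create();
  }
}
```
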
diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml
index 3471bfb8ba366..cde418ce4b93e 100644
--- a/hudi-client/hudi-java-client/pom.xml
+++ b/hudi-client/hudi-java-client/pom.xml
@@ -141,6 +141,10 @@
          <exclusion>
            <groupId>javax.servlet</groupId>
            <artifactId>*</artifactId>
          </exclusion>
+         <exclusion>
+           <groupId>org.eclipse.jetty</groupId>
+           <artifactId>*</artifactId>
+         </exclusion>
diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml
index d6c60cb61bc45..0688fedacc2ae 100644
--- a/hudi-client/hudi-spark-client/pom.xml
+++ b/hudi-client/hudi-spark-client/pom.xml
@@ -110,6 +110,12 @@
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+      <version>${zookeeper.version}</version>
+      <scope>test</scope>
+    </dependency>
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
index 05d7f99446e94..8a3abfd6e1cbf 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java
@@ -66,6 +66,8 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import static org.apache.hudi.io.storage.HoodieHFileReader.KEY_SCHEMA;
+
/**
* Utility methods to aid testing inside the HoodieClient module.
*/
@@ -241,9 +243,9 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat
Schema schema = null;
for (String path : paths) {
try {
- HFile.Reader reader = HFile.createReader(fs, new Path(path), cacheConfig, fs.getConf());
+ HFile.Reader reader = HFile.createReader(fs, new Path(path), cacheConfig, true, fs.getConf());
if (schema == null) {
- schema = new Schema.Parser().parse(new String(reader.loadFileInfo().get("schema".getBytes())));
+ schema = new Schema.Parser().parse(new String(reader.getHFileInfo().get(KEY_SCHEMA.getBytes())));
}
HFileScanner scanner = reader.getScanner(false, false);
if (!scanner.seekTo()) {
@@ -252,7 +254,7 @@ public static Stream readHFile(JavaSparkContext jsc, String[] pat
}
do {
- Cell c = scanner.getKeyValue();
+ Cell c = scanner.getCell();
byte[] value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength());
valuesAsList.add(HoodieAvroUtils.bytesToAvro(value, schema));
} while (scanner.next());
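
The read side mirrors the same HBase 2.x migration: HFile.createReader now takes an explicit primary-replica flag, file-info metadata comes from getHFileInfo() instead of the removed loadFileInfo(), and cells are fetched with getCell(). A minimal sketch under the same assumptions (HBase 2.4-style API; the literal "schema" key mirrors the one used in the test utility above):

```java
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;

import java.io.IOException;

// Sketch only: the HBase 2.x reader calls used by the updated test utility.
public class HFileReadSketch {
  public static Schema readSchemaAndScan(FileSystem fs, Path path, Configuration conf) throws IOException {
    // The extra boolean is the primaryReplicaReader flag required by HBase 2.x.
    try (HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf), true, conf)) {
      // File-info metadata (e.g. the Avro schema stored under the "schema" key) now comes
      // from getHFileInfo() rather than the removed loadFileInfo().
      byte[] schemaBytes = reader.getHFileInfo().get("schema".getBytes());
      Schema schema = new Schema.Parser().parse(new String(schemaBytes));

      HFileScanner scanner = reader.getScanner(false, false);
      if (scanner.seekTo()) {
        do {
          Cell cell = scanner.getCell();   // replaces scanner.getKeyValue()
          // value bytes live at [getValueOffset(), getValueOffset() + getValueLength())
        } while (scanner.next());
      }
      return schema;
    }
  }
}
```
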
diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index 1a558aeae3326..028714658bae1 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -160,17 +160,35 @@
      <artifactId>hadoop-common</artifactId>
      <classifier>tests</classifier>
      <scope>test</scope>
+     <exclusions>
+       <exclusion>
+         <groupId>org.eclipse.jetty</groupId>
+         <artifactId>*</artifactId>
+       </exclusion>
+     </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <scope>provided</scope>
+     <exclusions>
+       <exclusion>
+         <groupId>org.eclipse.jetty</groupId>
+         <artifactId>*</artifactId>
+       </exclusion>
+     </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <classifier>tests</classifier>
      <scope>test</scope>
+     <exclusions>
+       <exclusion>
+         <groupId>org.eclipse.jetty</groupId>
+         <artifactId>*</artifactId>
+       </exclusion>
+     </exclusions>
    </dependency>
@@ -221,14 +239,13 @@
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
-     <scope>test</scope>
-
+
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>${hbase.version}</version>
-
+
      <scope>compile</scope>
@@ -243,6 +260,10 @@
          <exclusion>
            <groupId>org.mortbay.jetty</groupId>
            <artifactId>*</artifactId>
          </exclusion>
+         <exclusion>
+           <groupId>org.eclipse.jetty</groupId>
+           <artifactId>*</artifactId>
+         </exclusion>
          <exclusion>
            <groupId>tomcat</groupId>
            <artifactId>*</artifactId>
          </exclusion>
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
index 3700d01a60ea6..7f36a47a4d24c 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
@@ -37,6 +37,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
@@ -178,9 +179,7 @@ private static String getUserKeyFromCellKey(String cellKey) {
private static HFile.Reader createReader(String hFilePath, Configuration conf, FileSystem fileSystem) {
try {
LOG.info("Opening HFile for reading :" + hFilePath);
- HFile.Reader reader = HFile.createReader(fileSystem, new HFilePathForReader(hFilePath),
- new CacheConfig(conf), conf);
- return reader;
+ return HFile.createReader(fileSystem, new HFilePathForReader(hFilePath), new CacheConfig(conf), true, conf);
} catch (IOException ioe) {
throw new HoodieIOException(ioe.getMessage(), ioe);
}
@@ -259,7 +258,7 @@ private void initIndexInfo() {
private HoodieBootstrapIndexInfo fetchBootstrapIndexInfo() throws IOException {
return TimelineMetadataUtils.deserializeAvroMetadata(
- partitionIndexReader().loadFileInfo().get(INDEX_INFO_KEY),
+ partitionIndexReader().getHFileInfo().get(INDEX_INFO_KEY),
HoodieBootstrapIndexInfo.class);
}
@@ -306,7 +305,7 @@ private List getAllKeys(HFileScanner scanner, Function convert
try {
boolean available = scanner.seekTo();
while (available) {
- keys.add(converter.apply(getUserKeyFromCellKey(CellUtil.getCellKeyAsString(scanner.getKeyValue()))));
+ keys.add(converter.apply(getUserKeyFromCellKey(CellUtil.getCellKeyAsString(scanner.getCell()))));
available = scanner.next();
}
} catch (IOException ioe) {
@@ -528,13 +527,13 @@ public void close() {
@Override
public void begin() {
try {
- HFileContext meta = new HFileContextBuilder().build();
+ HFileContext meta = new HFileContextBuilder().withCellComparator(new HoodieKVComparator()).build();
this.indexByPartitionWriter = HFile.getWriterFactory(metaClient.getHadoopConf(),
new CacheConfig(metaClient.getHadoopConf())).withPath(metaClient.getFs(), indexByPartitionPath)
- .withFileContext(meta).withComparator(new HoodieKVComparator()).create();
+ .withFileContext(meta).create();
this.indexByFileIdWriter = HFile.getWriterFactory(metaClient.getHadoopConf(),
new CacheConfig(metaClient.getHadoopConf())).withPath(metaClient.getFs(), indexByFileIdPath)
- .withFileContext(meta).withComparator(new HoodieKVComparator()).create();
+ .withFileContext(meta).create();
} catch (IOException ioe) {
throw new HoodieIOException(ioe.getMessage(), ioe);
}
@@ -581,6 +580,6 @@ public String getName() {
* This class is explicitly used as Key Comparator to workaround hard coded
* legacy format class names inside HBase. Otherwise we will face issues with shading.
*/
- public static class HoodieKVComparator extends KeyValue.KVComparator {
+ public static class HoodieKVComparator extends CellComparatorImpl {
}
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java
index fa5117e41fa76..6a0b10fe07ea0 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java
@@ -424,6 +424,9 @@ private void processQueuedBlocksForInstant(Deque logBlocks, int
processDataBlock((HoodieAvroDataBlock) lastBlock, keys);
break;
case HFILE_DATA_BLOCK:
+ if (!keys.isPresent()) {
+ keys = Option.of(Collections.emptyList());
+ }
processDataBlock((HoodieHFileDataBlock) lastBlock, keys);
break;
case PARQUET_DATA_BLOCK:
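
The new guard above only normalizes the key set before the HFile block is processed: when no keys were requested, the absent Option is replaced by an explicit empty list so the downstream call can unwrap it without special-casing. A minimal sketch of that normalization, using java.util.Optional as a stand-in for Hudi's Option type:

```java
import java.util.Collections;
import java.util.List;
import java.util.Optional;

// Sketch only: java.util.Optional stands in for org.apache.hudi.common.util.Option here.
public class KeyOptionNormalization {

  // Mirrors the new HFILE_DATA_BLOCK branch: an absent key set becomes an explicit empty list,
  // so the block processor can always unwrap the option safely.
  static List<String> normalize(Optional<List<String>> keys) {
    return keys.orElse(Collections.emptyList());
  }

  public static void main(String[] args) {
    System.out.println(normalize(Optional.empty()));                               // []
    System.out.println(normalize(Optional.of(Collections.singletonList("key1")))); // [key1]
  }
}
```
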
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
index 557a0db7cbfad..e843ad74cb31c 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java
@@ -18,6 +18,17 @@
package org.apache.hudi.common.table.log.block;
+import org.apache.hudi.avro.HoodieAvroUtils;
+import org.apache.hudi.common.fs.inline.InLineFSUtils;
+import org.apache.hudi.common.fs.inline.InLineFileSystem;
+import org.apache.hudi.common.util.ClosableIterator;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.io.storage.HoodieHBaseKVComparator;
+import org.apache.hudi.io.storage.HoodieHFileReader;
+
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
@@ -30,17 +41,6 @@
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
-import org.apache.hudi.avro.HoodieAvroUtils;
-import org.apache.hudi.common.fs.inline.InLineFSUtils;
-import org.apache.hudi.common.fs.inline.InLineFileSystem;
-import org.apache.hudi.common.util.ClosableIterator;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.common.util.ValidationUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.io.storage.HoodieHBaseKVComparator;
-import org.apache.hudi.io.storage.HoodieHFileReader;
-
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -95,6 +95,7 @@ protected byte[] serializeRecords(List records) throws IOExceptio
HFileContext context = new HFileContextBuilder()
.withBlockSize(DEFAULT_BLOCK_SIZE)
.withCompression(compressionAlgorithm.get())
+ .withCellComparator(new HoodieHBaseKVComparator())
.build();
Configuration conf = new Configuration();
@@ -128,7 +129,7 @@ protected byte[] serializeRecords(List records) throws IOExceptio
}
HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig)
- .withOutputStream(ostream).withFileContext(context).withComparator(new HoodieHBaseKVComparator()).create();
+ .withOutputStream(ostream).withFileContext(context).create();
// Write the records
sortedRecordsMap.forEach((recordKey, recordBytes) -> {
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
index 5e7bef90a08ba..5c81db1b7e288 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
@@ -109,7 +109,7 @@ protected byte[] serializeRecords(List records) throws IOExceptio
ByteArrayOutputStream baos = new ByteArrayOutputStream();
- try (FSDataOutputStream outputStream = new FSDataOutputStream(baos)) {
+ try (FSDataOutputStream outputStream = new FSDataOutputStream(baos, null)) {
try (HoodieParquetStreamWriter parquetWriter = new HoodieParquetStreamWriter<>(outputStream, avroParquetConfig)) {
for (IndexedRecord record : records) {
String recordKey = getRecordKey(record).orElse(null);
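
The one-line change above switches to the two-argument FSDataOutputStream constructor, passing null for the FileSystem.Statistics that a purely in-memory stream does not need; the single-argument form used before is presumably no longer usable on the Hadoop 3 classpath this PR targets. A minimal, self-contained sketch of the same wrapping:

```java
import org.apache.hadoop.fs.FSDataOutputStream;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

// Sketch only: wrapping an in-memory buffer with the two-argument constructor.
public class StreamCtorSketch {
  public static byte[] roundTrip() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // The second argument is FileSystem.Statistics; null is fine for a purely in-memory stream.
    try (FSDataOutputStream out = new FSDataOutputStream(baos, null)) {
      out.writeBytes("hello");
    }
    return baos.toByteArray();
  }
}
```
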
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
index 2d4d96959e150..aaf1dcd7037b7 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHBaseKVComparator.java
@@ -19,11 +19,11 @@
package org.apache.hudi.io.storage;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.CellComparatorImpl;
/**
* This class is explicitly used as Key Comparator to work around the hard coded
* legacy format class names inside HBase. Otherwise, we will face issues with shading.
*/
-public class HoodieHBaseKVComparator extends KeyValue.KVComparator {
+public class HoodieHBaseKVComparator extends CellComparatorImpl {
}
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
index 371da7675e992..5c861c9cc7a26 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
@@ -18,18 +18,16 @@
package org.apache.hudi.io.storage;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
+import org.apache.hudi.avro.HoodieAvroUtils;
+import org.apache.hudi.common.bloom.BloomFilter;
+import org.apache.hudi.common.bloom.BloomFilterFactory;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.util.ClosableIterator;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.common.util.ValidationUtils;
+import org.apache.hudi.common.util.io.ByteBufferBackedInputStream;
+import org.apache.hudi.exception.HoodieException;
+import org.apache.hudi.exception.HoodieIOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
@@ -41,24 +39,31 @@
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.io.hfile.ReaderContext;
+import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
+import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hudi.avro.HoodieAvroUtils;
-import org.apache.hudi.common.bloom.BloomFilter;
-import org.apache.hudi.common.bloom.BloomFilterFactory;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.util.ClosableIterator;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.ValidationUtils;
-import org.apache.hudi.common.util.io.ByteBufferBackedInputStream;
-import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.exception.HoodieIOException;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
public class HoodieHFileReader implements HoodieFileReader {
private static final Logger LOG = LogManager.getLogger(HoodieHFileReader.class);
private Path path;
@@ -80,14 +85,14 @@ public class HoodieHFileReader implements HoodieFileRea
public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig) throws IOException {
this.conf = configuration;
this.path = path;
- this.reader = HFile.createReader(FSUtils.getFs(path.toString(), configuration), path, cacheConfig, conf);
+ this.reader = HFile.createReader(FSUtils.getFs(path.toString(), configuration), path, cacheConfig, true, conf);
}
public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException {
this.conf = configuration;
this.path = path;
this.fsDataInputStream = fs.open(path);
- this.reader = HFile.createReader(fs, path, cacheConfig, configuration);
+ this.reader = HFile.createReader(fs, path, cacheConfig, true, configuration);
}
public HoodieHFileReader(byte[] content) throws IOException {
@@ -95,30 +100,34 @@ public HoodieHFileReader(byte[] content) throws IOException {
Path path = new Path("hoodie");
SeekableByteArrayInputStream bis = new SeekableByteArrayInputStream(content);
FSDataInputStream fsdis = new FSDataInputStream(bis);
- this.reader = HFile.createReader(FSUtils.getFs("hoodie", conf), path, new FSDataInputStreamWrapper(fsdis),
- content.length, new CacheConfig(conf), conf);
+ FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fsdis);
+ FileSystem fs = FSUtils.getFs("hoodie", conf);
+ HFileSystem hfs = (fs instanceof HFileSystem) ? (HFileSystem) fs : new HFileSystem(fs);
+ ReaderContext context = new ReaderContextBuilder()
+ .withFilePath(path)
+ .withInputStreamWrapper(stream)
+ .withFileSize(content.length)
+ .withFileSystem(hfs)
+ .withPrimaryReplicaReader(true)
+ .withReaderType(ReaderContext.ReaderType.STREAM)
+ .build();
+ HFileInfo fileInfo = new HFileInfo(context, conf);
+ this.reader = HFile.createReader(context, fileInfo, new CacheConfig(conf), conf);
+ fileInfo.initMetaAndIndex(reader);
}
@Override
public String[] readMinMaxRecordKeys() {
- try {
-      Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
- return new String[] { new String(fileInfo.get(KEY_MIN_RECORD.getBytes())),
- new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))};
- } catch (IOException e) {
- throw new HoodieException("Could not read min/max record key out of file information block correctly from path", e);
- }
+ HFileInfo fileInfo = reader.getHFileInfo();
+ return new String[] { new String(fileInfo.get(KEY_MIN_RECORD.getBytes())),
+ new String(fileInfo.get(KEY_MAX_RECORD.getBytes()))};
}
@Override
public Schema getSchema() {
if (schema == null) {
- try {
-        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
- schema = new Schema.Parser().parse(new String(fileInfo.get(KEY_SCHEMA.getBytes())));
- } catch (IOException e) {
- throw new HoodieException("Could not read schema of file from path", e);
- }
+ HFileInfo fileInfo = reader.getHFileInfo();
+ schema = new Schema.Parser().parse(new String(fileInfo.get(KEY_SCHEMA.getBytes())));
}
return schema;
@@ -133,10 +142,10 @@ public void withSchema(Schema schema) {
@Override
public BloomFilter readBloomFilter() {
- Map<byte[], byte[]> fileInfo;
+ HFileInfo fileInfo;
try {
- fileInfo = reader.loadFileInfo();
- ByteBuffer serializedFilter = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false);
+ fileInfo = reader.getHFileInfo();
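+ // getMetaBlock() now returns the whole HFileBlock; getBufferWithoutHeader() exposes the
+ // serialized bloom filter as a ByteBuff instead of a java.nio.ByteBuffer.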
+ ByteBuff serializedFilter = reader.getMetaBlock(KEY_BLOOM_FILTER_META_BLOCK, false).getBufferWithoutHeader();
byte[] filterBytes = new byte[serializedFilter.remaining()];
serializedFilter.get(filterBytes); // read the bytes that were written
return BloomFilterFactory.fromString(new String(filterBytes),
@@ -206,7 +215,7 @@ private List<Pair<String, R>> readAllRecords(Schema writerSchema, Schema readerS
final HFileScanner scanner = reader.getScanner(false, false);
if (scanner.seekTo()) {
do {
- Cell c = scanner.getKeyValue();
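+ // getCell() is the HBase 2.x replacement for the old getKeyValue() accessor.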
+ Cell c = scanner.getCell();
final Pair<String, R> keyAndRecordPair = getRecordFromCell(c, writerSchema, readerSchema, keyFieldSchema);
recordList.add(keyAndRecordPair);
} while (scanner.next());
@@ -250,7 +259,7 @@ public List<Pair<String, R>> readRecords(List<String> keys) throws IOException {
*/
public List<Pair<String, R>> readRecords(List<String> keys, Schema schema) throws IOException {
this.schema = schema;
- reader.loadFileInfo();
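+ // file info is loaded eagerly when the reader is constructed; getHFileInfo() simply returns it.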
+ reader.getHFileInfo();
List<Pair<String, R>> records = new ArrayList<>();
for (String key: keys) {
Option<R> value = getRecordByKey(key, schema);
@@ -263,7 +272,7 @@ public List<Pair<String, R>> readRecords(List<String> keys, Schema schema) throw
public ClosableIterator<R> getRecordIterator(List<String> keys, Schema schema) throws IOException {
this.schema = schema;
- reader.loadFileInfo();
+ reader.getHFileInfo();
Iterator<String> iterator = keys.iterator();
return new ClosableIterator<R>() {
private R next;
@@ -310,7 +319,7 @@ public boolean hasNext() {
// To handle when hasNext() is called multiple times for idempotency and/or the first time
if (this.next == null && !this.eof) {
if (!scanner.isSeeked() && scanner.seekTo()) {
- final Pair<String, R> keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema);
+ final Pair<String, R> keyAndRecordPair = getRecordFromCell(scanner.getCell(), getSchema(), readerSchema, keyFieldSchema);
this.next = keyAndRecordPair.getSecond();
}
}
@@ -331,7 +340,7 @@ public R next() {
}
R retVal = this.next;
if (scanner.next()) {
- final Pair<String, R> keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema);
+ final Pair<String, R> keyAndRecordPair = getRecordFromCell(scanner.getCell(), getSchema(), readerSchema, keyFieldSchema);
this.next = keyAndRecordPair.getSecond();
} else {
this.next = null;
@@ -371,7 +380,7 @@ public Option<R> getRecordByKey(String key, Schema readerSchema) throws IOException
}
if (keyScanner.seekTo(kv) == 0) {
- Cell c = keyScanner.getKeyValue();
+ Cell c = keyScanner.getCell();
// Extract the byte value before releasing the lock since we cannot hold on to the returned cell afterwards
value = Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength());
}
diff --git a/hudi-common/src/main/resources/hbase-site.xml b/hudi-common/src/main/resources/hbase-site.xml
new file mode 100644
index 0000000000000..ad680e6b8999e
--- /dev/null
+++ b/hudi-common/src/main/resources/hbase-site.xml
@@ -0,0 +1,2185 @@
+
+
+
+
+
+
+
+
+
+
+
+ hbase.tmp.dir
+ ${java.io.tmpdir}/hbase-${user.name}
+ Temporary directory on the local filesystem.
+ Change this setting to point to a location more permanent
+ than '/tmp', the usual resolve for java.io.tmpdir, as the
+ '/tmp' directory is cleared on machine restart.
+
+
+
+ hbase.rootdir
+ ${hbase.tmp.dir}/hbase
+ The directory shared by region servers and into
+ which HBase persists. The URL should be 'fully-qualified'
+ to include the filesystem scheme. For example, to specify the
+ HDFS directory '/hbase' where the HDFS instance's namenode is
+ running at namenode.example.org on port 9000, set this value to:
+ hdfs://namenode.example.org:9000/hbase. By default, we write
+ to whatever ${hbase.tmp.dir} is set too -- usually /tmp --
+ so change this configuration or else all data will be lost on
+ machine restart.
+
+
+
+ hbase.cluster.distributed
+ false
+ The mode the cluster will be in. Possible values are
+ false for standalone mode and true for distributed mode. If
+ false, startup will run all HBase and ZooKeeper daemons together
+ in the one JVM.
+
+
+
+ hbase.zookeeper.quorum
+
+ 127.0.0.1
+ Comma separated list of servers in the ZooKeeper ensemble
+ (This config. should have been named hbase.zookeeper.ensemble).
+ For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
+ By default this is set to localhost for local and pseudo-distributed modes
+ of operation. For a fully-distributed setup, this should be set to a full
+ list of ZooKeeper ensemble servers. If HBASE_MANAGES_ZK is set in hbase-env.sh
+ this is the list of servers which hbase will start/stop ZooKeeper on as
+ part of cluster start/stop. Client-side, we will take this list of
+ ensemble members and put it together with the hbase.zookeeper.property.clientPort
+ config. and pass it into zookeeper constructor as the connectString
+ parameter.
+
+
+
+
+
+ zookeeper.recovery.retry.maxsleeptime
+ 60000
+ Max sleep time before retry zookeeper operations in milliseconds,
+ a max time is needed here so that sleep time won't grow unboundedly
+
+
+
+ hbase.local.dir
+ ${hbase.tmp.dir}/local/
+ Directory on the local filesystem to be used
+ as a local storage.
+
+
+
+
+
+ hbase.master.port
+ 16000
+ The port the HBase Master should bind to.
+
+
+ hbase.master.info.port
+ 16010
+ The port for the HBase Master web UI.
+ Set to -1 if you do not want a UI instance run.
+
+
+
+ hbase.master.info.bindAddress
+ 0.0.0.0
+ The bind address for the HBase Master web UI
+
+
+
+ hbase.master.logcleaner.plugins
+
+ org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner,org.apache.hadoop.hbase.master.cleaner.TimeToLiveProcedureWALCleaner,org.apache.hadoop.hbase.master.cleaner.TimeToLiveMasterLocalStoreWALCleaner
+
+ A comma-separated list of BaseLogCleanerDelegate invoked by
+ the LogsCleaner service. These WAL cleaners are called in order,
+ so put the cleaner that prunes the most files in front. To
+ implement your own BaseLogCleanerDelegate, just put it in HBase's classpath
+ and add the fully qualified class name here. Always add the above
+ default log cleaners in the list.
+
+
+
+ hbase.master.logcleaner.ttl
+ 600000
+ How long a WAL remain in the archive ({hbase.rootdir}/oldWALs) directory,
+ after which it will be cleaned by a Master thread. The value is in milliseconds.
+
+
+
+ hbase.master.hfilecleaner.plugins
+
+ org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner,org.apache.hadoop.hbase.master.cleaner.TimeToLiveMasterLocalStoreHFileCleaner
+
+ A comma-separated list of BaseHFileCleanerDelegate invoked by
+ the HFileCleaner service. These HFiles cleaners are called in order,
+ so put the cleaner that prunes the most files in front. To
+ implement your own BaseHFileCleanerDelegate, just put it in HBase's classpath
+ and add the fully qualified class name here. Always add the above
+ default hfile cleaners in the list as they will be overwritten in
+ hbase-site.xml.
+
+
+
+ hbase.master.infoserver.redirect
+ true
+ Whether or not the Master listens to the Master web
+ UI port (hbase.master.info.port) and redirects requests to the web
+ UI server shared by the Master and RegionServer. Config. makes
+ sense when Master is serving Regions (not the default).
+
+
+
+ hbase.master.fileSplitTimeout
+ 600000
+ Splitting a region, how long to wait on the file-splitting
+ step before aborting the attempt. Default: 600000. This setting used
+ to be known as hbase.regionserver.fileSplitTimeout in hbase-1.x.
+ Split is now run master-side hence the rename (If a
+ 'hbase.master.fileSplitTimeout' setting found, will use it to
+ prime the current 'hbase.master.fileSplitTimeout'
+ Configuration.
+
+
+
+
+
+ hbase.regionserver.port
+ 16020
+ The port the HBase RegionServer binds to.
+
+
+ hbase.regionserver.info.port
+ 16030
+ The port for the HBase RegionServer web UI
+ Set to -1 if you do not want the RegionServer UI to run.
+
+
+
+ hbase.regionserver.info.bindAddress
+ 0.0.0.0
+ The address for the HBase RegionServer web UI
+
+
+ hbase.regionserver.info.port.auto
+ false
+ Whether or not the Master or RegionServer
+ UI should search for a port to bind to. Enables automatic port
+ search if hbase.regionserver.info.port is already in use.
+ Useful for testing, turned off by default.
+
+
+
+ hbase.regionserver.handler.count
+ 30
+ Count of RPC Listener instances spun up on RegionServers.
+ Same property is used by the Master for count of master handlers.
+ Too many handlers can be counter-productive. Make it a multiple of
+ CPU count. If mostly read-only, handlers count close to cpu count
+ does well. Start with twice the CPU count and tune from there.
+
+
+
+ hbase.ipc.server.callqueue.handler.factor
+ 0.1
+ Factor to determine the number of call queues.
+ A value of 0 means a single queue shared between all the handlers.
+ A value of 1 means that each handler has its own queue.
+
+
+
+ hbase.ipc.server.callqueue.read.ratio
+ 0
+ Split the call queues into read and write queues.
+ The specified interval (which should be between 0.0 and 1.0)
+ will be multiplied by the number of call queues.
+ A value of 0 indicate to not split the call queues, meaning that both read and write
+ requests will be pushed to the same set of queues.
+ A value lower than 0.5 means that there will be less read queues than write queues.
+ A value of 0.5 means there will be the same number of read and write queues.
+ A value greater than 0.5 means that there will be more read queues than write queues.
+ A value of 1.0 means that all the queues except one are used to dispatch read requests.
+
+ Example: Given the total number of call queues being 10
+ a read.ratio of 0 means that: the 10 queues will contain both read/write requests.
+ a read.ratio of 0.3 means that: 3 queues will contain only read requests
+ and 7 queues will contain only write requests.
+ a read.ratio of 0.5 means that: 5 queues will contain only read requests
+ and 5 queues will contain only write requests.
+ a read.ratio of 0.8 means that: 8 queues will contain only read requests
+ and 2 queues will contain only write requests.
+ a read.ratio of 1 means that: 9 queues will contain only read requests
+ and 1 queues will contain only write requests.
+
+
+
+ hbase.ipc.server.callqueue.scan.ratio
+ 0
+ Given the number of read call queues, calculated from the total number
+ of call queues multiplied by the callqueue.read.ratio, the scan.ratio property
+ will split the read call queues into small-read and long-read queues.
+ A value lower than 0.5 means that there will be less long-read queues than short-read queues.
+ A value of 0.5 means that there will be the same number of short-read and long-read queues.
+ A value greater than 0.5 means that there will be more long-read queues than short-read queues
+ A value of 0 or 1 indicate to use the same set of queues for gets and scans.
+
+ Example: Given the total number of read call queues being 8
+ a scan.ratio of 0 or 1 means that: 8 queues will contain both long and short read requests.
+ a scan.ratio of 0.3 means that: 2 queues will contain only long-read requests
+ and 6 queues will contain only short-read requests.
+ a scan.ratio of 0.5 means that: 4 queues will contain only long-read requests
+ and 4 queues will contain only short-read requests.
+ a scan.ratio of 0.8 means that: 6 queues will contain only long-read requests
+ and 2 queues will contain only short-read requests.
+
+
+
+ hbase.regionserver.msginterval
+ 3000
+ Interval between messages from the RegionServer to Master
+ in milliseconds.
+
+
+
+ hbase.regionserver.logroll.period
+ 3600000
+ Period at which we will roll the commit log regardless
+ of how many edits it has.
+
+
+
+ hbase.regionserver.logroll.errors.tolerated
+ 2
+ The number of consecutive WAL close errors we will allow
+ before triggering a server abort. A setting of 0 will cause the
+ region server to abort if closing the current WAL writer fails during
+ log rolling. Even a small value (2 or 3) will allow a region server
+ to ride over transient HDFS errors.
+
+
+
+ hbase.regionserver.hlog.reader.impl
+ org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader
+ The WAL file reader implementation.
+
+
+ hbase.regionserver.hlog.writer.impl
+ org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter
+ The WAL file writer implementation.
+
+
+ hbase.regionserver.global.memstore.size
+
+ Maximum size of all memstores in a region server before new
+ updates are blocked and flushes are forced. Defaults to 40% of heap (0.4).
+ Updates are blocked and flushes are forced until size of all memstores
+ in a region server hits hbase.regionserver.global.memstore.size.lower.limit.
+ The default value in this configuration has been intentionally left empty in order to
+ honor the old hbase.regionserver.global.memstore.upperLimit property if present.
+
+
+
+ hbase.regionserver.global.memstore.size.lower.limit
+
+ Maximum size of all memstores in a region server before flushes
+ are forced. Defaults to 95% of hbase.regionserver.global.memstore.size
+ (0.95). A 100% value for this value causes the minimum possible flushing
+ to occur when updates are blocked due to memstore limiting. The default
+ value in this configuration has been intentionally left empty in order to
+ honor the old hbase.regionserver.global.memstore.lowerLimit property if
+ present.
+
+
+
+ hbase.systemtables.compacting.memstore.type
+ NONE
+ Determines the type of memstore to be used for system tables like
+ META, namespace tables etc. By default NONE is the type and hence we use the
+ default memstore for all the system tables. If we need to use compacting
+ memstore for system tables then set this property to BASIC/EAGER
+
+
+
+ hbase.regionserver.optionalcacheflushinterval
+ 3600000
+
+ Maximum amount of time an edit lives in memory before being automatically flushed.
+ Default 1 hour. Set it to 0 to disable automatic flushing.
+
+
+
+ hbase.regionserver.dns.interface
+ default
+ The name of the Network Interface from which a region server
+ should report its IP address.
+
+
+
+ hbase.regionserver.dns.nameserver
+ default
+ The host name or IP address of the name server (DNS)
+ which a region server should use to determine the host name used by the
+ master for communication and display purposes.
+
+
+
+ hbase.regionserver.region.split.policy
+ org.apache.hadoop.hbase.regionserver.SteppingSplitPolicy
+
+ A split policy determines when a region should be split. The various
+ other split policies that are available currently are BusyRegionSplitPolicy,
+ ConstantSizeRegionSplitPolicy, DisabledRegionSplitPolicy,
+ DelimitedKeyPrefixRegionSplitPolicy, KeyPrefixRegionSplitPolicy, and
+ SteppingSplitPolicy. DisabledRegionSplitPolicy blocks manual region splitting.
+
+
+
+ hbase.regionserver.regionSplitLimit
+ 1000
+
+ Limit for the number of regions after which no more region splitting
+ should take place. This is not hard limit for the number of regions
+ but acts as a guideline for the regionserver to stop splitting after
+ a certain limit. Default is set to 1000.
+
+
+
+
+
+ zookeeper.session.timeout
+ 90000
+ ZooKeeper session timeout in milliseconds. It is used in two different ways.
+ First, this value is used in the ZK client that HBase uses to connect to the ensemble.
+ It is also used by HBase when it starts a ZK server and it is passed as the 'maxSessionTimeout'.
+ See https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#ch_zkSessions.
+ For example, if an HBase region server connects to a ZK ensemble that's also managed
+ by HBase, then the session timeout will be the one specified by this configuration.
+ But, a region server that connects to an ensemble managed with a different configuration
+ will be subjected that ensemble's maxSessionTimeout. So, even though HBase might propose
+ using 90 seconds, the ensemble can have a max timeout lower than this and it will take
+ precedence. The current default maxSessionTimeout that ZK ships with is 40 seconds, which is lower than
+ HBase's.
+
+
+
+ zookeeper.znode.parent
+ /hbase
+ Root ZNode for HBase in ZooKeeper. All of HBase's ZooKeeper
+ files that are configured with a relative path will go under this node.
+ By default, all of HBase's ZooKeeper file paths are configured with a
+ relative path, so they will all go under this directory unless changed.
+
+
+
+ zookeeper.znode.acl.parent
+ acl
+ Root ZNode for access control lists.
+
+
+ hbase.zookeeper.dns.interface
+ default
+ The name of the Network Interface from which a ZooKeeper server
+ should report its IP address.
+
+
+
+ hbase.zookeeper.dns.nameserver
+ default
+ The host name or IP address of the name server (DNS)
+ which a ZooKeeper server should use to determine the host name used by the
+ master for communication and display purposes.
+
+
+
+
+ hbase.zookeeper.peerport
+ 2888
+ Port used by ZooKeeper peers to talk to each other.
+ See https://zookeeper.apache.org/doc/r3.3.3/zookeeperStarted.html#sc_RunningReplicatedZooKeeper
+ for more information.
+
+
+
+ hbase.zookeeper.leaderport
+ 3888
+ Port used by ZooKeeper for leader election.
+ See https://zookeeper.apache.org/doc/r3.3.3/zookeeperStarted.html#sc_RunningReplicatedZooKeeper
+ for more information.
+
+
+
+
+
+
+ hbase.zookeeper.property.initLimit
+ 10
+ Property from ZooKeeper's config zoo.cfg.
+ The number of ticks that the initial synchronization phase can take.
+
+
+
+ hbase.zookeeper.property.syncLimit
+ 5
+ Property from ZooKeeper's config zoo.cfg.
+ The number of ticks that can pass between sending a request and getting an
+ acknowledgment.
+
+
+
+ hbase.zookeeper.property.dataDir
+ ${hbase.tmp.dir}/zookeeper
+ Property from ZooKeeper's config zoo.cfg.
+ The directory where the snapshot is stored.
+
+
+
+ hbase.zookeeper.property.clientPort
+ 2181
+ Property from ZooKeeper's config zoo.cfg.
+ The port at which the clients will connect.
+
+
+
+ hbase.zookeeper.property.maxClientCnxns
+ 300
+ Property from ZooKeeper's config zoo.cfg.
+ Limit on number of concurrent connections (at the socket level) that a
+ single client, identified by IP address, may make to a single member of
+ the ZooKeeper ensemble. Set high to avoid zk connection issues running
+ standalone and pseudo-distributed.
+
+
+
+
+
+
+ hbase.client.write.buffer
+ 2097152
+ Default size of the BufferedMutator write buffer in bytes.
+ A bigger buffer takes more memory -- on both the client and server
+ side since server instantiates the passed write buffer to process
+ it -- but a larger buffer size reduces the number of RPCs made.
+ For an estimate of server-side memory-used, evaluate
+ hbase.client.write.buffer * hbase.regionserver.handler.count
+
+
+
+ hbase.client.pause
+ 100
+ General client pause value. Used mostly as value to wait
+ before running a retry of a failed get, region lookup, etc.
+ See hbase.client.retries.number for description of how we backoff from
+ this initial pause amount and how this pause works w/ retries.
+
+
+
+ hbase.client.pause.cqtbe
+
+ Whether or not to use a special client pause for
+ CallQueueTooBigException (cqtbe). Set this property to a higher value
+ than hbase.client.pause if you observe frequent CQTBE from the same
+ RegionServer and the call queue there keeps full
+
+
+
+ hbase.client.retries.number
+ 15
+ Maximum retries. Used as maximum for all retryable
+ operations such as the getting of a cell's value, starting a row update,
+ etc. Retry interval is a rough function based on hbase.client.pause. At
+ first we retry at this interval but then with backoff, we pretty quickly reach
+ retrying every ten seconds. See HConstants#RETRY_BACKOFF for how the backup
+ ramps up. Change this setting and hbase.client.pause to suit your workload.
+
+
+
+ hbase.client.max.total.tasks
+ 100
+ The maximum number of concurrent mutation tasks a single HTable instance will
+ send to the cluster.
+
+
+
+ hbase.client.max.perserver.tasks
+ 2
+ The maximum number of concurrent mutation tasks a single HTable instance will
+ send to a single region server.
+
+
+
+ hbase.client.max.perregion.tasks
+ 1
+ The maximum number of concurrent mutation tasks the client will
+ maintain to a single Region. That is, if there is already
+ hbase.client.max.perregion.tasks writes in progress for this region, new puts
+ won't be sent to this region until some writes finishes.
+
+
+
+ hbase.client.perserver.requests.threshold
+ 2147483647
+ The max number of concurrent pending requests for one server in all client threads
+ (process level). Exceeding requests will be thrown ServerTooBusyException immediately to prevent
+ user's threads being occupied and blocked by only one slow region server. If you use a fix
+ number of threads to access HBase in a synchronous way, set this to a suitable value which is
+ related to the number of threads will help you. See
+ https://issues.apache.org/jira/browse/HBASE-16388 for details.
+
+
+
+ hbase.client.scanner.caching
+ 2147483647
+ Number of rows that we try to fetch when calling next
+ on a scanner if it is not served from (local, client) memory. This configuration
+ works together with hbase.client.scanner.max.result.size to try and use the
+ network efficiently. The default value is Integer.MAX_VALUE by default so that
+ the network will fill the chunk size defined by hbase.client.scanner.max.result.size
+ rather than be limited by a particular number of rows since the size of rows varies
+ table to table. If you know ahead of time that you will not require more than a certain
+ number of rows from a scan, this configuration should be set to that row limit via
+ Scan#setCaching. Higher caching values will enable faster scanners but will eat up more
+ memory and some calls of next may take longer and longer times when the cache is empty.
+ Do not set this value such that the time between invocations is greater than the scanner
+ timeout; i.e. hbase.client.scanner.timeout.period
+
+
+
+ hbase.client.keyvalue.maxsize
+ 10485760
+ Specifies the combined maximum allowed size of a KeyValue
+ instance. This is to set an upper boundary for a single entry saved in a
+ storage file. Since they cannot be split it helps avoiding that a region
+ cannot be split any further because the data is too large. It seems wise
+ to set this to a fraction of the maximum region size. Setting it to zero
+ or less disables the check.
+
+
+
+ hbase.server.keyvalue.maxsize
+ 10485760
+ Maximum allowed size of an individual cell, inclusive of value and all key
+ components. A value of 0 or less disables the check.
+ The default value is 10MB.
+ This is a safety setting to protect the server from OOM situations.
+
+
+
+ hbase.client.scanner.timeout.period
+ 60000
+ Client scanner lease period in milliseconds.
+
+
+ hbase.client.localityCheck.threadPoolSize
+ 2
+
+
+
+
+ hbase.bulkload.retries.number
+ 10
+ Maximum retries. This is maximum number of iterations
+ to atomic bulk loads are attempted in the face of splitting operations
+ 0 means never give up.
+
+
+
+ hbase.master.balancer.maxRitPercent
+ 1.0
+ The max percent of regions in transition when balancing.
+ The default value is 1.0. So there are no balancer throttling. If set this config to 0.01,
+ It means that there are at most 1% regions in transition when balancing.
+ Then the cluster's availability is at least 99% when balancing.
+
+
+
+ hbase.balancer.period
+
+ 300000
+ Period at which the region balancer runs in the Master, in
+ milliseconds.
+
+
+
+ hbase.regions.slop
+ 0.001
+ Rebalance if any regionserver has average + (average * slop) regions.
+ The default value of this parameter is 0.001 in StochasticLoadBalancer (the default load
+ balancer), while the default is 0.2 in other load balancers (i.e.,
+ SimpleLoadBalancer).
+
+
+
+ hbase.normalizer.period
+ 300000
+ Period at which the region normalizer runs in the Master, in
+ milliseconds.
+
+
+
+ hbase.normalizer.split.enabled
+ true
+ Whether to split a region as part of normalization.
+
+
+ hbase.normalizer.merge.enabled
+ true
+ Whether to merge a region as part of normalization.
+
+
+ hbase.normalizer.min.region.count
+ 3
+ The minimum number of regions in a table to consider it for merge
+ normalization.
+
+
+
+ hbase.normalizer.merge.min_region_age.days
+ 3
+ The minimum age for a region to be considered for a merge, in days.
+
+
+
+ hbase.normalizer.merge.min_region_size.mb
+ 1
+ The minimum size for a region to be considered for a merge, in whole
+ MBs.
+
+
+
+ hbase.table.normalization.enabled
+ false
+ This config is used to set default behaviour of normalizer at table level.
+ To override this at table level one can set NORMALIZATION_ENABLED at table descriptor level
+ and that property will be honored
+
+
+
+ hbase.server.thread.wakefrequency
+ 10000
+ Time to sleep in between searches for work (in milliseconds).
+ Used as sleep interval by service threads such as log roller.
+
+
+
+ hbase.server.versionfile.writeattempts
+ 3
+
+ How many times to retry attempting to write a version file
+ before just aborting. Each attempt is separated by the
+ hbase.server.thread.wakefrequency milliseconds.
+
+
+
+ hbase.hregion.memstore.flush.size
+ 134217728
+
+ Memstore will be flushed to disk if size of the memstore
+ exceeds this number of bytes. Value is checked by a thread that runs
+ every hbase.server.thread.wakefrequency.
+
+
+
+ hbase.hregion.percolumnfamilyflush.size.lower.bound.min
+ 16777216
+
+ If FlushLargeStoresPolicy is used and there are multiple column families,
+ then every time that we hit the total memstore limit, we find out all the
+ column families whose memstores exceed a "lower bound" and only flush them
+ while retaining the others in memory. The "lower bound" will be
+ "hbase.hregion.memstore.flush.size / column_family_number" by default
+ unless value of this property is larger than that. If none of the families
+ have their memstore size more than lower bound, all the memstores will be
+ flushed (just as usual).
+
+
+
+ hbase.hregion.preclose.flush.size
+ 5242880
+
+ If the memstores in a region are this size or larger when we go
+ to close, run a "pre-flush" to clear out memstores before we put up
+ the region closed flag and take the region offline. On close,
+ a flush is run under the close flag to empty memory. During
+ this time the region is offline and we are not taking on any writes.
+ If the memstore content is large, this flush could take a long time to
+ complete. The preflush is meant to clean out the bulk of the memstore
+ before putting up the close flag and taking the region offline so the
+ flush that runs under the close flag has little to do.
+
+
+
+ hbase.hregion.memstore.block.multiplier
+ 4
+
+ Block updates if memstore has hbase.hregion.memstore.block.multiplier
+ times hbase.hregion.memstore.flush.size bytes. Useful preventing
+ runaway memstore during spikes in update traffic. Without an
+ upper-bound, memstore fills such that when it flushes the
+ resultant flush files take a long time to compact or split, or
+ worse, we OOME.
+
+
+
+ hbase.hregion.memstore.mslab.enabled
+ true
+
+ Enables the MemStore-Local Allocation Buffer,
+ a feature which works to prevent heap fragmentation under
+ heavy write loads. This can reduce the frequency of stop-the-world
+ GC pauses on large heaps.
+
+
+
+ hbase.hregion.memstore.mslab.chunksize
+ 2097152
+ The maximum byte size of a chunk in the MemStoreLAB. Unit: bytes
+
+
+ hbase.regionserver.offheap.global.memstore.size
+ 0
+ The amount of off-heap memory all MemStores in a RegionServer may use.
+ A value of 0 means that no off-heap memory will be used and all chunks in MSLAB
+ will be HeapByteBuffer, otherwise the non-zero value means how many megabyte of
+ off-heap memory will be used for chunks in MSLAB and all chunks in MSLAB will be
+ DirectByteBuffer. Unit: megabytes.
+
+
+
+ hbase.hregion.memstore.mslab.max.allocation
+ 262144
+ The maximal size of one allocation in the MemStoreLAB, if the desired byte
+ size exceed this threshold then it will be just allocated from JVM heap rather than MemStoreLAB.
+
+
+
+ hbase.hregion.max.filesize
+ 10737418240
+
+ Maximum HFile size. If the sum of the sizes of a region's HFiles has grown to exceed this
+ value, the region is split in two.
+
+
+
+ hbase.hregion.split.overallfiles
+ false
+ If we should sum overall region files size when check to split.
+
+
+ hbase.hregion.majorcompaction
+ 604800000
+ Time between major compactions, expressed in milliseconds. Set to 0 to disable
+ time-based automatic major compactions. User-requested and size-based major compactions will
+ still run. This value is multiplied by hbase.hregion.majorcompaction.jitter to cause
+ compaction to start at a somewhat-random time during a given window of time. The default value
+ is 7 days, expressed in milliseconds. If major compactions are causing disruption in your
+ environment, you can configure them to run at off-peak times for your deployment, or disable
+ time-based major compactions by setting this parameter to 0, and run major compactions in a
+ cron job or by another external mechanism.
+
+
+
+ hbase.hregion.majorcompaction.jitter
+ 0.50
+ A multiplier applied to hbase.hregion.majorcompaction to cause compaction to occur
+ a given amount of time either side of hbase.hregion.majorcompaction. The smaller the number,
+ the closer the compactions will happen to the hbase.hregion.majorcompaction
+ interval.
+
+
+
+ hbase.hstore.compactionThreshold
+ 3
+ If more than this number of StoreFiles exist in any one Store
+ (one StoreFile is written per flush of MemStore), a compaction is run to rewrite all
+ StoreFiles into a single StoreFile. Larger values delay compaction, but when compaction does
+ occur, it takes longer to complete.
+
+
+
+ hbase.regionserver.compaction.enabled
+ true
+ Enable/disable compactions on by setting true/false.
+ We can further switch compactions dynamically with the
+ compaction_switch shell command.
+
+
+
+ hbase.hstore.flusher.count
+ 2
+ The number of flush threads. With fewer threads, the MemStore flushes will be
+ queued. With more threads, the flushes will be executed in parallel, increasing the load on
+ HDFS, and potentially causing more compactions.
+
+
+
+ hbase.hstore.blockingStoreFiles
+ 16
+ If more than this number of StoreFiles exist in any one Store (one StoreFile
+ is written per flush of MemStore), updates are blocked for this region until a compaction is
+ completed, or until hbase.hstore.blockingWaitTime has been exceeded.
+
+
+
+ hbase.hstore.blockingWaitTime
+ 90000
+ The time for which a region will block updates after reaching the StoreFile limit
+ defined by hbase.hstore.blockingStoreFiles. After this time has elapsed, the region will stop
+ blocking updates even if a compaction has not been completed.
+
+
+
+ hbase.hstore.compaction.min
+
+ The minimum number of StoreFiles which must be eligible for compaction before
+ compaction can run. The goal of tuning hbase.hstore.compaction.min is to avoid ending up with
+ too many tiny StoreFiles to compact. Setting this value to 2 would cause a minor compaction
+ each time you have two StoreFiles in a Store, and this is probably not appropriate. If you
+ set this value too high, all the other values will need to be adjusted accordingly. For most
+ cases, the default value is appropriate (empty value here, results in 3 by code logic). In
+ previous versions of HBase, the parameter hbase.hstore.compaction.min was named
+ hbase.hstore.compactionThreshold.
+
+
+
+ hbase.hstore.compaction.max
+ 10
+ The maximum number of StoreFiles which will be selected for a single minor
+ compaction, regardless of the number of eligible StoreFiles. Effectively, the value of
+ hbase.hstore.compaction.max controls the length of time it takes a single compaction to
+ complete. Setting it larger means that more StoreFiles are included in a compaction. For most
+ cases, the default value is appropriate.
+
+
+
+ hbase.hstore.compaction.min.size
+ 134217728
+ A StoreFile (or a selection of StoreFiles, when using ExploringCompactionPolicy)
+ smaller than this size will always be eligible for minor compaction.
+ HFiles this size or larger are evaluated by hbase.hstore.compaction.ratio to determine if
+ they are eligible. Because this limit represents the "automatic include" limit for all
+ StoreFiles smaller than this value, this value may need to be reduced in write-heavy
+ environments where many StoreFiles in the 1-2 MB range are being flushed, because every
+ StoreFile will be targeted for compaction and the resulting StoreFiles may still be under the
+ minimum size and require further compaction. If this parameter is lowered, the ratio check is
+ triggered more quickly. This addressed some issues seen in earlier versions of HBase but
+ changing this parameter is no longer necessary in most situations. Default: 128 MB expressed
+ in bytes.
+
+
+
+ hbase.hstore.compaction.max.size
+ 9223372036854775807
+ A StoreFile (or a selection of StoreFiles, when using ExploringCompactionPolicy)
+ larger than this size will be excluded from compaction. The effect of
+ raising hbase.hstore.compaction.max.size is fewer, larger StoreFiles that do not get
+ compacted often. If you feel that compaction is happening too often without much benefit, you
+ can try raising this value. Default: the value of LONG.MAX_VALUE, expressed in bytes.
+
+
+
+ hbase.hstore.compaction.ratio
+ 1.2F
+ For minor compaction, this ratio is used to determine whether a given StoreFile
+ which is larger than hbase.hstore.compaction.min.size is eligible for compaction. Its
+ effect is to limit compaction of large StoreFiles. The value of hbase.hstore.compaction.ratio
+ is expressed as a floating-point decimal. A large ratio, such as 10, will produce a single
+ giant StoreFile. Conversely, a low value, such as .25, will produce behavior similar to the
+ BigTable compaction algorithm, producing four StoreFiles. A moderate value of between 1.0 and
+ 1.4 is recommended. When tuning this value, you are balancing write costs with read costs.
+ Raising the value (to something like 1.4) will have more write costs, because you will
+ compact larger StoreFiles. However, during reads, HBase will need to seek through fewer
+ StoreFiles to accomplish the read. Consider this approach if you cannot take advantage of
+ Bloom filters. Otherwise, you can lower this value to something like 1.0 to reduce the
+ background cost of writes, and use Bloom filters to control the number of StoreFiles touched
+ during reads. For most cases, the default value is appropriate.
+
+
+
+ hbase.hstore.compaction.ratio.offpeak
+ 5.0F
+ Allows you to set a different (by default, more aggressive) ratio for determining
+ whether larger StoreFiles are included in compactions during off-peak hours. Works in the
+ same way as hbase.hstore.compaction.ratio. Only applies if hbase.offpeak.start.hour and
+ hbase.offpeak.end.hour are also enabled.
+
+
+
+ hbase.hstore.time.to.purge.deletes
+ 0
+ The amount of time to delay purging of delete markers with future timestamps. If
+ unset, or set to 0, all delete markers, including those with future timestamps, are purged
+ during the next major compaction. Otherwise, a delete marker is kept until the major compaction
+ which occurs after the marker's timestamp plus the value of this setting, in milliseconds.
+
+
+
+ hbase.offpeak.start.hour
+ -1
+ The start of off-peak hours, expressed as an integer between 0 and 23, inclusive.
+ Set to -1 to disable off-peak.
+
+
+
+ hbase.offpeak.end.hour
+ -1
+ The end of off-peak hours, expressed as an integer between 0 and 23, inclusive. Set
+ to -1 to disable off-peak.
+
+
+
+ hbase.regionserver.thread.compaction.throttle
+ 2684354560
+ There are two different thread pools for compactions, one for large compactions and
+ the other for small compactions. This helps to keep compaction of lean tables (such as
+ hbase:meta) fast. If a compaction is larger than this threshold, it
+ goes into the large compaction pool. In most cases, the default value is appropriate. Default:
+ 2 x hbase.hstore.compaction.max x hbase.hregion.memstore.flush.size (which defaults to 128MB).
+ The value field assumes that the value of hbase.hregion.memstore.flush.size is unchanged from
+ the default.
+
+
+
+ hbase.regionserver.majorcompaction.pagecache.drop
+ true
+ Specifies whether to drop pages read/written into the system page cache by
+ major compactions. Setting it to true helps prevent major compactions from
+ polluting the page cache, which is almost always required, especially for clusters
+ with low/moderate memory to storage ratio.
+
+
+
+ hbase.regionserver.minorcompaction.pagecache.drop
+ true
+ Specifies whether to drop pages read/written into the system page cache by
+ minor compactions. Setting it to true helps prevent minor compactions from
+ polluting the page cache, which is most beneficial on clusters with low
+ memory to storage ratio or very write heavy clusters. You may want to set it to
+ false under moderate to low write workload when bulk of the reads are
+ on the most recently written data.
+
+
+
+ hbase.hstore.compaction.kv.max
+ 10
+ The maximum number of KeyValues to read and then write in a batch when flushing or
+ compacting. Set this lower if you have big KeyValues and problems with Out Of Memory
+ Exceptions Set this higher if you have wide, small rows.
+
+
+
+ hbase.storescanner.parallel.seek.enable
+ false
+
+ Enables StoreFileScanner parallel-seeking in StoreScanner,
+ a feature which can reduce response latency under special conditions.
+
+
+
+ hbase.storescanner.parallel.seek.threads
+ 10
+
+ The default thread pool size if parallel-seeking feature enabled.
+
+
+
+ hfile.block.cache.policy
+ LRU
+ The eviction policy for the L1 block cache (LRU or TinyLFU).
+
+
+ hfile.block.cache.size
+ 0.4
+ Percentage of maximum heap (-Xmx setting) to allocate to block cache
+ used by a StoreFile. Default of 0.4 means allocate 40%.
+ Set to 0 to disable but it's not recommended; you need at least
+ enough cache to hold the storefile indices.
+
+
+
+ hfile.block.index.cacheonwrite
+ false
+ This allows to put non-root multi-level index blocks into the block
+ cache at the time the index is being written.
+
+
+
+ hfile.index.block.max.size
+ 131072
+ When the size of a leaf-level, intermediate-level, or root-level
+ index block in a multi-level block index grows to this size, the
+ block is written out and a new block is started.
+
+
+
+ hbase.bucketcache.ioengine
+
+ Where to store the contents of the bucketcache. One of: offheap,
+ file, files, mmap or pmem. If a file or files, set it to file(s):PATH_TO_FILE.
+ mmap means the content will be in an mmaped file. Use mmap:PATH_TO_FILE. 'pmem'
+ is bucket cache over a file on the persistent memory device.
+ Use pmem:PATH_TO_FILE.
+ See http://hbase.apache.org/book.html#offheap.blockcache for more information.
+
+
+
+ hbase.hstore.compaction.throughput.lower.bound
+ 52428800
+ The target lower bound on aggregate compaction throughput, in bytes/sec. Allows
+ you to tune the minimum available compaction throughput when the
+ PressureAwareCompactionThroughputController throughput controller is active. (It is active by
+ default.)
+
+
+
+ hbase.hstore.compaction.throughput.higher.bound
+ 104857600
+ The target upper bound on aggregate compaction throughput, in bytes/sec. Allows
+ you to control aggregate compaction throughput demand when the
+ PressureAwareCompactionThroughputController throughput controller is active. (It is active by
+ default.) The maximum throughput will be tuned between the lower and upper bounds when
+ compaction pressure is within the range [0.0, 1.0]. If compaction pressure is 1.0 or greater
+ the higher bound will be ignored until pressure returns to the normal range.
+
+
+
+ hbase.bucketcache.size
+
+ A float that EITHER represents a percentage of total heap memory
+ size to give to the cache (if < 1.0) OR, it is the total capacity in
+ megabytes of BucketCache. Default: 0.0
+
+
+
+ hbase.bucketcache.bucket.sizes
+
+ A comma-separated list of sizes for buckets for the bucketcache.
+ Can be multiple sizes. List block sizes in order from smallest to largest.
+ The sizes you use will depend on your data access patterns.
+ Must be a multiple of 256 else you will run into
+ 'java.io.IOException: Invalid HFile block magic' when you go to read from cache.
+ If you specify no values here, then you pick up the default bucketsizes set
+ in code (See BucketAllocator#DEFAULT_BUCKET_SIZES).
+
+
+
+ hfile.format.version
+ 3
+ The HFile format version to use for new files.
+ Version 3 adds support for tags in hfiles (See http://hbase.apache.org/book.html#hbase.tags).
+ Also see the configuration 'hbase.replication.rpc.codec'.
+
+
+
+ hfile.block.bloom.cacheonwrite
+ false
+ Enables cache-on-write for inline blocks of a compound Bloom filter.
+
+
+ io.storefile.bloom.block.size
+ 131072
+ The size in bytes of a single block ("chunk") of a compound Bloom
+ filter. This size is approximate, because Bloom blocks can only be
+ inserted at data block boundaries, and the number of keys per data
+ block varies.
+
+
+
+ hbase.rs.cacheblocksonwrite
+ false
+ Whether an HFile block should be added to the block cache when the
+ block is finished.
+
+
+
+ hbase.rpc.timeout
+ 60000
+ This is for the RPC layer to define how long (millisecond) HBase client applications
+ take for a remote call to time out. It uses pings to check connections
+ but will eventually throw a TimeoutException.
+
+
+
+ hbase.client.operation.timeout
+ 1200000
+ Operation timeout is a top-level restriction (millisecond) that makes sure a
+ blocking operation in Table will not be blocked more than this. In each operation, if rpc
+ request fails because of timeout or other reason, it will retry until success or throw
+ RetriesExhaustedException. But if the total time being blocking reach the operation timeout
+ before retries exhausted, it will break early and throw SocketTimeoutException.
+
+
+
+ hbase.cells.scanned.per.heartbeat.check
+ 10000
+ The number of cells scanned in between heartbeat checks. Heartbeat
+ checks occur during the processing of scans to determine whether or not the
+ server should stop scanning in order to send back a heartbeat message to the
+ client. Heartbeat messages are used to keep the client-server connection alive
+ during long running scans. Small values mean that the heartbeat checks will
+ occur more often and thus will provide a tighter bound on the execution time of
+ the scan. Larger values mean that the heartbeat checks occur less frequently
+
+
+
+ hbase.rpc.shortoperation.timeout
+ 10000
+ This is another version of "hbase.rpc.timeout". For those RPC operation
+ within cluster, we rely on this configuration to set a short timeout limitation
+ for short operation. For example, short rpc timeout for region server's trying
+ to report to active master can benefit quicker master failover process.
+
+
+
+ hbase.ipc.client.tcpnodelay
+ true
+ Set no delay on rpc socket connections. See
+ http://docs.oracle.com/javase/1.5.0/docs/api/java/net/Socket.html#getTcpNoDelay()
+
+
+
+ hbase.unsafe.regionserver.hostname
+
+ This config is for experts: don't set its value unless you really know what you are doing.
+ When set to a non-empty value, this represents the (external facing) hostname for the underlying server.
+ See https://issues.apache.org/jira/browse/HBASE-12954 for details.
+
+
+
+ hbase.unsafe.regionserver.hostname.disable.master.reversedns
+ false
+ This config is for experts: don't set its value unless you really know what you are doing.
+ When set to true, regionserver will use the current node hostname for the servername and HMaster will
+ skip reverse DNS lookup and use the hostname sent by regionserver instead. Note that this config and
+ hbase.unsafe.regionserver.hostname are mutually exclusive. See https://issues.apache.org/jira/browse/HBASE-18226
+ for more details.
+
+
+
+
+ hbase.master.keytab.file
+
+ Full path to the kerberos keytab file to use for logging in
+ the configured HMaster server principal.
+
+
+
+ hbase.master.kerberos.principal
+
+ Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos principal name
+ that should be used to run the HMaster process. The principal name should
+ be in the form: user/hostname@DOMAIN. If "_HOST" is used as the hostname
+ portion, it will be replaced with the actual hostname of the running
+ instance.
+
+
+
+ hbase.regionserver.keytab.file
+
+ Full path to the kerberos keytab file to use for logging in
+ the configured HRegionServer server principal.
+
+
+
+ hbase.regionserver.kerberos.principal
+
+ Ex. "hbase/_HOST@EXAMPLE.COM". The kerberos principal name
+ that should be used to run the HRegionServer process. The principal name
+ should be in the form: user/hostname@DOMAIN. If "_HOST" is used as the
+ hostname portion, it will be replaced with the actual hostname of the
+ running instance. An entry for this principal must exist in the file
+ specified in hbase.regionserver.keytab.file
+
+
+
+
+ hadoop.policy.file
+ hbase-policy.xml
+ The policy configuration file used by RPC servers to make
+ authorization decisions on client requests. Only used when HBase
+ security is enabled.
+
+
+
+ hbase.superuser
+
+ List of users or groups (comma-separated), who are allowed
+ full privileges, regardless of stored ACLs, across the cluster.
+ Only used when HBase security is enabled.
+
+
+
+ hbase.auth.key.update.interval
+ 86400000
+ The update interval for master key for authentication tokens
+ in servers in milliseconds. Only used when HBase security is enabled.
+
+
+
+ hbase.auth.token.max.lifetime
+ 604800000
+ The maximum lifetime in milliseconds after which an
+ authentication token expires. Only used when HBase security is enabled.
+
+
+
+ hbase.ipc.client.fallback-to-simple-auth-allowed
+ false
+ When a client is configured to attempt a secure connection, but attempts to
+ connect to an insecure server, that server may instruct the client to
+ switch to SASL SIMPLE (unsecure) authentication. This setting controls
+ whether or not the client will accept this instruction from the server.
+ When false (the default), the client will not allow the fallback to SIMPLE
+ authentication, and will abort the connection.
+
+
+
+ hbase.ipc.server.fallback-to-simple-auth-allowed
+ false
+ When a server is configured to require secure connections, it will
+ reject connection attempts from clients using SASL SIMPLE (unsecure) authentication.
+ This setting allows secure servers to accept SASL SIMPLE connections from clients
+ when the client requests. When false (the default), the server will not allow the fallback
+ to SIMPLE authentication, and will reject the connection. WARNING: This setting should ONLY
+ be used as a temporary measure while converting clients over to secure authentication. It
+ MUST BE DISABLED for secure operation.
+
+
+
+ hbase.display.keys
+ true
+ When this is set to true the webUI and such will display all start/end keys
+ as part of the table details, region names, etc. When this is set to false,
+ the keys are hidden.
+
+
+
+ hbase.coprocessor.enabled
+ true
+ Enables or disables coprocessor loading. If 'false'
+ (disabled), any other coprocessor related configuration will be ignored.
+
+
+
+ hbase.coprocessor.user.enabled
+ true
+ Enables or disables user (aka. table) coprocessor loading.
+ If 'false' (disabled), any table coprocessor attributes in table
+ descriptors will be ignored. If "hbase.coprocessor.enabled" is 'false'
+ this setting has no effect.
+
+
+
+ hbase.coprocessor.region.classes
+
+ A comma-separated list of Coprocessors that are loaded by
+ default on all tables. For any override coprocessor method, these classes
+ will be called in order. After implementing your own Coprocessor, just put
+ it in HBase's classpath and add the fully qualified class name here.
+ A coprocessor can also be loaded on demand by setting HTableDescriptor.
+
+
+
+ hbase.coprocessor.master.classes
+
+ A comma-separated list of
+ org.apache.hadoop.hbase.coprocessor.MasterObserver coprocessors that are
+ loaded by default on the active HMaster process. For any implemented
+ coprocessor methods, the listed classes will be called in order. After
+ implementing your own MasterObserver, just put it in HBase's classpath
+ and add the fully qualified class name here.
+
+
+
+ hbase.coprocessor.abortonerror
+ true
+ Set to true to cause the hosting server (master or regionserver)
+ to abort if a coprocessor fails to load, fails to initialize, or throws an
+ unexpected Throwable object. Setting this to false will allow the server to
+ continue execution but the system wide state of the coprocessor in question
+ will become inconsistent as it will be properly executing in only a subset
+ of servers, so this is most useful for debugging only.
+
+
+
+ hbase.rest.port
+ 8080
+ The port for the HBase REST server.
+
+
+ hbase.rest.readonly
+ false
+ Defines the mode the REST server will be started in. Possible values are:
+ false: All HTTP methods are permitted - GET/PUT/POST/DELETE.
+ true: Only the GET method is permitted.
+
+
+
+ hbase.rest.threads.max
+ 100
+ The maximum number of threads of the REST server thread pool.
+ Threads in the pool are reused to process REST requests. This
+ controls the maximum number of requests processed concurrently.
+ It may help to control the memory used by the REST server to
+ avoid OOM issues. If the thread pool is full, incoming requests
+ will be queued up and wait for some free threads.
+
+
+
+ hbase.rest.threads.min
+ 2
+ The minimum number of threads of the REST server thread pool.
+ The thread pool always has at least these number of threads so
+ the REST server is ready to serve incoming requests.
+
+
+
+ hbase.rest.support.proxyuser
+ false
+ Enables running the REST server to support proxy-user mode.
+
+
+ hbase.defaults.for.version
+ 2.4.9
+ This defaults file was compiled for version ${project.version}. This variable is used
+ to make sure that a user doesn't have an old version of hbase-default.xml on the
+ classpath.
+
+
+
+ hbase.defaults.for.version.skip
+ false
+ Set to true to skip the 'hbase.defaults.for.version' check.
+ Setting this to true can be useful in contexts other than
+ the other side of a maven generation; i.e. running in an
+ IDE. You'll want to set this boolean to true to avoid
+ seeing the RuntimeException complaint: "hbase-default.xml file
+ seems to be for and old version of HBase (\${hbase.version}), this
+ version is X.X.X-SNAPSHOT"
+
+
+
+ hbase.table.lock.enable
+ true
+ Set to true to enable locking the table in zookeeper for schema change operations.
+ Table locking from master prevents concurrent schema modifications to corrupt table
+ state.
+
+
+
+ hbase.table.max.rowsize
+ 1073741824
+
+ Maximum size of single row in bytes (default is 1 Gb) for Get'ting
+ or Scan'ning without in-row scan flag set. If row size exceeds this limit
+ RowTooBigException is thrown to client.
+
+
+
+ hbase.thrift.minWorkerThreads
+ 16
+ The "core size" of the thread pool. New threads are created on every
+ connection until this many threads are created.
+
+
+
+ hbase.thrift.maxWorkerThreads
+ 1000
+ The maximum size of the thread pool. When the pending request queue
+ overflows, new threads are created until their number reaches this number.
+ After that, the server starts dropping connections.
+
+
+
+ hbase.thrift.maxQueuedRequests
+ 1000
+ The maximum number of pending Thrift connections waiting in the queue. If
+ there are no idle threads in the pool, the server queues requests. Only
+ when the queue overflows, new threads are added, up to
+ hbase.thrift.maxQueuedRequests threads.
+
+
+
+ hbase.regionserver.thrift.framed
+ false
+ Use Thrift TFramedTransport on the server side.
+ This is the recommended transport for thrift servers and requires a similar setting
+ on the client side. Changing this to false will select the default transport,
+ vulnerable to DoS when malformed requests are issued due to THRIFT-601.
+
+
+
+ hbase.regionserver.thrift.framed.max_frame_size_in_mb
+ 2
+ Default frame size when using framed transport, in MB
+
+
+ hbase.regionserver.thrift.compact
+ false
+ Use Thrift TCompactProtocol binary serialization protocol.
+
+
+ hbase.rootdir.perms
+ 700
+ FS Permissions for the root data subdirectory in a secure (kerberos) setup.
+ When master starts, it creates the rootdir with this permissions or sets the permissions
+ if it does not match.
+
+
+
+ hbase.wal.dir.perms
+ 700
+ FS Permissions for the root WAL directory in a secure(kerberos) setup.
+ When master starts, it creates the WAL dir with this permissions or sets the permissions
+ if it does not match.
+
+
+
+ hbase.data.umask.enable
+ false
+ Enable, if true, that file permissions should be assigned
+ to the files written by the regionserver
+
+
+
+ hbase.data.umask
+ 000
+ File permissions that should be used to write data
+ files when hbase.data.umask.enable is true
+
+
+
+ hbase.snapshot.enabled
+ true
+ Set to true to allow snapshots to be taken / restored / cloned.
+
+
+ hbase.snapshot.restore.take.failsafe.snapshot
+ true
+ Set to true to take a snapshot before the restore operation.
+ The snapshot taken will be used in case of failure, to restore the previous state.
+ At the end of the restore operation this snapshot will be deleted
+
+
+
+ hbase.snapshot.restore.failsafe.name
+ hbase-failsafe-{snapshot.name}-{restore.timestamp}
+ Name of the failsafe snapshot taken by the restore operation.
+ You can use the {snapshot.name}, {table.name} and {restore.timestamp} variables
+ to create a name based on what you are restoring.
+
+
+
+ hbase.snapshot.working.dir
+
+ Location where the snapshotting process will occur. The location of the
+ completed snapshots will not change, but the temporary directory where the snapshot
+ process occurs will be set to this location. This can be a separate filesystem than
+ the root directory, for performance increase purposes. See HBASE-21098 for more
+ information
+
+
+
+ hbase.server.compactchecker.interval.multiplier
+ 1000
+ The number that determines how often we scan to see if compaction is necessary.
+ Normally, compactions are done after some events (such as memstore flush), but if
+ region didn't receive a lot of writes for some time, or due to different compaction
+ policies, it may be necessary to check it periodically. The interval between checks is
+ hbase.server.compactchecker.interval.multiplier multiplied by
+ hbase.server.thread.wakefrequency.
+
+
+
+ hbase.lease.recovery.timeout
+ 900000
+ How long we wait on dfs lease recovery in total before giving up.
+
+
+ hbase.lease.recovery.dfs.timeout
+ 64000
+ How long between dfs recover lease invocations. Should be larger than the sum of
+ the time it takes for the namenode to issue a block recovery command as part of
+ datanode; dfs.heartbeat.interval and the time it takes for the primary
+ datanode, performing block recovery to timeout on a dead datanode; usually
+ dfs.client.socket-timeout. See the end of HBASE-8389 for more.
+
+
+
+ hbase.column.max.version
+ 1
+ New column family descriptors will use this value as the default number of versions
+ to keep.
+
+
+
+ dfs.client.read.shortcircuit
+
+
+ If set to true, this configuration parameter enables short-circuit local
+ reads.
+
+
+
+ dfs.domain.socket.path
+
+
+ This is a path to a UNIX domain socket that will be used for
+ communication between the DataNode and local HDFS clients, if
+ dfs.client.read.shortcircuit is set to true. If the string "_PORT" is
+ present in this path, it will be replaced by the TCP port of the DataNode.
+ Be careful about permissions for the directory that hosts the shared
+ domain socket; dfsclient will complain if open to other users than the HBase user.
+
+
+
+ hbase.dfs.client.read.shortcircuit.buffer.size
+ 131072
+ If the DFSClient configuration
+ dfs.client.read.shortcircuit.buffer.size is unset, we will
+ use what is configured here as the short circuit read default
+ direct byte buffer size. DFSClient native default is 1MB; HBase
+ keeps its HDFS files open so number of file blocks * 1MB soon
+ starts to add up and threaten OOME because of a shortage of
+ direct memory. So, we set it down from the default. Make
+ it > the default hbase block size set in the HColumnDescriptor
+ which is usually 64k.
+
+
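A back-of-the-envelope sketch of the direct-memory pressure described above (illustrative only; the open-block count is made up):

    int openFileBlocks = 10_000;                              // hypothetical number of open HFile blocks
    long withDfsClientDefault = openFileBlocks * 1_048_576L;  // ~10 GB of direct memory at 1 MB per buffer
    long withHBaseDefault = openFileBlocks * 131_072L;        // ~1.3 GB at the 128 KB default above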
+
+ hbase.regionserver.checksum.verify
+ true
+
+ If set to true (the default), HBase verifies the checksums for hfile
+ blocks. HBase writes checksums inline with the data when it writes out
+ hfiles. HDFS (as of this writing) writes checksums to a separate file
+ than the data file necessitating extra seeks. Setting this flag saves
+ some on i/o. Checksum verification by HDFS will be internally disabled
+ on hfile streams when this flag is set. If the hbase-checksum verification
+ fails, we will switch back to using HDFS checksums (so do not disable HDFS
+ checksums! And besides this feature applies to hfiles only, not to WALs).
+ If this parameter is set to false, then hbase will not verify any checksums,
+ instead it will depend on checksum verification being done in the HDFS client.
+
+
+
+ hbase.hstore.bytes.per.checksum
+ 16384
+
+ Number of bytes in a newly created checksum chunk for HBase-level
+ checksums in hfile blocks.
+
+
+
+ hbase.hstore.checksum.algorithm
+ CRC32C
+
+ Name of an algorithm that is used to compute checksums. Possible values
+ are NULL, CRC32, CRC32C.
+
+
+
+ hbase.client.scanner.max.result.size
+ 2097152
+ Maximum number of bytes returned when calling a scanner's next method.
+ Note that when a single row is larger than this limit the row is still returned completely.
+ The default value is 2MB, which is good for 1GbE networks.
+ With faster and/or higher-latency networks this value should be increased.
+
+
+
+ hbase.server.scanner.max.result.size
+ 104857600
+ Maximum number of bytes returned when calling a scanner's next method.
+ Note that when a single row is larger than this limit the row is still returned completely.
+ The default value is 100MB.
+ This is a safety setting to protect the server from OOM situations.
+
+
+
+ hbase.status.published
+ false
+
+ This setting activates the publication by the master of the status of the region server.
+ When a region server dies and its recovery starts, the master will push this information
+ to the client application, to let it cut the connection immediately instead of waiting
+ for a timeout.
+
+
+
+ hbase.status.publisher.class
+ org.apache.hadoop.hbase.master.ClusterStatusPublisher$MulticastPublisher
+
+ Implementation of the status publication with a multicast message.
+
+
+
+ hbase.status.listener.class
+ org.apache.hadoop.hbase.client.ClusterStatusListener$MulticastListener
+
+ Implementation of the status listener with a multicast message.
+
+
+
+ hbase.status.multicast.address.ip
+ 226.1.1.3
+
+ Multicast address to use for the status publication by multicast.
+
+
+
+ hbase.status.multicast.address.port
+ 16100
+
+ Multicast port to use for the status publication by multicast.
+
+
+
+ hbase.dynamic.jars.dir
+ ${hbase.rootdir}/lib
+
+ The directory from which the custom filter JARs can be loaded
+ dynamically by the region server without the need to restart. However,
+ an already loaded filter/co-processor class would not be un-loaded. See
+ HBASE-1936 for more details.
+
+ Does not apply to coprocessors.
+
+
+
+ hbase.security.authentication
+ simple
+
+ Controls whether or not secure authentication is enabled for HBase.
+ Possible values are 'simple' (no authentication), and 'kerberos'.
+
+
+
+ hbase.rest.filter.classes
+ org.apache.hadoop.hbase.rest.filter.GzipFilter
+
+ Servlet filters for REST service.
+
+
+
+ hbase.master.loadbalancer.class
+ org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer
+
+ Class used to execute the regions balancing when the period occurs.
+ See the class comment for more on how it works
+ http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.html
+ It replaces the DefaultLoadBalancer as the default (since renamed
+ as the SimpleLoadBalancer).
+
+
+
+ hbase.master.loadbalance.bytable
+ false
+ Whether to factor in the table name when the balancer runs, i.e., balance regions on a per-table basis.
+ Default: false.
+
+
+
+ hbase.master.normalizer.class
+ org.apache.hadoop.hbase.master.normalizer.SimpleRegionNormalizer
+
+ Class used to execute the region normalization when the period occurs.
+ See the class comment for more on how it works
+ http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.html
+
+
+
+ hbase.rest.csrf.enabled
+ false
+
+ Set to true to enable protection against cross-site request forgery (CSRF)
+
+
+
+ hbase.rest-csrf.browser-useragents-regex
+ ^Mozilla.*,^Opera.*
+
+ A comma-separated list of regular expressions used to match against an HTTP
+ request's User-Agent header when protection against cross-site request
+ forgery (CSRF) is enabled for REST server by setting
+ hbase.rest.csrf.enabled to true. If the incoming User-Agent matches
+ any of these regular expressions, then the request is considered to be sent
+ by a browser, and therefore CSRF prevention is enforced. If the request's
+ User-Agent does not match any of these regular expressions, then the request
+ is considered to be sent by something other than a browser, such as scripted
+ automation. In this case, CSRF is not a potential attack vector, so
+ the prevention is not enforced. This helps achieve backwards-compatibility
+ with existing automation that has not been updated to send the CSRF
+ prevention header.
+
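A minimal sketch of how a comma-separated User-Agent regex list like the one above is typically evaluated (illustrative only, not HBase source; the header value is made up):

    String useragentsRegex = "^Mozilla.*,^Opera.*";        // value shown above
    String userAgent = "Mozilla/5.0 (X11; Linux x86_64)";  // hypothetical request header
    boolean treatedAsBrowser = java.util.Arrays.stream(useragentsRegex.split(","))
        .anyMatch(userAgent::matches);                     // true -> CSRF prevention is enforced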
+
+
+ hbase.security.exec.permission.checks
+ false
+
+ If this setting is enabled and ACL based access control is active (the
+ AccessController coprocessor is installed either as a system coprocessor
+ or on a table as a table coprocessor) then you must grant all relevant
+ users EXEC privilege if they require the ability to execute coprocessor
+ endpoint calls. EXEC privilege, like any other permission, can be
+ granted globally to a user, or to a user on a per table or per namespace
+ basis. For more information on coprocessor endpoints, see the coprocessor
+ section of the HBase online manual. For more information on granting or
+ revoking permissions using the AccessController, see the security
+ section of the HBase online manual.
+
+
+
+ hbase.procedure.regionserver.classes
+
+ A comma-separated list of
+ org.apache.hadoop.hbase.procedure.RegionServerProcedureManager procedure managers that are
+ loaded by default on the active HRegionServer process. The lifecycle methods (init/start/stop)
+ will be called by the active HRegionServer process to perform the specific globally barriered
+ procedure. After implementing your own RegionServerProcedureManager, just put it in
+ HBase's classpath and add the fully qualified class name here.
+
+
+
+ hbase.procedure.master.classes
+
+ A comma-separated list of
+ org.apache.hadoop.hbase.procedure.MasterProcedureManager procedure managers that are
+ loaded by default on the active HMaster process. A procedure is identified by its signature and
+ users can use the signature and an instant name to trigger an execution of a globally barriered
+ procedure. After implementing your own MasterProcedureManager, just put it in HBase's classpath
+ and add the fully qualified class name here.
+
+
+
+ hbase.coordinated.state.manager.class
+ org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager
+ Fully qualified name of class implementing coordinated state manager.
+
+
+ hbase.regionserver.storefile.refresh.period
+ 0
+
+ The period (in milliseconds) for refreshing the store files for the secondary regions. 0
+ means this feature is disabled. Secondary regions see new files (from flushes and
+ compactions) from primary once the secondary region refreshes the list of files in the
+ region (there is no notification mechanism). But too frequent refreshes might cause
+ extra Namenode pressure. If the files cannot be refreshed for longer than HFile TTL
+ (hbase.master.hfilecleaner.ttl) the requests are rejected. Configuring HFile TTL to a larger
+ value is also recommended with this setting.
+
+
+
+ hbase.region.replica.replication.enabled
+ false
+
+ Whether asynchronous WAL replication to the secondary region replicas is enabled or not.
+ If this is enabled, a replication peer named "region_replica_replication" will be created
+ which will tail the logs and replicate the mutations to region replicas for tables that
+ have region replication > 1. Once this is enabled, disabling this replication also
+ requires disabling the replication peer using the shell or the Admin Java API.
+ Replication to secondary region replicas works over standard inter-cluster replication.
+
+
+
+ hbase.http.filter.initializers
+ org.apache.hadoop.hbase.http.lib.StaticUserWebFilter
+
+ A comma-separated list of class names. Each class in the list must extend
+ org.apache.hadoop.hbase.http.FilterInitializer. The corresponding Filter will
+ be initialized. Then, the Filter will be applied to all user facing jsp
+ and servlet web pages.
+ The ordering of the list defines the ordering of the filters.
+ The default StaticUserWebFilter adds a user principal as defined by the
+ hbase.http.staticuser.user property.
+
+
+
+ hbase.security.visibility.mutations.checkauths
+ false
+
+ If enabled, this property will check whether the labels in the visibility
+ expression are associated with the user issuing the mutation.
+
+
+
+ hbase.http.max.threads
+ 16
+
+ The maximum number of threads that the HTTP Server will create in its
+ ThreadPool.
+
+
+
+ hbase.replication.rpc.codec
+ org.apache.hadoop.hbase.codec.KeyValueCodecWithTags
+
+ The codec that is to be used when replication is enabled so that
+ the tags are also replicated. This is used along with HFileV3 which
+ supports tags in them. If tags are not used or if the hfile version used
+ is HFileV2 then KeyValueCodec can be used as the replication codec. Note that
+ using KeyValueCodecWithTags for replication when there are no tags causes no harm.
+
+
+
+ hbase.replication.source.maxthreads
+ 10
+
+ The maximum number of threads any replication source will use for
+ shipping edits to the sinks in parallel. This also limits the number of
+ chunks each replication batch is broken into. Larger values can improve
+ the replication throughput between the master and slave clusters. The
+ default of 10 will rarely need to be changed.
+
+
+
+
+ hbase.http.staticuser.user
+ dr.stack
+
+ The user name to filter as, on static web filters
+ while rendering content. An example use is the HDFS
+ web UI (user to be used for browsing files).
+
+
+
+ hbase.regionserver.handler.abort.on.error.percent
+ 0.5
+ The percent of region server RPC handler threads that must fail for the RS to abort.
+ -1 Disable aborting; 0 Abort if even a single handler has died;
+ 0.x Abort only when this percent of handlers have died;
+ 1 Abort only when all of the handlers have died.
+
+
+
+
+ hbase.mob.file.cache.size
+ 1000
+
+ Number of opened file handlers to cache.
+ A larger value will benefit reads by providing more file handlers per mob
+ file cache and would reduce frequent file opening and closing.
+ However, if this is set too high, it could lead to a "too many opened file handlers" error.
+ The default value is 1000.
+
+
+
+ hbase.mob.cache.evict.period
+ 3600
+
+ The amount of time in seconds before the mob cache evicts cached mob files.
+ The default value is 3600 seconds.
+
+
+
+ hbase.mob.cache.evict.remain.ratio
+ 0.5f
+
+ The ratio (between 0.0 and 1.0) of files that remains cached after an eviction
+ is triggered when the number of cached mob files exceeds the hbase.mob.file.cache.size.
+ The default value is 0.5f.
+
+
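A quick worked example of how the two MOB cache settings above interact (illustrative arithmetic only):

    int cacheSize = 1000;      // hbase.mob.file.cache.size (default above)
    float remainRatio = 0.5f;  // hbase.mob.cache.evict.remain.ratio (default above)
    int handlesKeptAfterEviction = (int) (cacheSize * remainRatio); // roughly 500 file handles stay cached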
+
+ hbase.master.mob.ttl.cleaner.period
+ 86400
+
+ The period that ExpiredMobFileCleanerChore runs. The unit is second.
+ The default value is one day. The MOB file name uses only the date part of
+ the file creation time in it. We use this time for deciding TTL expiry of
+ the files. So the removal of TTL expired files might be delayed. The max
+ delay might be 24 hrs.
+
+
+
+ hbase.mob.compaction.mergeable.threshold
+ 1342177280
+
+ If the size of a mob file is less than this value, it's regarded as a small
+ file and needs to be merged in mob compaction. The default value is 1280MB.
+
+
+
+ hbase.mob.delfile.max.count
+ 3
+
+ The max number of del files that is allowed in the mob compaction.
+ In the mob compaction, when the number of existing del files is larger than
+ this value, they are merged until the number of del files is not larger than this value.
+ The default value is 3.
+
+
+
+ hbase.mob.compaction.batch.size
+ 100
+
+ The max number of the mob files that is allowed in a batch of the mob compaction.
+ The mob compaction merges the small mob files to bigger ones. If the number of the
+ small files is very large, it could lead to a "too many opened file handlers" error in the merge,
+ so the merge has to be split into batches. This value limits the number of mob files
+ that are selected in a batch of the mob compaction. The default value is 100.
+
+
+
+ hbase.mob.compaction.chore.period
+ 604800
+
+ The period that MobCompactionChore runs. The unit is second.
+ The default value is one week.
+
+
+
+ hbase.mob.compactor.class
+ org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactor
+
+ Implementation of mob compactor, the default one is PartitionedMobCompactor.
+
+
+
+ hbase.mob.compaction.threads.max
+ 1
+
+ The max number of threads used in MobCompactor.
+
+
+
+ hbase.snapshot.master.timeout.millis
+ 300000
+
+ Timeout for master for the snapshot procedure execution.
+
+
+
+ hbase.snapshot.region.timeout
+ 300000
+
+ Timeout for regionservers to keep threads in snapshot request pool waiting.
+
+
+
+ hbase.rpc.rows.warning.threshold
+ 5000
+
+ Number of rows in a batch operation above which a warning will be logged.
+
+
+
+ hbase.master.wait.on.service.seconds
+ 30
+ Default is 5 minutes. Make it 30 seconds for tests. See
+ HBASE-19794 for some context.
+
+
+
+ hbase.master.cleaner.snapshot.interval
+ 1800000
+
+ Snapshot Cleanup chore interval in milliseconds.
+ The cleanup thread keeps running at this interval
+ to find all snapshots that are expired based on TTL
+ and delete them.
+
+
+
+ hbase.master.snapshot.ttl
+ 0
+
+ Default Snapshot TTL to be considered when the user does not specify TTL while
+ creating a snapshot. The default value 0 indicates FOREVER - the snapshot should not be
+ automatically deleted until it is manually deleted.
+
+
+
+ hbase.master.regions.recovery.check.interval
+ 1200000
+
+ Regions Recovery Chore interval in milliseconds.
+ This chore keeps running at this interval to
+ find all regions that exceed the configurable max store file ref count
+ and reopens them.
+
+
+
+ hbase.regions.recovery.store.file.ref.count
+ -1
+
+ A very large ref count on a compacted
+ store file indicates a reference leak
+ on that object (the compacted store file).
+ Such files cannot be removed after
+ they are invalidated via compaction.
+ The only way to recover in such a scenario is to
+ reopen the region, which can release
+ all resources, like the ref count,
+ leases, etc. This config represents the store file ref
+ count threshold considered for reopening
+ regions. Any region with compacted store files
+ ref count > this value would be eligible for
+ reopening by the master. Here, we take the max
+ refCount among all refCounts on all
+ compacted-away store files that belong to a
+ particular region. The default value -1 indicates
+ this feature is turned off. Only a positive
+ integer value should be provided to
+ enable this feature.
+
+
+
+ hbase.regionserver.slowlog.ringbuffer.size
+ 256
+
+ Default size of ringbuffer to be maintained by each RegionServer in order
+ to store online slowlog responses. This is an in-memory ring buffer of
+ requests that were judged to be too slow in addition to the responseTooSlow
+ logging. The in-memory representation would be complete.
+ For more details, please look into Doc Section:
+ Get Slow Response Log from shell
+
+
+
+ hbase.regionserver.slowlog.buffer.enabled
+ false
+
+ Indicates whether RegionServers have a ring buffer running for storing
+ Online Slow logs in FIFO manner with limited entries. The size of
+ the ring buffer is indicated by config: hbase.regionserver.slowlog.ringbuffer.size.
+ The default value is false; turn this on to get the latest slowlog
+ responses with complete data.
+
+
+
+ hbase.regionserver.slowlog.systable.enabled
+ false
+
+ Should be enabled only if hbase.regionserver.slowlog.buffer.enabled is enabled. If enabled
+ (true), all slow/large RPC logs would be persisted to system table hbase:slowlog (in addition
+ to in-memory ring buffer at each RegionServer). The records are stored in increasing
+ order of time. Operators can scan the table with various combinations of ColumnValueFilter.
+ More details are provided in the doc section:
+ "Get Slow/Large Response Logs from System table hbase:slowlog"
+
+
+
+ hbase.rpc.rows.size.threshold.reject
+ false
+
+ If value is true, RegionServer will abort batch requests of Put/Delete with number of rows
+ in a batch operation exceeding threshold defined by value of config:
+ hbase.rpc.rows.warning.threshold. The default value is false and hence, by default, only
+ warning will be logged. This config should be turned on to prevent RegionServer from serving
+ very large batches of rows, and this way we can improve CPU usage by discarding
+ overly large batch requests.
+
+
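Putting this together with hbase.rpc.rows.warning.threshold above, a small illustrative sketch (the batch size is made up):

    int warningThreshold = 5000;     // hbase.rpc.rows.warning.threshold (default above)
    boolean rejectOversized = false; // hbase.rpc.rows.size.threshold.reject (default above)
    int batchRows = 8000;            // hypothetical multi-row Put/Delete batch
    boolean warned = batchRows > warningThreshold;  // always logged as a warning
    boolean rejected = warned && rejectOversized;   // rejected only when the reject flag is turned on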
+
+ hbase.namedqueue.provider.classes
+
+ org.apache.hadoop.hbase.namequeues.impl.SlowLogQueueService,org.apache.hadoop.hbase.namequeues.impl.BalancerDecisionQueueService,org.apache.hadoop.hbase.namequeues.impl.BalancerRejectionQueueService
+
+
+ Default values for NamedQueueService implementors. These comma-separated fully qualified class names
+ represent all implementors of NamedQueueService that we would like to be invoked by the
+ LogEvent handler service. One example of NamedQueue service is SlowLogQueueService which
+ is used to store slow/large RPC logs in ringbuffer at each RegionServer.
+ All implementors of NamedQueueService should be found under package:
+ "org.apache.hadoop.hbase.namequeues.impl"
+
+
+
+ hbase.master.balancer.decision.buffer.enabled
+ false
+
+ Indicates whether the active HMaster has a ring buffer running for storing
+ balancer decisions in FIFO manner with limited entries. The size of
+ the ring buffer is indicated by config: hbase.master.balancer.decision.queue.size
+
+
+
+ hbase.master.balancer.rejection.buffer.enabled
+ false
+
+ Indicates whether the active HMaster has a ring buffer running for storing
+ balancer rejections in FIFO manner with limited entries. The size of
+ the ring buffer is indicated by config: hbase.master.balancer.rejection.queue.size
+
+
+
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java
index 92f83aad7fd7e..0f364eddbc614 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystem.java
@@ -369,7 +369,8 @@ private Path getRandomInlinePath() {
private void verifyFileStatus(FileStatus expected, Path inlinePath, long expectedLength, FileStatus actual) {
assertEquals(inlinePath, actual.getPath());
assertEquals(expectedLength, actual.getLen());
- assertEquals(expected.getAccessTime(), actual.getAccessTime());
+ // removing below assertion as it is flaky on rare occasion (difference is in single-digit ms)
+ // assertEquals(expected.getAccessTime(), actual.getAccessTime());
assertEquals(expected.getBlockSize(), actual.getBlockSize());
assertEquals(expected.getGroup(), actual.getGroup());
assertEquals(expected.getModificationTime(), actual.getModificationTime());
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java
index cc59b46024792..f09ecf76b2d88 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestInLineFileSystemHFileInLining.java
@@ -19,12 +19,12 @@
package org.apache.hudi.common.fs.inline;
import org.apache.hudi.common.testutils.FileSystemTestUtils;
-import org.apache.hudi.io.storage.HoodieHBaseKVComparator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
@@ -39,10 +39,12 @@
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;
+import static org.apache.hadoop.hbase.CellComparatorImpl.COMPARATOR;
import static org.apache.hudi.common.testutils.FileSystemTestUtils.FILE_SCHEME;
import static org.apache.hudi.common.testutils.FileSystemTestUtils.RANDOM;
import static org.apache.hudi.common.testutils.FileSystemTestUtils.getPhantomFile;
@@ -56,11 +58,12 @@
*/
public class TestInLineFileSystemHFileInLining {
+ private static final String LOCAL_FORMATTER = "%010d";
+ private static final String VALUE_PREFIX = "value";
+ private static final int MIN_BLOCK_SIZE = 1024;
private final Configuration inMemoryConf;
private final Configuration inlineConf;
- private final int minBlockSize = 1024;
- private static final String LOCAL_FORMATTER = "%010d";
- private int maxRows = 100 + RANDOM.nextInt(1000);
+ private final int maxRows = 100 + RANDOM.nextInt(1000);
private Path generatedPath;
public TestInLineFileSystemHFileInLining() {
@@ -88,12 +91,11 @@ public void testSimpleInlineFileSystem() throws IOException {
CacheConfig cacheConf = new CacheConfig(inMemoryConf);
FSDataOutputStream fout = createFSOutput(outerInMemFSPath, inMemoryConf);
HFileContext meta = new HFileContextBuilder()
- .withBlockSize(minBlockSize)
+ .withBlockSize(MIN_BLOCK_SIZE).withCellComparator(COMPARATOR)
.build();
HFile.Writer writer = HFile.getWriterFactory(inMemoryConf, cacheConf)
.withOutputStream(fout)
.withFileContext(meta)
- .withComparator(new HoodieHBaseKVComparator())
.create();
writeRecords(writer);
@@ -110,9 +112,9 @@ public void testSimpleInlineFileSystem() throws IOException {
InLineFileSystem inlineFileSystem = (InLineFileSystem) inlinePath.getFileSystem(inlineConf);
FSDataInputStream fin = inlineFileSystem.open(inlinePath);
- HFile.Reader reader = HFile.createReader(inlineFileSystem, inlinePath, cacheConf, inlineConf);
+ HFile.Reader reader = HFile.createReader(inlineFileSystem, inlinePath, cacheConf, true, inlineConf);
// Load up the index.
- reader.loadFileInfo();
+ reader.getHFileInfo();
// Get a scanner that caches and that does not use pread.
HFileScanner scanner = reader.getScanner(true, false);
// Align scanner at start of the file.
@@ -121,21 +123,24 @@ public void testSimpleInlineFileSystem() throws IOException {
Set<Integer> rowIdsToSearch = getRandomValidRowIds(10);
for (int rowId : rowIdsToSearch) {
- assertEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))),
+ KeyValue keyValue = new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId));
+ assertEquals(0, scanner.seekTo(keyValue),
"location lookup failed");
// read the key and see if it matches
- ByteBuffer readKey = scanner.getKey();
- assertArrayEquals(getSomeKey(rowId), Bytes.toBytes(readKey), "seeked key does not match");
- scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId)));
+ Cell cell = scanner.getCell();
+ byte[] key = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength());
+ assertArrayEquals(Arrays.copyOfRange(keyValue.getRowArray(), keyValue.getRowOffset(), keyValue.getRowOffset() + keyValue.getRowLength()), key,
+ "seeked key does not match");
+ scanner.seekTo(keyValue);
ByteBuffer val1 = scanner.getValue();
- scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId)));
+ scanner.seekTo(keyValue);
ByteBuffer val2 = scanner.getValue();
assertArrayEquals(Bytes.toBytes(val1), Bytes.toBytes(val2));
}
int[] invalidRowIds = {-4, maxRows, maxRows + 1, maxRows + 120, maxRows + 160, maxRows + 1000};
for (int rowId : invalidRowIds) {
- assertNotEquals(0, scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(rowId))),
+ assertNotEquals(0, scanner.seekTo(new KeyValue.KeyOnlyKeyValue(getSomeKey(rowId))),
"location lookup should have failed");
}
reader.close();
@@ -155,7 +160,7 @@ private Set getRandomValidRowIds(int count) {
}
private byte[] getSomeKey(int rowId) {
- KeyValue kv = new KeyValue(String.format(LOCAL_FORMATTER, Integer.valueOf(rowId)).getBytes(),
+ KeyValue kv = new KeyValue(String.format(LOCAL_FORMATTER, rowId).getBytes(),
Bytes.toBytes("family"), Bytes.toBytes("qual"), HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put);
return kv.getKey();
}
@@ -169,17 +174,15 @@ private void writeRecords(HFile.Writer writer) throws IOException {
writer.close();
}
- private int writeSomeRecords(HFile.Writer writer)
+ private void writeSomeRecords(HFile.Writer writer)
throws IOException {
- String value = "value";
KeyValue kv;
for (int i = 0; i < (maxRows); i++) {
- String key = String.format(LOCAL_FORMATTER, Integer.valueOf(i));
+ String key = String.format(LOCAL_FORMATTER, i);
kv = new KeyValue(Bytes.toBytes(key), Bytes.toBytes("family"), Bytes.toBytes("qual"),
- Bytes.toBytes(value + key));
+ Bytes.toBytes(VALUE_PREFIX + key));
writer.append(kv);
}
- return (maxRows);
}
private void readAllRecords(HFileScanner scanner) throws IOException {
@@ -187,30 +190,27 @@ private void readAllRecords(HFileScanner scanner) throws IOException {
}
// read the records and check
- private int readAndCheckbytes(HFileScanner scanner, int start, int n)
+ private void readAndCheckbytes(HFileScanner scanner, int start, int n)
throws IOException {
- String value = "value";
int i = start;
for (; i < (start + n); i++) {
- ByteBuffer key = scanner.getKey();
- ByteBuffer val = scanner.getValue();
- String keyStr = String.format(LOCAL_FORMATTER, Integer.valueOf(i));
- String valStr = value + keyStr;
+ Cell cell = scanner.getCell();
+ byte[] key = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength());
+ byte[] val = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
+ String keyStr = String.format(LOCAL_FORMATTER, i);
+ String valStr = VALUE_PREFIX + keyStr;
KeyValue kv = new KeyValue(Bytes.toBytes(keyStr), Bytes.toBytes("family"),
Bytes.toBytes("qual"), Bytes.toBytes(valStr));
- byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(key), 0,
- Bytes.toBytes(key).length).getKey();
- assertArrayEquals(kv.getKey(), keyBytes,
- "bytes for keys do not match " + keyStr + " " + Bytes.toString(Bytes.toBytes(key)));
- byte[] valBytes = Bytes.toBytes(val);
- assertArrayEquals(Bytes.toBytes(valStr), valBytes,
- "bytes for vals do not match " + valStr + " " + Bytes.toString(valBytes));
+ byte[] keyBytes = new KeyValue.KeyOnlyKeyValue(key, 0, key.length).getKey();
+ assertArrayEquals(Arrays.copyOfRange(kv.getRowArray(), kv.getRowOffset(), kv.getRowOffset() + kv.getRowLength()), keyBytes,
+ "bytes for keys do not match " + keyStr + " " + Bytes.toString(key));
+ assertArrayEquals(Bytes.toBytes(valStr), val,
+ "bytes for vals do not match " + valStr + " " + Bytes.toString(val));
if (!scanner.next()) {
break;
}
}
assertEquals(i, start + n - 1);
- return (start + n);
}
private long generateOuterFile(Path outerPath, byte[] inlineBytes) throws IOException {
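For reviewers less familiar with the HBase 2.x Cell accessors used in the rewritten assertions above, a minimal helper sketch (illustrative only, not part of the patch):

    import java.util.Arrays;
    import org.apache.hadoop.hbase.Cell;

    final class CellBytes {
      // Copy the row-key bytes out of a Cell, mirroring the Arrays.copyOfRange pattern above.
      static byte[] rowBytes(Cell cell) {
        return Arrays.copyOfRange(cell.getRowArray(),
            cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength());
      }

      // Same idea for the value bytes.
      static byte[] valueBytes(Cell cell) {
        return Arrays.copyOfRange(cell.getValueArray(),
            cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
      }
    }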
diff --git a/hudi-examples/pom.xml b/hudi-examples/pom.xml
index 2ea284f203209..9024844b4dec7 100644
--- a/hudi-examples/pom.xml
+++ b/hudi-examples/pom.xml
@@ -221,6 +221,10 @@
org.eclipse.jetty.aggregate*
+
+ org.eclipse.jetty
+ *
+
diff --git a/hudi-flink/pom.xml b/hudi-flink/pom.xml
index 27a4a0b453cb7..181c921277068 100644
--- a/hudi-flink/pom.xml
+++ b/hudi-flink/pom.xml
@@ -43,8 +43,8 @@
org.apache.maven.pluginsmaven-compiler-plugin
- 1.8
- 1.8
+ ${java.version}
+ ${java.version}
diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml
index bf87bfaa36a81..ecb74bf6c6df8 100644
--- a/hudi-hadoop-mr/pom.xml
+++ b/hudi-hadoop-mr/pom.xml
@@ -67,6 +67,16 @@
${hive.groupid}hive-jdbc
+
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
+ ${hive.groupid}
@@ -88,12 +98,24 @@
hadoop-commonteststest
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hadoophadoop-hdfsteststest
+
+
+ org.eclipse.jetty
+ *
+
+
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/NoOpMetastoreUriResolverHook.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/NoOpMetastoreUriResolverHook.java
new file mode 100644
index 0000000000000..a8c71a70aff70
--- /dev/null
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/NoOpMetastoreUriResolverHook.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hadoop.hive;
+
+import org.apache.hadoop.hive.metastore.hooks.URIResolverHook;
+
+import java.net.URI;
+import java.util.Collections;
+import java.util.List;
+
+public class NoOpMetastoreUriResolverHook implements URIResolverHook {
+
+ @Override
+  public List<URI> resolveURI(URI uri) {
+ return Collections.singletonList(uri);
+ }
+
+}
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
index 0aa74ef154334..71061bc3e4eaf 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeRecordReaderUtils.java
@@ -186,7 +186,8 @@ public static Writable avroToArrayWritable(Object value, Schema schema) {
Writable[] recordValues = new Writable[schema.getFields().size()];
int recordValueIndex = 0;
for (Schema.Field field : schema.getFields()) {
- recordValues[recordValueIndex++] = avroToArrayWritable(record.get(field.name()), field.schema());
+ Object fieldVal = record.hasField(field.name()) ? record.get(field.name()) : null;
+ recordValues[recordValueIndex++] = avroToArrayWritable(fieldVal, field.schema());
}
return new ArrayWritable(Writable.class, recordValues);
case ENUM:
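The guard added above handles Avro records whose schema contains only a subset of the target schema's fields; newer Avro versions do not tolerate GenericRecord.get(name) for field names absent from the record's schema, so the field is probed first (a sketch under that assumption; record and field are in scope as in the loop above):

    // Probe before reading so missing fields fall back to null instead of failing the lookup.
    Object fieldVal = record.hasField(field.name()) ? record.get(field.name()) : null;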
diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
index 07a4a0250e5de..3971afb11c8c0 100644
--- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
+++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java
@@ -67,7 +67,11 @@
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.hadoop.RealtimeFileStatus;
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig;
+import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils;
import org.apache.hudi.hadoop.testutils.InputFormatTestUtil;
+
+import org.apache.avro.generic.GenericRecord;
+
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
@@ -892,6 +896,20 @@ public void testIncrementalWithCompaction() throws Exception {
assertTrue(splits.length == 0);
}
+ @Test
+ public void testAvroToArrayWritable() throws IOException {
+ Schema schema = SchemaTestUtil.getEvolvedSchema();
+ GenericRecord record = SchemaTestUtil.generateAvroRecordFromJson(schema, 1, "100", "100", false);
+ ArrayWritable aWritable = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(record, schema);
+ assertEquals(schema.getFields().size(), aWritable.get().length);
+
+ // In some queries, generic records that Hudi gets are just part of the full records.
+ // Here test the case that some fields are missing in the record.
+ Schema schemaWithMetaFields = HoodieAvroUtils.addMetadataFields(schema);
+ ArrayWritable aWritable2 = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(record, schemaWithMetaFields);
+ assertEquals(schemaWithMetaFields.getFields().size(), aWritable2.get().length);
+ }
+
private File createCompactionFile(java.nio.file.Path basePath, String commitTime)
throws IOException {
File file = basePath.resolve(".hoodie")
diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml
index 08affb5e48dee..d724bf6c33d1f 100644
--- a/hudi-integ-test/pom.xml
+++ b/hudi-integ-test/pom.xml
@@ -270,7 +270,6 @@
com.fasterxml.jackson.corejackson-annotations
- testcom.fasterxml.jackson.datatype
@@ -297,6 +296,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -318,6 +321,10 @@
netty-allio.netty
+
+ org.eclipse.jetty
+ *
+
@@ -352,6 +359,10 @@
javax.servlet*
+
+ org.eclipse.jetty.aggregate
+ *
+ org.eclipse.jetty*
@@ -407,7 +418,7 @@
${project.basedir}/compose_env
- ${project.basedir}/../docker/compose/docker-compose_hadoop284_hive233_spark244.yml
+ ${project.basedir}/../docker/compose/docker-compose_hadoop310_hive312_spark321.yml${skipITs}true${project.parent.basedir}
@@ -525,6 +536,7 @@
${dockerCompose.envFile}
+
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
index 4c0265ce90f64..bfbec518d22b6 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
@@ -90,6 +90,8 @@ static String[] getHiveConsoleCommand(String hiveExpr) {
List<String> cmd = new ArrayList<>();
cmd.add("hive");
cmd.add("--hiveconf");
+ cmd.add("hive.execution.engine=mr");
+ cmd.add("--hiveconf");
cmd.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
cmd.add("--hiveconf");
cmd.add("hive.stats.autogather=false");
@@ -100,6 +102,7 @@ static String[] getHiveConsoleCommand(String hiveExpr) {
private static String getHiveConsoleCommandFile(String commandFile, String additionalVar) {
StringBuilder builder = new StringBuilder().append("beeline -u " + HIVE_SERVER_JDBC_URL)
+ .append(" --hiveconf hive.execution.engine=mr")
.append(" --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat ")
.append(" --hiveconf hive.stats.autogather=false ")
.append(" --hivevar hudi.hadoop.bundle=" + HUDI_HADOOP_BUNDLE);
@@ -115,7 +118,7 @@ static String getSparkShellCommand(String commandFile) {
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
.append(
" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
- .append(" --packages org.apache.spark:spark-avro_2.11:2.4.4 ").append(" -i ").append(commandFile).toString();
+ .append(" --packages org.apache.spark:spark-avro_2.12:3.2.1 ").append(" -i ").append(commandFile).toString();
}
static String getPrestoConsoleCommand(String commandFile) {
@@ -145,6 +148,11 @@ public void init() {
await().atMost(300, SECONDS).until(this::servicesUp);
LOG.info(String.format("Waiting for all the containers and services finishes in %d ms",
System.currentTimeMillis() - currTs));
+ try {
+ Thread.sleep(90000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
}
private boolean servicesUp() {
@@ -221,9 +229,11 @@ private TestExecStartResultCallback executeCommandInDocker(String containerName,
// Each execution of command(s) in docker should not be more than 15 mins. Otherwise, it is deemed stuck. We will
// try to capture stdout and stderr of the stuck process.
+ LOG.error("containerName: " + containerName);
+ LOG.error("Command: " + Arrays.asList(command));
boolean completed =
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false).exec(callback)
- .awaitCompletion(540, SECONDS);
+ .awaitCompletion(540, SECONDS);
if (!completed) {
callback.getStderr().flush();
callback.getStdout().flush();
@@ -236,8 +246,11 @@ private TestExecStartResultCallback executeCommandInDocker(String containerName,
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
LOG.info("Exit code for command : " + exitCode);
if (exitCode != 0) {
- LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
+ //LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
}
+ callback.getStderr().flush();
+ callback.getStdout().flush();
+ LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
LOG.error("\n\n ###### Stderr #######\n" + callback.getStderr().toString());
if (checkIfSucceed) {
@@ -338,8 +351,8 @@ private void saveUpLogs() {
executeCommandStringInDocker(HIVESERVER, "cat /tmp/root/hive.log | grep -i exception -A 10 -B 5", false).getStdout().toString();
String filePath = System.getProperty("java.io.tmpdir") + "/" + System.currentTimeMillis() + "-hive.log";
FileIOUtils.writeStringToFile(hiveLogStr, filePath);
- LOG.info("Hive log saved up at : " + filePath);
- LOG.info("<=========== Full hive log ===============>\n"
+ LOG.error("Hive log saved up at : " + filePath);
+ LOG.error("<=========== Full hive log ===============>\n"
+ "\n" + hiveLogStr
+ "\n <==========================================>");
} catch (Exception e) {
@@ -356,6 +369,11 @@ void assertStdOutContains(Pair stdOutErr, String expectedOutput,
String stdOutSingleSpaced = singleSpace(stdOutErr.getLeft()).replaceAll(" ", "");
expectedOutput = singleSpace(expectedOutput).replaceAll(" ", "");
+ LOG.error("stdOutErr : " + stdOutErr.getLeft());
+ LOG.error("stdOutErr.getRight : " + stdOutErr.getRight());
+ LOG.error("stdOutSingleSpaced : " + stdOutSingleSpaced);
+ LOG.error("expectedOutput : " + expectedOutput);
+
int lastIndex = 0;
int count = 0;
while (lastIndex != -1) {
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java
index a6a4c3ec4201e..213639d82f287 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/command/ITTestHoodieSyncCommand.java
@@ -60,7 +60,7 @@ public void testValidateSync() throws Exception {
}
private void syncHoodieTable(String hiveTableName, String op) throws Exception {
- StringBuilder cmdBuilder = new StringBuilder("spark-submit --packages org.apache.spark:spark-avro_2.11:2.4.4 ")
+ StringBuilder cmdBuilder = new StringBuilder("spark-submit --packages org.apache.spark:spark-avro_2.12:3.2.1 ")
.append(" --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer ").append(HUDI_UTILITIES_BUNDLE)
.append(" --table-type COPY_ON_WRITE ")
.append(" --base-file-format ").append(HoodieFileFormat.PARQUET.toString())
diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml
index 8845bfb801ae3..a7a7fffbaf550 100644
--- a/hudi-kafka-connect/pom.xml
+++ b/hudi-kafka-connect/pom.xml
@@ -43,8 +43,8 @@
org.apache.maven.pluginsmaven-compiler-plugin
- 1.8
- 1.8
+ ${java.version}
+ ${java.version}
@@ -198,6 +198,12 @@
org.apache.hadoophadoop-common${hadoop.version}
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -205,6 +211,12 @@
org.apache.hivehive-common${hive.version}
+
+
+ org.eclipse.jetty
+ *
+
+ ${hive.groupid}
diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml
index 606f6fa894d72..459f2adca2fbb 100644
--- a/hudi-spark-datasource/hudi-spark/pom.xml
+++ b/hudi-spark-datasource/hudi-spark/pom.xml
@@ -351,7 +351,7 @@
org.apache.hadoophadoop-common
-
+ javax.servlet*
@@ -360,8 +360,12 @@
javax.servlet.jsp*
+
+ org.eclipse.jetty
+ *
+
- provided
+ provided
@@ -394,6 +398,14 @@
javax.servlet.jsp*
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
@@ -420,6 +432,10 @@
org.eclipse.jetty.orbitjavax.servlet
+
+ org.eclipse.jetty
+ *
+
@@ -526,7 +542,6 @@
org.slf4jslf4j-api${slf4j.version}
- test
@@ -548,6 +563,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
diff --git a/hudi-sync/hudi-dla-sync/pom.xml b/hudi-sync/hudi-dla-sync/pom.xml
index afb5717318f99..fb883c4b64524 100644
--- a/hudi-sync/hudi-dla-sync/pom.xml
+++ b/hudi-sync/hudi-dla-sync/pom.xml
@@ -111,6 +111,12 @@
org.apache.hadoophadoop-common
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hive
diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml
index 19c1233d371bc..6e40304a21ea9 100644
--- a/hudi-sync/hudi-hive-sync/pom.xml
+++ b/hudi-sync/hudi-hive-sync/pom.xml
@@ -73,6 +73,12 @@
org.apache.hadoophadoop-common
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hadoop
@@ -81,6 +87,12 @@
org.apache.hadoophadoop-hdfs
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hadoop
@@ -91,12 +103,24 @@
hadoop-commonteststest
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hadoophadoop-hdfsteststest
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -104,12 +128,36 @@
${hive.groupid}hive-service${hive.version}
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+ org.eclipse.jetty
+ *
+
+ test${hive.groupid}hive-jdbc${hive.version}
+
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
+ ${hive.groupid}
@@ -120,6 +168,12 @@
${hive.groupid}hive-common${hive.version}
+
+
+ org.eclipse.jetty
+ *
+
+
diff --git a/hudi-sync/hudi-sync-common/pom.xml b/hudi-sync/hudi-sync-common/pom.xml
index 1f1abb4f177f1..c2ea3938c5d72 100644
--- a/hudi-sync/hudi-sync-common/pom.xml
+++ b/hudi-sync/hudi-sync-common/pom.xml
@@ -44,6 +44,12 @@
org.apache.hadoophadoop-common
+
+
+ org.eclipse.jetty
+ *
+
+
diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml
index cb2c643c78741..8af897ab446ff 100644
--- a/hudi-timeline-service/pom.xml
+++ b/hudi-timeline-service/pom.xml
@@ -73,6 +73,12 @@
org.apache.hudihudi-common${project.version}
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -104,7 +110,7 @@
io.javalinjavalin
- 2.8.0
+ ${javalin.version}
@@ -137,6 +143,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -157,6 +167,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -167,6 +181,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -178,6 +196,12 @@
teststest-jartest
+
+
+ org.eclipse.jetty
+ *
+
+
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java
index 1d3bb583a0861..159685418d834 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/RequestHandler.java
@@ -41,9 +41,9 @@
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
-import io.javalin.BadRequestResponse;
-import io.javalin.Context;
-import io.javalin.Handler;
+import io.javalin.http.BadRequestResponse;
+import io.javalin.http.Context;
+import io.javalin.http.Handler;
import io.javalin.Javalin;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -227,14 +227,14 @@ private void registerTimelineAPI() {
app.get(RemoteHoodieTableFileSystemView.LAST_INSTANT, new ViewHandler(ctx -> {
metricsRegistry.add("LAST_INSTANT", 1);
List dtos = instantHandler
- .getLastInstant(ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getValue());
+ .getLastInstant(ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, false));
app.get(RemoteHoodieTableFileSystemView.TIMELINE, new ViewHandler(ctx -> {
metricsRegistry.add("TIMELINE", 1);
TimelineDTO dto = instantHandler
- .getTimeline(ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getValue());
+ .getTimeline(ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get());
writeValueAsString(ctx, dto);
}, false));
}
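For context on the Javalin API migration in this file: the 2.x validator style is replaced throughout by the typed query-parameter accessor, as in this condensed sketch (the "basepath" literal is a placeholder; the real code uses the RemoteHoodieTableFileSystemView constants):

    // Javalin 2.x style (removed):
    //   String basePath = ctx.validatedQueryParam("basepath").getOrThrow();
    // Style used in this patch:
    String basePath = ctx.queryParam("basepath", String.class).get(); // rejects the request if missing/invalid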
@@ -246,7 +246,7 @@ private void registerDataFilesAPI() {
app.get(RemoteHoodieTableFileSystemView.LATEST_PARTITION_DATA_FILES_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_PARTITION_DATA_FILES", 1);
List dtos = dataFileHandler.getLatestDataFiles(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
}, true));
@@ -254,42 +254,42 @@ private void registerDataFilesAPI() {
app.get(RemoteHoodieTableFileSystemView.LATEST_PARTITION_DATA_FILE_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_PARTITION_DATA_FILE", 1);
List dtos = dataFileHandler.getLatestDataFile(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""),
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.FILEID_PARAM).getOrThrow());
+ ctx.queryParam(RemoteHoodieTableFileSystemView.FILEID_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.LATEST_ALL_DATA_FILES, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_ALL_DATA_FILES", 1);
List dtos = dataFileHandler
- .getLatestDataFiles(ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow());
+ .getLatestDataFiles(ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_DATA_FILES_BEFORE_ON_INSTANT", 1);
List dtos = dataFileHandler.getLatestDataFilesBeforeOrOn(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""),
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM).getOrThrow());
+ ctx.queryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.LATEST_DATA_FILE_ON_INSTANT_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_DATA_FILE_ON_INSTANT", 1);
List dtos = dataFileHandler.getLatestDataFileOn(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""),
ctx.queryParam(RemoteHoodieTableFileSystemView.INSTANT_PARAM),
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.FILEID_PARAM).getOrThrow());
+ ctx.queryParam(RemoteHoodieTableFileSystemView.FILEID_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.ALL_DATA_FILES, new ViewHandler(ctx -> {
metricsRegistry.add("ALL_DATA_FILES", 1);
List dtos = dataFileHandler.getAllDataFiles(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
}, true));
@@ -297,8 +297,8 @@ private void registerDataFilesAPI() {
app.get(RemoteHoodieTableFileSystemView.LATEST_DATA_FILES_RANGE_INSTANT_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_DATA_FILES_RANGE_INSTANT", 1);
List dtos = dataFileHandler.getLatestDataFilesInRange(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(), Arrays
- .asList(ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.INSTANTS_PARAM).getOrThrow().split(",")));
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(), Arrays
+ .asList(ctx.queryParam(RemoteHoodieTableFileSystemView.INSTANTS_PARAM, String.class).get().split(",")));
writeValueAsString(ctx, dtos);
}, true));
}
@@ -310,7 +310,7 @@ private void registerFileSlicesAPI() {
app.get(RemoteHoodieTableFileSystemView.LATEST_PARTITION_SLICES_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_PARTITION_SLICES", 1);
List dtos = sliceHandler.getLatestFileSlices(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
}, true));
@@ -318,16 +318,16 @@ private void registerFileSlicesAPI() {
app.get(RemoteHoodieTableFileSystemView.LATEST_PARTITION_SLICE_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_PARTITION_SLICE", 1);
List dtos = sliceHandler.getLatestFileSlice(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""),
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.FILEID_PARAM).getOrThrow());
+ ctx.queryParam(RemoteHoodieTableFileSystemView.FILEID_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.LATEST_PARTITION_UNCOMPACTED_SLICES_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_PARTITION_UNCOMPACTED_SLICES", 1);
List dtos = sliceHandler.getLatestUnCompactedFileSlices(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
}, true));
@@ -335,7 +335,7 @@ private void registerFileSlicesAPI() {
app.get(RemoteHoodieTableFileSystemView.ALL_SLICES_URL, new ViewHandler(ctx -> {
metricsRegistry.add("ALL_SLICES", 1);
List dtos = sliceHandler.getAllFileSlices(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
}, true));
@@ -343,43 +343,42 @@ private void registerFileSlicesAPI() {
app.get(RemoteHoodieTableFileSystemView.LATEST_SLICES_RANGE_INSTANT_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_SLICE_RANGE_INSTANT", 1);
List dtos = sliceHandler.getLatestFileSliceInRange(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(), Arrays
- .asList(ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.INSTANTS_PARAM).getOrThrow().split(",")));
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(), Arrays
+ .asList(ctx.queryParam(RemoteHoodieTableFileSystemView.INSTANTS_PARAM, String.class).get().split(",")));
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_SLICES_MERGED_BEFORE_ON_INSTANT", 1);
List dtos = sliceHandler.getLatestMergedFileSlicesBeforeOrOn(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""),
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM).getOrThrow());
+ ctx.queryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.LATEST_SLICES_BEFORE_ON_INSTANT_URL, new ViewHandler(ctx -> {
metricsRegistry.add("LATEST_SLICES_BEFORE_ON_INSTANT", 1);
List dtos = sliceHandler.getLatestFileSlicesBeforeOrOn(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""),
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM, String.class).get(),
Boolean.parseBoolean(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM)
- .getOrThrow()));
+ ctx.queryParam(RemoteHoodieTableFileSystemView.INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM, String.class).get()));
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.PENDING_COMPACTION_OPS, new ViewHandler(ctx -> {
metricsRegistry.add("PEDING_COMPACTION_OPS", 1);
List dtos = sliceHandler.getPendingCompactionOperations(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow());
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
app.get(RemoteHoodieTableFileSystemView.ALL_FILEGROUPS_FOR_PARTITION_URL, new ViewHandler(ctx -> {
metricsRegistry.add("ALL_FILEGROUPS_FOR_PARTITION", 1);
List dtos = sliceHandler.getAllFileGroups(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
}, true));
@@ -387,14 +386,14 @@ private void registerFileSlicesAPI() {
app.post(RemoteHoodieTableFileSystemView.REFRESH_TABLE, new ViewHandler(ctx -> {
metricsRegistry.add("REFRESH_TABLE", 1);
boolean success = sliceHandler
- .refreshTable(ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow());
+ .refreshTable(ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get());
writeValueAsString(ctx, success);
}, false));
app.get(RemoteHoodieTableFileSystemView.ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON, new ViewHandler(ctx -> {
metricsRegistry.add("ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON", 1);
List dtos = sliceHandler.getReplacedFileGroupsBeforeOrOn(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM,""),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
@@ -403,7 +402,7 @@ private void registerFileSlicesAPI() {
app.get(RemoteHoodieTableFileSystemView.ALL_REPLACED_FILEGROUPS_BEFORE, new ViewHandler(ctx -> {
metricsRegistry.add("ALL_REPLACED_FILEGROUPS_BEFORE", 1);
List dtos = sliceHandler.getReplacedFileGroupsBefore(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.MAX_INSTANT_PARAM,""),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
@@ -412,7 +411,7 @@ private void registerFileSlicesAPI() {
app.get(RemoteHoodieTableFileSystemView.ALL_REPLACED_FILEGROUPS_PARTITION, new ViewHandler(ctx -> {
metricsRegistry.add("ALL_REPLACED_FILEGROUPS_PARTITION", 1);
List dtos = sliceHandler.getAllReplacedFileGroups(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow(),
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get(),
ctx.queryParam(RemoteHoodieTableFileSystemView.PARTITION_PARAM,""));
writeValueAsString(ctx, dtos);
}, true));
@@ -420,7 +419,7 @@ private void registerFileSlicesAPI() {
app.get(RemoteHoodieTableFileSystemView.PENDING_CLUSTERING_FILEGROUPS, new ViewHandler(ctx -> {
metricsRegistry.add("PENDING_CLUSTERING_FILEGROUPS", 1);
List dtos = sliceHandler.getFileGroupsInPendingClustering(
- ctx.validatedQueryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM).getOrThrow());
+ ctx.queryParam(RemoteHoodieTableFileSystemView.BASEPATH_PARAM, String.class).get());
writeValueAsString(ctx, dtos);
}, true));
}
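
The `-`/`+` pairs above capture the Javalin 2 → 3 query-parameter change: `ctx.validatedQueryParam(name).getOrThrow()` becomes the typed `ctx.queryParam(name, String.class).get()`, while `ctx.queryParam(name, "")` keeps the optional-with-default behavior. A minimal, self-contained sketch of the new pattern (endpoint and parameter names are illustrative, not the Hudi API):

```java
import io.javalin.Javalin;

// Minimal Javalin 3 sketch of the query-param pattern used in RequestHandler above.
public class QueryParamSketch {
  public static void main(String[] args) {
    Javalin app = Javalin.create().start(7070);

    app.get("/filegroups", ctx -> {
      // Required parameter: the validator's get() fails the request with a 400
      // when the parameter is missing, replacing validatedQueryParam(...).getOrThrow().
      String basePath = ctx.queryParam("basepath", String.class).get();
      // Optional parameter with a default, as used for PARTITION_PARAM above.
      String partition = ctx.queryParam("partition", "");
      ctx.result(basePath + ":" + partition);
    });
  }
}
```
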
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
index 40669f50e42d6..c8aca058b1ea2 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/TimelineService.java
@@ -18,6 +18,7 @@
package org.apache.hudi.timeline.service;
+import io.javalin.core.JettyUtil;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.SerializableConfiguration;
@@ -31,7 +32,6 @@
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import io.javalin.Javalin;
-import io.javalin.core.util.JettyServerUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
@@ -273,13 +273,13 @@ private int startServiceOnPort(int port) throws IOException {
}
public int startService() throws IOException {
- final Server server = timelineServerConf.numThreads == DEFAULT_NUM_THREADS ? JettyServerUtil.defaultServer()
+ final Server server = timelineServerConf.numThreads == DEFAULT_NUM_THREADS ? JettyUtil.getOrDefault(null)
: new Server(new QueuedThreadPool(timelineServerConf.numThreads));
- app = Javalin.create().server(() -> server);
- if (!timelineServerConf.compress) {
- app.disableDynamicGzip();
- }
+ app = Javalin.create(config -> {
+ config.server(() -> server);
+ config.dynamicGzip = timelineServerConf.compress;
+ });
requestHandler = new RequestHandler(
app, conf, timelineServerConf, context, fs, fsViewsManager);
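
This is the other half of the Javalin 3 migration: server and gzip settings move out of chained calls on the `Javalin` instance and into the `Javalin.create(config -> ...)` lambda. A minimal sketch of that bootstrap, assuming a hand-built Jetty thread pool (pool size and port are illustrative, not Hudi defaults):

```java
import io.javalin.Javalin;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.util.thread.QueuedThreadPool;

// Sketch of the Javalin.create(config -> ...) bootstrap pattern used by TimelineService.
public class TimelineServerBootstrapSketch {
  public static void main(String[] args) {
    Server jetty = new Server(new QueuedThreadPool(16)); // stand-in for the configured pool
    boolean compress = true;                             // stand-in for timelineServerConf.compress

    Javalin app = Javalin.create(config -> {
      config.server(() -> jetty);     // was: Javalin.create().server(() -> server)
      config.dynamicGzip = compress;  // was: app.disableDynamicGzip() when compression is off
    });

    app.get("/health", ctx -> ctx.result("OK"));
    app.start(26754);
  }
}
```
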
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/MarkerHandler.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/MarkerHandler.java
index e793c20432f92..1251afe6cf60e 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/MarkerHandler.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/MarkerHandler.java
@@ -27,7 +27,7 @@
import org.apache.hudi.timeline.service.handlers.marker.MarkerCreationFuture;
import org.apache.hudi.timeline.service.handlers.marker.MarkerDirState;
-import io.javalin.Context;
+import io.javalin.http.Context;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
diff --git a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerCreationFuture.java b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerCreationFuture.java
index 5ff8baa90da1f..d965e56a01cb9 100644
--- a/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerCreationFuture.java
+++ b/hudi-timeline-service/src/main/java/org/apache/hudi/timeline/service/handlers/marker/MarkerCreationFuture.java
@@ -20,7 +20,7 @@
import org.apache.hudi.common.util.HoodieTimer;
-import io.javalin.Context;
+import io.javalin.http.Context;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
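
The two marker classes change only their import: `io.javalin.Context` moved to `io.javalin.http.Context` in Javalin 3. The async-completion style they are written around is unchanged; below is a minimal illustration (endpoint name made up, not the Hudi marker API) of completing a request from a `CompletableFuture`, the mechanism a class like `MarkerCreationFuture` plugs into:

```java
import io.javalin.Javalin;
import java.util.concurrent.CompletableFuture;

// Sketch: Javalin 3 completes a request asynchronously when the result is a future.
public class AsyncHandlerSketch {
  public static void main(String[] args) {
    Javalin app = Javalin.create().start(7071);
    app.post("/markers", ctx ->
        ctx.result(CompletableFuture.supplyAsync(() -> "marker-created")));
  }
}
```
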
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index 39510537ba2fe..c9c3979ee1a04 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -39,9 +39,10 @@
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
+        <version>${maven-compiler-plugin.version}</version>
-        <source>1.8</source>
-        <target>1.8</target>
+        <source>${java.version}</source>
+        <target>${java.version}</target>
@@ -341,6 +342,12 @@
hadoop-hdfsteststest
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hadoop
@@ -360,6 +367,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -381,12 +392,26 @@
org.eclipse.jetty.orbitjavax.servlet
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+ ${hive.groupid}hive-service${hive.version}
+
+
+ org.eclipse.jetty
+ *
+
+
diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml
index 222478090b4b0..e61b1fcc0a572 100644
--- a/packaging/hudi-flink-bundle/pom.xml
+++ b/packaging/hudi-flink-bundle/pom.xml
@@ -70,6 +70,7 @@
META-INF/LICENSEtarget/classes/META-INF/LICENSE
+
@@ -137,7 +138,7 @@
org.apache.hive:hive-serviceorg.apache.hive:hive-service-rpcorg.apache.hive:hive-exec
- org.apache.hive:hive-standalone-metastore
+ org.apache.hive:hive-standalone-metastoreorg.apache.hive:hive-metastoreorg.apache.hive:hive-jdbcorg.datanucleus:datanucleus-core
@@ -147,10 +148,22 @@
org.apache.hbase:hbase-commonorg.apache.hbase:hbase-client
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-apiorg.apache.hbase:hbase-server
- org.apache.hbase:hbase-protocol
- org.apache.htrace:htrace-core
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-protocol-shaded
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4commons-codec:commons-codec
+ commons-io:commons-io
@@ -162,6 +175,22 @@
org.apache.avro.${flink.bundle.shade.prefix}org.apache.avro.
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+ com.yammer.metrics.${flink.bundle.shade.prefix}com.yammer.metrics.
@@ -191,6 +220,74 @@
com.fasterxml.jackson.${flink.bundle.shade.prefix}com.fasterxml.jackson.
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+
@@ -200,6 +297,8 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.proto
+ hbase-webapps/**
@@ -267,11 +366,23 @@
org.apache.hudihudi-hadoop-mr${project.version}
+
+
+ guava
+ com.google.guava
+
+ org.apache.hudihudi-hive-sync${project.version}
+
+
+ guava
+ com.google.guava
+
+ org.apache.hudi
@@ -282,6 +393,10 @@
rocksdbjniorg.rocksdb
+
+ guava
+ com.google.guava
+
@@ -444,6 +559,10 @@
javax.servlet.jsp*
+
+ org.eclipse.jetty.aggregate
+ *
+ org.eclipse.jetty*
@@ -545,66 +664,6 @@
jackson-annotationscompile
-
-
-
- org.apache.hbase
- hbase-common
- ${hbase.version}
-
-
- guava
- com.google.guava
-
-
-
-
- org.apache.hbase
- hbase-server
- ${hbase.version}
- compile
-
-
- guava
- com.google.guava
-
-
- org.apache.hbase
- hbase-common
-
-
- javax.servlet
- *
-
-
- org.codehaus.jackson
- *
-
-
- org.mortbay.jetty
- *
-
-
- tomcat
- *
-
-
-
-
- org.apache.hbase
- hbase-client
- ${hbase.version}
-
-
- org.apache.hbase
- hbase-protocol
- ${hbase.version}
-
-
- org.apache.htrace
- htrace-core
- ${htrace.version}
-
diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml
index f6215b1e017a5..8391a843b4c83 100644
--- a/packaging/hudi-hadoop-mr-bundle/pom.xml
+++ b/packaging/hudi-hadoop-mr-bundle/pom.xml
@@ -55,30 +55,44 @@
- true
+ trueMETA-INF/LICENSEtarget/classes/META-INF/LICENSE
+ org.apache.hudi:hudi-commonorg.apache.hudi:hudi-hadoop-mr
-
+
org.apache.parquet:parquet-avro
+ org.apache.parquet:parquet-hadoop-bundleorg.apache.avro:avrocom.esotericsoftware:kryo-shadedorg.objenesis:objenesiscom.esotericsoftware:minlogorg.apache.hbase:hbase-commonorg.apache.hbase:hbase-client
- org.apache.hbase:hbase-protocol
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shadedorg.apache.hbase:hbase-server
- org.apache.htrace:htrace-core
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4com.yammer.metrics:metrics-corecom.google.guava:guava
+ commons-io:commons-io
@@ -103,13 +117,101 @@
org.apache.hudi.org.apache.avro.
- org.apache.parquet.avro.
- org.apache.hudi.org.apache.parquet.avro.
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+
+
+ org.apache.parquet.
+ org.apache.hudi.org.apache.parquet.
+
+
+ shaded.parquet.
+ org.apache.hudi.shaded.parquet.com.google.common.org.apache.hudi.com.google.common.
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+ false
@@ -120,6 +222,8 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.proto
+ hbase-webapps/**
@@ -160,34 +264,24 @@
compile
-
-
- org.apache.avro
- avro
- ${avro.version}
- compile
-
-
- org.apache.htrace
- htrace-core
- ${htrace.version}
+ org.apache.parquet
+ parquet-hadoop-bundle
+ ${parquet.version}compile
-
-
- org.apache.hbase
- hbase-common
- ${hbase.version}
-
-
+
- org.apache.hbase
- hbase-server
- ${hbase.version}
+ org.apache.avro
+ avro
+ ${avro.version}compile
+
+ guava
+ com.google.guava
+ org.apache.hbasehbase-common
@@ -204,6 +298,10 @@
org.mortbay.jetty*
+
+ org.eclipse.jetty
+ *
+ tomcat*
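
The relocations added to this bundle move the HBase, htrace, commons-io and Parquet packages under the `org.apache.hudi.` prefix, with a handful of `org.apache.hadoop.metrics2` classes relocated individually. A quick way to check that a built jar picked them up is to scan its entries; the bundle path below is an assumption, not a guaranteed artifact name:

```java
import java.util.jar.JarFile;

// Counts relocated vs. unshaded HBase class entries in a shaded bundle jar.
public class RelocationCheck {
  public static void main(String[] args) throws Exception {
    String bundle = args.length > 0 ? args[0]
        : "packaging/hudi-hadoop-mr-bundle/target/hudi-hadoop-mr-bundle.jar"; // hypothetical path
    try (JarFile jar = new JarFile(bundle)) {
      long relocated = jar.stream()
          .filter(e -> e.getName().startsWith("org/apache/hudi/org/apache/hadoop/hbase/"))
          .count();
      long unshaded = jar.stream()
          .filter(e -> e.getName().startsWith("org/apache/hadoop/hbase/"))
          .count();
      System.out.println("relocated HBase entries: " + relocated + ", unshaded left: " + unshaded);
    }
  }
}
```
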
diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml
index b53e02aaf7768..d6318478d4d9e 100644
--- a/packaging/hudi-integ-test-bundle/pom.xml
+++ b/packaging/hudi-integ-test-bundle/pom.xml
@@ -62,6 +62,7 @@
META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+
@@ -85,6 +86,20 @@
org.apache.hudi:hudi-awsorg.apache.hudi:hudi-integ-test
+ org.apache.hbase:hbase-common
+ org.apache.hbase:hbase-client
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shaded
+ org.apache.hbase:hbase-server
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4
+ commons-io:commons-io
+
org.jetbrains.kotlin:kotlin-stdlib-jdk8org.jetbrains.kotlin:kotlin-stdliborg.jetbrains.kotlin:kotlin-stdlib-common
@@ -133,7 +148,6 @@
org.apache.hive:hive-commonorg.apache.hive:hive-service
- org.apache.hive:hive-metastoreorg.apache.hive:hive-jdbcorg.apache.hive:hive-exec
@@ -156,7 +170,6 @@
com.fasterxml.jackson.core:jackson-databindcom.fasterxml.jackson.dataformat:jackson-dataformat-yaml
- org.apache.htrace:htrace-coreorg.apache.curator:curator-frameworkorg.apache.curator:curator-clientorg.apache.curator:curator-recipes
@@ -179,6 +192,22 @@
org.apache.commons.pool.org.apache.hudi.org.apache.commons.pool.
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+ org.apache.hive.jdbc.org.apache.hudi.org.apache.hive.jdbc.
@@ -259,6 +288,74 @@
org.apache.parquet.avro.org.apache.hudi.org.apache.parquet.avro.
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+
@@ -270,6 +367,8 @@
META-INF/NOTICE*META-INF/LICENSE*
+ **/*.proto
+ hbase-webapps/**
@@ -300,7 +399,7 @@
       <groupId>io.javalin</groupId>
       <artifactId>javalin</artifactId>
-      <version>2.8.0</version>
+      <version>${javalin.version}</version>
@@ -365,6 +464,12 @@
hadoop-hdfsteststest
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hadoop
@@ -383,6 +488,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -398,6 +507,12 @@
hive-metastore${hive.version}provided
+
+
+ org.apache.hbase
+ *
+
+
@@ -418,6 +533,14 @@
org.pentaho*
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
@@ -434,6 +557,14 @@
javax.servletservlet-api
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
@@ -442,6 +573,12 @@
hive-common${hive.version}compile
+
+
+ org.eclipse.jetty
+ *
+
+
diff --git a/packaging/hudi-kafka-connect-bundle/pom.xml b/packaging/hudi-kafka-connect-bundle/pom.xml
index f66bc7f051e48..daf8f2f00765f 100644
--- a/packaging/hudi-kafka-connect-bundle/pom.xml
+++ b/packaging/hudi-kafka-connect-bundle/pom.xml
@@ -58,14 +58,16 @@
implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer">
+ implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
true
+ implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
META-INF/LICENSEtarget/classes/META-INF/LICENSE
+
@@ -115,13 +117,21 @@
org.objenesis:objenesiscom.esotericsoftware:kryo-shadedcom.esotericsoftware:minlog
-
+
org.apache.hbase:hbase-clientorg.apache.hbase:hbase-common
- org.apache.hbase:hbase-protocol
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shadedorg.apache.hbase:hbase-server
- org.apache.htrace:htrace-core
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4org.scala-lang:*
+ commons-io:commons-io
@@ -131,15 +141,104 @@
com.yammer.metrics.
- ${kafka.connect.bundle.shade.prefix}com.yammer.metrics.
+ ${kafka.connect.bundle.shade.prefix}com.yammer.metrics.
+ com.beust.jcommander.
- ${kafka.connect.bundle.shade.prefix}com.beust.jcommander.
+ ${kafka.connect.bundle.shade.prefix}com.beust.jcommander.
+ org.eclipse.jetty.
- ${kafka.connect.bundle.shade.prefix}org.eclipse.jetty.
+ ${kafka.connect.bundle.shade.prefix}org.eclipse.jetty.
+
+
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
@@ -150,6 +249,8 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.proto
+ hbase-webapps/**
@@ -268,6 +369,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -291,6 +396,10 @@
org.slf4jslf4j-log4j12
+
+ org.eclipse.jetty
+ *
+
@@ -306,6 +415,16 @@
hive-jdbc${hive.version}${utilities.bundle.hive.scope}
+
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -320,13 +439,12 @@
hive-common${hive.version}${utilities.bundle.hive.scope}
-
-
-
- org.apache.htrace
- htrace-core
- ${htrace.version}
- compile
+
+
+ org.eclipse.jetty
+ *
+
+
diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml
index 90c1087dcb4d2..b50c79fe33063 100644
--- a/packaging/hudi-presto-bundle/pom.xml
+++ b/packaging/hudi-presto-bundle/pom.xml
@@ -61,6 +61,7 @@
META-INF/LICENSEtarget/classes/META-INF/LICENSE
+
@@ -75,20 +76,48 @@
com.esotericsoftware:minlogorg.apache.hbase:hbase-commonorg.apache.hbase:hbase-client
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-apiorg.apache.hbase:hbase-protocol
- org.apache.hbase:hbase-server
- org.apache.htrace:htrace-core
+ org.apache.hbase:hbase-protocol-shaded
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4com.yammer.metrics:metrics-corecom.google.guava:guava
+ commons-io:commons-iocommons-lang:commons-langcom.google.protobuf:protobuf-java
+
+ org.apache.parquet.avro.
+ org.apache.hudi.org.apache.parquet.avro.
+ org.apache.avro.org.apache.hudi.org.apache.avro.
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+ org.codehaus.jackson.org.apache.hudi.org.codehaus.jackson.
@@ -121,14 +150,78 @@
com.google.protobuf.${presto.bundle.bootstrap.shade.prefix}com.google.protobuf.
-
- org.apache.htrace.
- ${presto.bundle.bootstrap.shade.prefix}org.apache.htrace.
- org.apache.parquet.avro.${presto.bundle.bootstrap.shade.prefix}org.apache.parquet.avro.
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+ false
@@ -139,7 +232,9 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.protocom/esotericsoftware/reflectasm/**
+ hbase-webapps/**stringBehavior.avsc
@@ -171,20 +266,6 @@
org.apache.hudihudi-hadoop-mr-bundle${project.version}
-
-
- org.apache.hbase
- hbase-common
-
-
- org.apache.hbase
- hbase-server
-
-
- org.apache.hbase
- hbase-client
-
-
@@ -201,42 +282,6 @@
compile
-
-
- org.apache.hbase
- hbase-common
- ${hbase.version}
-
-
-
- org.apache.hbase
- hbase-server
- ${hbase.version}
- compile
-
-
- org.apache.hbase
- hbase-common
-
-
- javax.servlet
- *
-
-
- org.codehaus.jackson
- *
-
-
- org.mortbay.jetty
- *
-
-
- tomcat
- *
-
-
-
-
diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml
index a877d10a586a8..6fb0023e95126 100644
--- a/packaging/hudi-spark-bundle/pom.xml
+++ b/packaging/hudi-spark-bundle/pom.xml
@@ -63,6 +63,7 @@
META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+
@@ -116,13 +117,25 @@
org.apache.hbase:hbase-clientorg.apache.hbase:hbase-common
- org.apache.hbase:hbase-protocol
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shadedorg.apache.hbase:hbase-server
- org.apache.htrace:htrace-core
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4org.apache.curator:curator-frameworkorg.apache.curator:curator-clientorg.apache.curator:curator-recipescommons-codec:commons-codec
+ commons-io:commons-io
@@ -134,6 +147,22 @@
com.beust.jcommander.org.apache.hudi.com.beust.jcommander.
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+ org.apache.spark.sql.avro.${spark.bundle.spark.shade.prefix}org.apache.spark.sql.avro.
@@ -183,6 +212,74 @@
${spark.bundle.spark.shade.prefix}com.google.common.
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+
@@ -192,6 +289,8 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.proto
+ hbase-webapps/**
@@ -284,6 +383,12 @@
hive-service${hive.version}${spark.bundle.hive.scope}
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -298,6 +403,16 @@
hive-jdbc${hive.version}${spark.bundle.hive.scope}
+
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -312,59 +427,13 @@
hive-common${hive.version}${spark.bundle.hive.scope}
-
-
-
- org.apache.htrace
- htrace-core
- ${htrace.version}
- compile
-
-
-
-
- org.apache.hbase
- hbase-common
- ${hbase.version}
-
-
- org.apache.hbase
- hbase-server
- ${hbase.version}
- compile
- org.apache.hbase
- hbase-common
-
-
- javax.servlet
- *
-
-
- org.codehaus.jackson
- *
-
-
- org.mortbay.jetty
- *
-
-
- tomcat
+ org.eclipse.jetty*
-
- org.apache.hbase
- hbase-client
- ${hbase.version}
-
-
- org.apache.hbase
- hbase-protocol
- ${hbase.version}
-
diff --git a/packaging/hudi-timeline-server-bundle/pom.xml b/packaging/hudi-timeline-server-bundle/pom.xml
index 618d3d2122315..11f80daf901b2 100644
--- a/packaging/hudi-timeline-server-bundle/pom.xml
+++ b/packaging/hudi-timeline-server-bundle/pom.xml
@@ -71,7 +71,7 @@
       <groupId>io.javalin</groupId>
       <artifactId>javalin</artifactId>
-      <version>2.8.0</version>
+      <version>${javalin.version}</version>
@@ -102,6 +102,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -120,6 +124,10 @@
javax.servlet*
+
+ org.eclipse.jetty
+ *
+
@@ -155,6 +163,8 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.proto
+ hbase-webapps/**
@@ -198,16 +208,110 @@
com.fasterxml.jackson.core:jackson-annotationscom.fasterxml.jackson.core:jackson-corecom.fasterxml.jackson.core:jackson-databind
- org.apache.htrace:htrace-coreorg.apache.hbase:hbase-commonorg.apache.hbase:hbase-client
- org.apache.hbase:hbase-protocol
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shadedorg.apache.hbase:hbase-server
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4com.esotericsoftware:kryo-shadedcom.esotericsoftware:minlog
+ commons-io:commons-iolog4j:log4j
+
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+
+
diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml
index adf73f1bb0b83..68861cb6e8a0c 100644
--- a/packaging/hudi-trino-bundle/pom.xml
+++ b/packaging/hudi-trino-bundle/pom.xml
@@ -62,6 +62,7 @@
META-INF/LICENSEtarget/classes/META-INF/LICENSE
+
@@ -76,22 +77,49 @@
com.esotericsoftware:minlogorg.apache.hbase:hbase-commonorg.apache.hbase:hbase-client
- org.apache.hbase:hbase-protocol
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shadedorg.apache.hbase:hbase-serverorg.apache.hbase:hbase-annotations
- org.apache.htrace:htrace-core
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.htrace:htrace-core4com.yammer.metrics:metrics-corecom.google.guava:guavacommons-lang:commons-lang
+ commons-io:commons-iocom.google.protobuf:protobuf-java
-
+
+ org.apache.parquet.avro.
+ org.apache.hudi.org.apache.parquet.avro.
+ org.apache.avro.org.apache.hudi.org.apache.avro.
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+ org.codehaus.jackson.org.apache.hudi.org.codehaus.jackson.
@@ -124,6 +152,74 @@
com.google.protobuf.${trino.bundle.bootstrap.shade.prefix}com.google.protobuf.
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+ false
@@ -134,6 +230,8 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.proto
+ hbase-webapps/**
@@ -155,71 +253,10 @@
-
- org.apache.hudi
- hudi-common
- ${project.version}
-
-
- org.apache.hbase
- hbase-server
-
-
- org.apache.hbase
- hbase-client
-
-
- org.apache.hudihudi-hadoop-mr-bundle${project.version}
-
-
- org.apache.hbase
- hbase-server
-
-
- org.apache.hbase
- hbase-client
-
-
-
-
-
-
- org.apache.hbase
- hbase-common
- ${hbase.version}
-
-
-
- org.apache.hbase
- hbase-server
- ${hbase.version}
- compile
-
-
- org.apache.hbase
- hbase-common
-
-
- javax.servlet
- *
-
-
- org.codehaus.jackson
- *
-
-
- org.mortbay.jetty
- *
-
-
- tomcat
- *
-
-
diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml
index 1ffca7634a1ff..eeddd8977adfc 100644
--- a/packaging/hudi-utilities-bundle/pom.xml
+++ b/packaging/hudi-utilities-bundle/pom.xml
@@ -86,6 +86,7 @@
META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+
@@ -150,13 +151,25 @@
org.apache.hbase:hbase-clientorg.apache.hbase:hbase-common
- org.apache.hbase:hbase-protocol
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-protocol-shadedorg.apache.hbase:hbase-server
- org.apache.htrace:htrace-core
+ org.apache.hbase:hbase-hadoop-compat
+ org.apache.hbase:hbase-hadoop2-compat
+ org.apache.hbase:hbase-metrics-api
+ org.apache.hbase:hbase-metrics
+ org.apache.hbase.thirdparty:hbase-shaded-miscellaneous
+ org.apache.hbase.thirdparty:hbase-shaded-netty
+ org.apache.hbase.thirdparty:hbase-shaded-protobuf
+ org.apache.htrace:htrace-core4org.apache.curator:curator-frameworkorg.apache.curator:curator-clientorg.apache.curator:curator-recipescommons-codec:commons-codec
+ commons-io:commons-io
@@ -172,6 +185,22 @@
org.apache.hive.jdbc.${utilities.bundle.hive.shade.prefix}org.apache.hive.jdbc.
+
+ org.apache.commons.io.
+ org.apache.hudi.org.apache.commons.io.
+
+
+ org.apache.hadoop.hbase.
+ org.apache.hudi.org.apache.hadoop.hbase.
+
+
+ org.apache.hbase.
+ org.apache.hudi.org.apache.hbase.
+
+
+ org.apache.htrace.
+ org.apache.hudi.org.apache.htrace.
+ org.apache.hadoop.hive.metastore.${utilities.bundle.hive.shade.prefix}org.apache.hadoop.hive.metastore.
@@ -208,6 +237,74 @@
org.eclipse.jetty.org.apache.hudi.org.eclipse.jetty.
+
+
+ org.apache.hadoop.metrics2.MetricHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricHistogram
+
+
+
+ org.apache.hadoop.metrics2.MetricsExecutor
+ org.apache.hudi.org.apache.hadoop.metrics2.MetricsExecutor
+
+
+
+ org.apache.hadoop.metrics2.impl.JmxCacheBuster
+ org.apache.hudi.org.apache.hadoop.metrics2.impl.JmxCacheBuster
+
+
+ org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DefaultMetricsSystemHelper
+
+
+
+ org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry
+
+
+
+ org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MetricsExecutorImpl
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableFastCounter
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableFastCounter
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableRangeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableSizeHistogram
+
+
+
+ org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+ org.apache.hudi.org.apache.hadoop.metrics2.lib.MutableTimeHistogram
+
+
+
+ org.apache.hadoop.metrics2.util.MetricQuantile
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricQuantile
+
+
+
+ org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+ org.apache.hudi.org.apache.hadoop.metrics2.util.MetricSampleQuantiles
+
+
@@ -217,6 +314,8 @@
META-INF/*.DSAMETA-INF/*.RSAMETA-INF/services/javax.*
+ **/*.proto
+ hbase-webapps/**
@@ -303,6 +402,12 @@
hive-service${hive.version}${utilities.bundle.hive.scope}
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -317,6 +422,16 @@
hive-jdbc${hive.version}${utilities.bundle.hive.scope}
+
+
+ org.eclipse.jetty.aggregate
+ *
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -331,6 +446,12 @@
hive-common${hive.version}${utilities.bundle.hive.scope}
+
+
+ org.eclipse.jetty
+ *
+
+
@@ -345,6 +466,12 @@
org.apache.hbasehbase-common${hbase.version}
+
+
+ guava
+ com.google.guava
+
+ org.apache.hbase
@@ -352,6 +479,10 @@
${hbase.version}compile
+
+ guava
+ com.google.guava
+ org.apache.hbasehbase-common
@@ -368,6 +499,10 @@
org.mortbay.jetty*
+
+ org.eclipse.jetty
+ *
+ tomcat*
@@ -381,9 +516,41 @@
org.apache.hbase
- hbase-protocol
+ hbase-hadoop-compat
+ ${hbase.version}
+
+
+ org.apache.hbase
+ hbase-hadoop2-compat
+ ${hbase.version}
+
+
+ org.apache.hbase
+ hbase-metrics-api
+ ${hbase.version}
+
+
+
+
+ org.apache.hbase
+ hbase-protocol-shaded${hbase.version}
+
+ org.apache.hbase.thirdparty
+ hbase-shaded-miscellaneous
+ ${hbase-thirdparty.version}
+
+
+ org.apache.hbase.thirdparty
+ hbase-shaded-netty
+ ${hbase-thirdparty.version}
+
+
+ org.apache.hbase.thirdparty
+ hbase-shaded-protobuf
+ ${hbase-thirdparty.version}
+
diff --git a/pom.xml b/pom.xml
index 33bd112a1bfe3..5f65d64951eca 100644
--- a/pom.xml
+++ b/pom.xml
@@ -73,11 +73,12 @@
3.2.0
+ 3.3.03.0.0-M43.0.0-M43.2.43.1.1
- 3.8.0
+ 3.8.12.40.151.7
@@ -103,9 +104,9 @@
2.17.01.7.302.9.9
- 2.7.3
+ 3.1.0org.apache.hive
- 2.3.1
+ 3.1.2core4.1.11.6.0
@@ -125,13 +126,15 @@
${scala11.version}2.110.12
- 3.3.1
+ 4.5.43.0.1file://${project.basedir}/src/test/resources/log4j-surefire.properties0.12.0
-    <jetty.version>9.4.15.v20190215</jetty.version>
+    <jetty.version>9.4.43.v20210629</jetty.version>
+ 3.13.123.1.0-incubating
-    <hbase.version>1.2.3</hbase.version>
+    <hbase.version>2.4.9</hbase.version>
+ 3.5.11.9.131.4.1993.1.2
@@ -163,6 +166,7 @@
3.17.33.11.41.1.0
+ 3.5.78000http://localhost:${dynamodb-local.port}
@@ -359,17 +363,24 @@
maven-jar-plugin${maven-jar-plugin.version}
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+ ${maven-dependency-plugin.version}
+ net.alchim31.mavenscala-maven-plugin${scala-maven-plugin.version}
+ allfalseorg.apache.maven.pluginsmaven-compiler-plugin
+ ${maven-compiler-plugin.version}
@@ -760,6 +771,10 @@
javax.xml.bindjaxb-api
+
+ org.eclipse.jetty
+ *
+
@@ -804,6 +819,12 @@
teststest${hadoop.version}
+
+
+ org.eclipse.jetty
+ *
+
+ org.apache.hadoop
@@ -819,6 +840,10 @@
javax.xml.bindjaxb-api
+
+ org.eclipse.jetty
+ *
+
@@ -841,6 +866,10 @@
org.pentaho*
+
+ org.eclipse.jetty
+ *
+ org.apache.logging.log4j*
@@ -893,6 +922,10 @@
org.eclipse.jetty.aggregate*
+
+ org.eclipse.jetty
+ *
+
@@ -945,6 +978,10 @@
org.eclipse.jetty.aggregate*
+
+ org.eclipse.jetty
+ *
+ org.apache.logging.log4j*
@@ -1441,9 +1478,19 @@
org.apache.maven.pluginsmaven-compiler-plugin
+ ${maven-compiler-plugin.version}
+
${java.version}${java.version}
+
+ -verbose
+ -Xlint:unchecked
+
+
+ -verbose
+ -Xlint:unchecked
+
@@ -1522,7 +1569,7 @@
https://docs.spring.io/spring-shell/docs/1.2.0.RELEASE
https://fasterxml.github.io/jackson-databind/javadoc/2.6
https://hadoop.apache.org/docs/r${hadoop.version}/api
- https://hbase.apache.org/1.2/apidocs
+ https://hbase.apache.org/2.4/apidocs
https://hive.apache.org/javadocs/r2.3.6/api
https://javadoc.io/static/io.javalin/javalin/2.3.0
https://javadoc.io/doc/org.apache.parquet/parquet-avro/${parquet.version}
@@ -1623,7 +1670,7 @@
${fasterxml.spark3.version}${fasterxml.spark3.version}true
- true
+ falsehudi-spark-datasource/hudi-spark3