From b9a4de74b779ab800a82a400aed661e43220b34d Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Tue, 7 Dec 2021 13:15:06 -0800 Subject: [PATCH 1/3] [HUDI-2785] Add Trino setup in Docker Demo --- ...ker-compose_hadoop284_hive233_spark244.yml | 34 ++++- docker/hoodie/hadoop/base_java11/Dockerfile | 60 +++++++++ .../hoodie/hadoop/base_java11/entrypoint.sh | 107 ++++++++++++++++ .../hadoop/base_java11/export_container_ip.sh | 30 +++++ docker/hoodie/hadoop/base_java11/pom.xml | 96 +++++++++++++++ docker/hoodie/hadoop/pom.xml | 5 + docker/hoodie/hadoop/sparkadhoc/Dockerfile | 9 +- docker/hoodie/hadoop/sparkadhoc/adhoc.sh | 4 +- docker/hoodie/hadoop/sparkadhoc/pom.xml | 8 +- docker/hoodie/hadoop/trinobase/Dockerfile | 66 ++++++++++ docker/hoodie/hadoop/trinobase/pom.xml | 116 ++++++++++++++++++ .../hoodie/hadoop/trinobase/scripts/trino.sh | 21 ++++ .../hoodie/hadoop/trinocoordinator/Dockerfile | 29 +++++ .../etc/catalog/hive.properties | 35 ++++++ .../etc/catalog/jmx.properties | 19 +++ .../etc/catalog/memory.properties | 20 +++ .../etc/catalog/tpcds.properties | 19 +++ .../etc/catalog/tpch.properties | 19 +++ .../trinocoordinator/etc/config.properties | 26 ++++ .../hadoop/trinocoordinator/etc/jvm.config | 27 ++++ .../trinocoordinator/etc/log.properties | 19 +++ .../trinocoordinator/etc/node.properties | 21 ++++ docker/hoodie/hadoop/trinocoordinator/pom.xml | 96 +++++++++++++++ docker/hoodie/hadoop/trinoworker/Dockerfile | 29 +++++ .../trinoworker/etc/catalog/hive.properties | 35 ++++++ .../trinoworker/etc/catalog/jmx.properties | 19 +++ .../trinoworker/etc/catalog/memory.properties | 20 +++ .../trinoworker/etc/catalog/tpcds.properties | 19 +++ .../trinoworker/etc/catalog/tpch.properties | 19 +++ .../hadoop/trinoworker/etc/config.properties | 24 ++++ .../hoodie/hadoop/trinoworker/etc/jvm.config | 27 ++++ .../hadoop/trinoworker/etc/log.properties | 19 +++ .../hadoop/trinoworker/etc/node.properties | 21 ++++ docker/hoodie/hadoop/trinoworker/pom.xml | 96 +++++++++++++++ 34 
files changed, 1208 insertions(+), 6 deletions(-) create mode 100644 docker/hoodie/hadoop/base_java11/Dockerfile create mode 100644 docker/hoodie/hadoop/base_java11/entrypoint.sh create mode 100755 docker/hoodie/hadoop/base_java11/export_container_ip.sh create mode 100644 docker/hoodie/hadoop/base_java11/pom.xml create mode 100644 docker/hoodie/hadoop/trinobase/Dockerfile create mode 100644 docker/hoodie/hadoop/trinobase/pom.xml create mode 100644 docker/hoodie/hadoop/trinobase/scripts/trino.sh create mode 100644 docker/hoodie/hadoop/trinocoordinator/Dockerfile create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/config.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/jvm.config create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/log.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/node.properties create mode 100644 docker/hoodie/hadoop/trinocoordinator/pom.xml create mode 100644 docker/hoodie/hadoop/trinoworker/Dockerfile create mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties create mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties create mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties create mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties create mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties create mode 100644 docker/hoodie/hadoop/trinoworker/etc/config.properties create mode 100644 docker/hoodie/hadoop/trinoworker/etc/jvm.config create mode 100644 
docker/hoodie/hadoop/trinoworker/etc/log.properties create mode 100644 docker/hoodie/hadoop/trinoworker/etc/node.properties create mode 100644 docker/hoodie/hadoop/trinoworker/pom.xml diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml index 05790963b1884..7c74f8e34ebd7 100644 --- a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml +++ b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml @@ -221,6 +221,34 @@ services: - ${HUDI_WS}:/var/hoodie/ws command: worker + trino-coordinator-1: + container_name: trino-coordinator-1 + hostname: trino-coordinator-1 + image: yihua/hudi-hadoop_2.8.4-trinocoordinator_365:latest + ports: + - '8091:8091' + links: + - "hivemetastore" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + command: http://trino-coordinator-1:8091 trino-coordinator-1 + + trino-worker-1: + container_name: trino-worker-1 + hostname: trino-worker-1 + image: yihua/hudi-hadoop_2.8.4-trinoworker_365:latest + depends_on: [ "trino-coordinator-1" ] + ports: + - '8092:8092' + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + command: http://trino-coordinator-1:8091 trino-worker-1 + graphite: container_name: graphite hostname: graphite @@ -231,7 +259,7 @@ services: - 8126:8126 adhoc-1: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + image: yihua/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest hostname: adhoc-1 container_name: adhoc-1 env_file: @@ -248,11 +276,12 @@ services: - "hive-metastore-postgresql" - "namenode" - "presto-coordinator-1" + - "trino-coordinator-1" volumes: - ${HUDI_WS}:/var/hoodie/ws adhoc-2: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + image: yihua/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest hostname: adhoc-2 container_name: adhoc-2 env_file: @@ -267,6 +296,7 @@ services: - 
"hive-metastore-postgresql" - "namenode" - "presto-coordinator-1" + - "trino-coordinator-1" volumes: - ${HUDI_WS}:/var/hoodie/ws diff --git a/docker/hoodie/hadoop/base_java11/Dockerfile b/docker/hoodie/hadoop/base_java11/Dockerfile new file mode 100644 index 0000000000000..8052eae6add84 --- /dev/null +++ b/docker/hoodie/hadoop/base_java11/Dockerfile @@ -0,0 +1,60 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +FROM openjdk:11-jdk-slim-bullseye +MAINTAINER Hoodie +USER root + +# Default to UTF-8 file.encoding +ENV LANG C.UTF-8 + +ARG HADOOP_VERSION=2.8.4 +ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz +ENV HADOOP_VERSION ${HADOOP_VERSION} +ENV HADOOP_URL ${HADOOP_URL} + +RUN set -x \ + && DEBIAN_FRONTEND=noninteractive apt-get -yq update && apt-get -yq install curl wget netcat procps \ + && echo "Fetch URL2 is : ${HADOOP_URL}" \ + && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \ + && curl -fSL "${HADOOP_URL}.asc" -o /tmp/hadoop.tar.gz.asc \ + && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \ + && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \ + && rm /tmp/hadoop.tar.gz* \ + && ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \ + && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \ + && mkdir /hadoop-data + +ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION +ENV HADOOP_CONF_DIR=/etc/hadoop +ENV MULTIHOMED_NETWORK=1 +ENV HADOOP_HOME=${HADOOP_PREFIX} +ENV HADOOP_INSTALL=${HADOOP_HOME} +ENV USER=root +ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH + +# Exposing a union of ports across hadoop versions +# Well known ports including ssh +EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042 + +ADD entrypoint.sh /entrypoint.sh +ADD export_container_ip.sh /usr/bin/ +RUN chmod a+x /usr/bin/export_container_ip.sh \ + && chmod a+x /entrypoint.sh + +ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] + diff --git a/docker/hoodie/hadoop/base_java11/entrypoint.sh b/docker/hoodie/hadoop/base_java11/entrypoint.sh new file mode 100644 index 0000000000000..7c26f29f66886 --- /dev/null +++ b/docker/hoodie/hadoop/base_java11/entrypoint.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +####################################################################################### +## COPIED FROM ## +## https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh ## +# ## +####################################################################################### + +# Set some sensible defaults +export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname -f`:8020} + +function addProperty() { + local path=$1 + local name=$2 + local value=$3 + + local entry="<property><name>$name</name><value>${value}</value></property>" + local escapedEntry=$(echo $entry | sed 's/\//\\\//g') + sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path +} + +function configure() { + local path=$1 + local module=$2 + local envPrefix=$3 + + local var + local value + + echo "Configuring $module" + for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix=$envPrefix`; do + name=`echo ${c} | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;'` + var="${envPrefix}_${c}" + value=${!var} + echo " - Setting $name=$value" + addProperty /etc/hadoop/$module-site.xml $name "$value" + done +} + +configure /etc/hadoop/core-site.xml core CORE_CONF +configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF +configure /etc/hadoop/yarn-site.xml yarn YARN_CONF +configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF +configure /etc/hadoop/kms-site.xml kms KMS_CONF + +if [ "$MULTIHOMED_NETWORK" = "1" ]; then + echo 
"Configuring for multihomed network" + + # HDFS + addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0 + addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0 + addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0 + addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0 + addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true + addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true + + # YARN + addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0 + addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0 + addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0 + addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0 + + # MAPRED + addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0 +fi + +if [ -n "$GANGLIA_HOST" ]; then + mv /etc/hadoop/hadoop-metrics.properties /etc/hadoop/hadoop-metrics.properties.orig + mv /etc/hadoop/hadoop-metrics2.properties /etc/hadoop/hadoop-metrics2.properties.orig + + for module in mapred jvm rpc ugi; do + echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31" + echo "$module.period=10" + echo "$module.servers=$GANGLIA_HOST:8649" + done > /etc/hadoop/hadoop-metrics.properties + + for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do + echo "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31" + echo "$module.sink.ganglia.period=10" + echo "$module.sink.ganglia.supportsparse=true" + echo "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both" + echo "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40" + echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649" + done > /etc/hadoop/hadoop-metrics2.properties +fi + +# Save Container IP in ENV variable 
+. /usr/bin/export_container_ip.sh # sourced: a child process cannot export MY_CONTAINER_IP into this shell + +exec "$@" diff --git a/docker/hoodie/hadoop/base_java11/export_container_ip.sh b/docker/hoodie/hadoop/base_java11/export_container_ip.sh new file mode 100755 index 0000000000000..b427f92ccf7c3 --- /dev/null +++ b/docker/hoodie/hadoop/base_java11/export_container_ip.sh @@ -0,0 +1,30 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +interfaces=( "en0" "eth0" ) + +ipAddr="" +for interface in "${interfaces[@]}" +do + ipAddr=`ifconfig $interface | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head` + if [ -n "$ipAddr" ]; then + break + fi +done + +echo "Container IP is set to : $ipAddr" +export MY_CONTAINER_IP=$ipAddr diff --git a/docker/hoodie/hadoop/base_java11/pom.xml b/docker/hoodie/hadoop/base_java11/pom.xml new file mode 100644 index 0000000000000..5596d7e7249ba --- /dev/null +++ b/docker/hoodie/hadoop/base_java11/pom.xml @@ -0,0 +1,96 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-base-java11-docker + + Base Docker Image with Hoodie + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + + org.apache.hudi + hudi-hadoop-docker + ${project.version} + pom + import + + + + + + + hudi + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + yihua/hudi-hadoop_${docker.hadoop.version}-base-java11 + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + yihua/hudi-hadoop_${docker.hadoop.version}-base-java11 + true + ${project.version} + + + + + + + diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml index db47f3924e4ba..795646a24e1c9 100644 --- a/docker/hoodie/hadoop/pom.xml +++ b/docker/hoodie/hadoop/pom.xml @@ -28,6 +28,7 @@ pom base + base_java11 namenode datanode historyserver @@ -37,6 +38,9 @@ sparkworker sparkadhoc prestobase + trinobase + trinocoordinator + trinoworker @@ -54,6 +58,7 @@ 2.3.3 2.8.4 0.217 + 365 1.4.13 true ${project.parent.basedir} diff --git a/docker/hoodie/hadoop/sparkadhoc/Dockerfile b/docker/hoodie/hadoop/sparkadhoc/Dockerfile index a114cf0fbe92c..ff2f4444614e3 100644 --- a/docker/hoodie/hadoop/sparkadhoc/Dockerfile +++ b/docker/hoodie/hadoop/sparkadhoc/Dockerfile 
@@ -21,15 +21,22 @@ ARG SPARK_VERSION=2.4.4 FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION} ARG PRESTO_VERSION=0.217 +ARG TRINO_VERSION=365 COPY adhoc.sh /opt/spark ENV SPARK_WORKER_WEBUI_PORT 8081 ENV SPARK_WORKER_LOG /spark/logs ENV SPARK_MASTER "spark://spark-master:7077" ENV PRESTO_VERSION ${PRESTO_VERSION} +ENV TRINO_VERSION ${TRINO_VERSION} +ENV BASE_URL=https://repo1.maven.org/maven2 RUN set -x \ ## presto-client - && wget -q -O /usr/local/bin/presto https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \ + && wget -q -O /usr/local/bin/presto ${BASE_URL}/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \ && chmod +x /usr/local/bin/presto +RUN set -x \ + ## trino-cli + && wget -q -O /usr/local/bin/trino ${BASE_URL}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar \ + && chmod +x /usr/local/bin/trino CMD ["/bin/bash", "/opt/spark/adhoc.sh"] diff --git a/docker/hoodie/hadoop/sparkadhoc/adhoc.sh b/docker/hoodie/hadoop/sparkadhoc/adhoc.sh index b20e8cb5fe06a..fd2ef651765f1 100644 --- a/docker/hoodie/hadoop/sparkadhoc/adhoc.sh +++ b/docker/hoodie/hadoop/sparkadhoc/adhoc.sh @@ -22,10 +22,12 @@ export SPARK_HOME=/opt/spark -export PRESTO_CLI_CMD="/usr/local/bin/presto --server presto-coordinator-1" +export PRESTO_CLI_CMD="/usr/local/bin/presto --server presto-coordinator-1:8090" +export TRINO_CLI_CMD="/usr/local/bin/trino --server trino-coordinator-1:8091" date echo "SPARK HOME is : $SPARK_HOME" echo "PRESTO CLI CMD is : $PRESTO_CLI_CMD" +echo "TRINO CLI CMD is : $TRINO_CLI_CMD" tail -f /dev/null diff --git a/docker/hoodie/hadoop/sparkadhoc/pom.xml b/docker/hoodie/hadoop/sparkadhoc/pom.xml index 20a9cab164c33..edf059d12b7e0 100644 --- a/docker/hoodie/hadoop/sparkadhoc/pom.xml +++ b/docker/hoodie/hadoop/sparkadhoc/pom.xml @@ -64,7 +64,9 @@ ${docker.build.skip} false - 
apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + + yihua/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + true latest @@ -80,7 +82,9 @@ ${docker.build.skip} false - apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + + yihua/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + true ${project.version} diff --git a/docker/hoodie/hadoop/trinobase/Dockerfile b/docker/hoodie/hadoop/trinobase/Dockerfile new file mode 100644 index 0000000000000..4e8e984223d47 --- /dev/null +++ b/docker/hoodie/hadoop/trinobase/Dockerfile @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster + +ARG HADOOP_VERSION=2.8.4 +ARG HIVE_VERSION=2.3.3 +FROM yihua/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest as hadoop-base + +ENV TRINO_VERSION=365 +ENV TRINO_HOME=/usr/local/trino +ENV BASE_URL=https://repo1.maven.org/maven2 + +RUN apt-get update +RUN apt-get install -y \ + curl \ + tar \ + sudo \ + rsync \ + python \ + wget \ + python3-pip \ + python-dev \ + build-essential \ + uuid-runtime \ + less + +ENV JAVA_HOME /usr/java/default +ENV PATH $PATH:$JAVA_HOME/bin + +WORKDIR /usr/local/bin +RUN wget -q ${BASE_URL}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar +RUN chmod +x trino-cli-${TRINO_VERSION}-executable.jar +RUN mv trino-cli-${TRINO_VERSION}-executable.jar trino-cli + +WORKDIR /usr/local +RUN wget -q ${BASE_URL}/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz +RUN tar xvzf trino-server-${TRINO_VERSION}.tar.gz -C /usr/local/ +RUN ln -s /usr/local/trino-server-${TRINO_VERSION} $TRINO_HOME + +ENV TRINO_BASE_WS /var/hoodie/ws/docker/hoodie/hadoop/trinobase +RUN mkdir -p ${TRINO_BASE_WS}/target/ +ADD target/ ${TRINO_BASE_WS}/target/ +ENV HUDI_TRINO_BUNDLE ${TRINO_BASE_WS}/target/hudi-trino-bundle.jar +RUN cp ${HUDI_TRINO_BUNDLE} ${TRINO_HOME}/plugin/hive/ + +ADD scripts ${TRINO_HOME}/scripts +RUN chmod +x ${TRINO_HOME}/scripts/trino.sh + +RUN mkdir -p $TRINO_HOME/data +VOLUME ["$TRINO_HOME/data"] diff --git a/docker/hoodie/hadoop/trinobase/pom.xml b/docker/hoodie/hadoop/trinobase/pom.xml new file mode 100644 index 0000000000000..7c506e791bbc7 --- /dev/null +++ b/docker/hoodie/hadoop/trinobase/pom.xml @@ -0,0 +1,116 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-trinobase-docker + Trino Base Docker Image with Hudi + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + org.apache.hudi + hudi-hadoop-base-java11-docker + ${project.version} + pom + 
import + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.7 + + + package + + + + + + + run + + + + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + ${docker.build.skip} + false + + yihua/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} + + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + + yihua/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} + + true + ${project.version} + + + + + + + diff --git a/docker/hoodie/hadoop/trinobase/scripts/trino.sh b/docker/hoodie/hadoop/trinobase/scripts/trino.sh new file mode 100644 index 0000000000000..9aacd842c3dec --- /dev/null +++ b/docker/hoodie/hadoop/trinobase/scripts/trino.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +/usr/local/trino/bin/launcher run diff --git a/docker/hoodie/hadoop/trinocoordinator/Dockerfile b/docker/hoodie/hadoop/trinocoordinator/Dockerfile new file mode 100644 index 0000000000000..dc3dacaee5396 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/Dockerfile @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster + +ARG HADOOP_VERSION=2.8.4 +ARG TRINO_VERSION=365 +FROM yihua/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base + +ADD etc /usr/local/trino/etc +EXPOSE 8091 + +WORKDIR /usr/local/trino +ENTRYPOINT [ "./scripts/trino.sh" ] diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties new file mode 100644 index 0000000000000..3a70496a1f4bb --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=hive-hadoop2 +hive.metastore-cache-ttl=1s +hive.metastore-refresh-interval=1m +hive.metastore-timeout=20s +hive.metastore.uri=thrift://hivemetastore:9083 +hive.storage-format=PARQUET +hive.parquet.use-column-names=true +hive.max-split-size=128MB +hive.recursive-directories=true +hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml +hive.hdfs.authentication.type=NONE +hive.hdfs.impersonation.enabled=false +hive.bucket-execution=false +hive.table-statistics-enabled=true +hive.max-partitions-per-writers=3000 +hive.split-loader-concurrency=1 +hive.orc.bloom-filters.enabled=true diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties new file mode 100644 index 0000000000000..81174f6a855d2 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=jmx diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties new file mode 100644 index 0000000000000..4dd3233bd925c --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +connector.name=memory +memory.max-data-per-node=1024MB diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties new file mode 100644 index 0000000000000..3e88ca95b1f6d --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=tpcds diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties new file mode 100644 index 0000000000000..a193e3ec6ff03 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=tpch diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/config.properties b/docker/hoodie/hadoop/trinocoordinator/etc/config.properties new file mode 100644 index 0000000000000..9876a0fe0f008 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/config.properties @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +coordinator=true +node-scheduler.include-coordinator=false +http-server.http.port=8091 +query.max-memory=50GB +query.max-memory-per-node=1GB +query.max-total-memory-per-node=2GB +discovery-server.enabled=true +discovery.uri=http://trino-coordinator-1:8091 diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/jvm.config b/docker/hoodie/hadoop/trinocoordinator/etc/jvm.config new file mode 100644 index 0000000000000..fb17203ca211b --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/jvm.config @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +-server +-Xmx16G +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:OnOutOfMemoryError=kill -9 %p +-Djdk.attach.allowAttachSelf=true diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/log.properties b/docker/hoodie/hadoop/trinocoordinator/etc/log.properties new file mode 100644 index 0000000000000..23b063080b4fe --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/log.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +io.trino=INFO diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/node.properties b/docker/hoodie/hadoop/trinocoordinator/etc/node.properties new file mode 100644 index 0000000000000..d97d547485998 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/node.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+# +node.environment=development +node.id=3044b958-f077-4fce-87ed-ca8308f800b6 +node.data-dir=/usr/local/trino/data diff --git a/docker/hoodie/hadoop/trinocoordinator/pom.xml b/docker/hoodie/hadoop/trinocoordinator/pom.xml new file mode 100644 index 0000000000000..7727d91c5b633 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/pom.xml @@ -0,0 +1,96 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-trinocoordinator-docker + Trino Coordinator Docker Image with Hudi + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + org.apache.hudi + hudi-hadoop-trinobase-docker + ${project.version} + pom + + + + + + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + ${docker.build.skip} + false + + yihua/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} + + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + + yihua/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} + + true + ${project.version} + + + + + + + diff --git a/docker/hoodie/hadoop/trinoworker/Dockerfile b/docker/hoodie/hadoop/trinoworker/Dockerfile new file mode 100644 index 0000000000000..06eb90e40b653 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/Dockerfile @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster + +ARG HADOOP_VERSION=2.8.4 +ARG TRINO_VERSION=365 +FROM yihua/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base + +ADD etc /usr/local/trino/etc +EXPOSE 8092 + +WORKDIR /usr/local/trino +ENTRYPOINT [ "./scripts/trino.sh" ] diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties new file mode 100644 index 0000000000000..3a70496a1f4bb --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +connector.name=hive-hadoop2 +hive.metastore-cache-ttl=1s +hive.metastore-refresh-interval=1m +hive.metastore-timeout=20s +hive.metastore.uri=thrift://hivemetastore:9083 +hive.storage-format=PARQUET +hive.parquet.use-column-names=true +hive.max-split-size=128MB +hive.recursive-directories=true +hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml +hive.hdfs.authentication.type=NONE +hive.hdfs.impersonation.enabled=false +hive.bucket-execution=false +hive.table-statistics-enabled=true +hive.max-partitions-per-writers=3000 +hive.split-loader-concurrency=1 +hive.orc.bloom-filters.enabled=true diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties new file mode 100644 index 0000000000000..81174f6a855d2 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +connector.name=jmx diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties new file mode 100644 index 0000000000000..4dd3233bd925c --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=memory +memory.max-data-per-node=1024MB diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties new file mode 100644 index 0000000000000..3e88ca95b1f6d --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=tpcds diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties new file mode 100644 index 0000000000000..a193e3ec6ff03 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=tpch diff --git a/docker/hoodie/hadoop/trinoworker/etc/config.properties b/docker/hoodie/hadoop/trinoworker/etc/config.properties new file mode 100644 index 0000000000000..0e15d3d7c1e9c --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/config.properties @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +coordinator=false +http-server.http.port=8092 +query.max-memory=50GB +query.max-memory-per-node=1GB +query.max-total-memory-per-node=2GB +discovery.uri=http://trino-coordinator-1:8091 diff --git a/docker/hoodie/hadoop/trinoworker/etc/jvm.config b/docker/hoodie/hadoop/trinoworker/etc/jvm.config new file mode 100644 index 0000000000000..fb17203ca211b --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/jvm.config @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+# +-server +-Xmx16G +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:OnOutOfMemoryError=kill -9 %p +-Djdk.attach.allowAttachSelf=true diff --git a/docker/hoodie/hadoop/trinoworker/etc/log.properties b/docker/hoodie/hadoop/trinoworker/etc/log.properties new file mode 100644 index 0000000000000..23b063080b4fe --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/log.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +io.trino=INFO diff --git a/docker/hoodie/hadoop/trinoworker/etc/node.properties b/docker/hoodie/hadoop/trinoworker/etc/node.properties new file mode 100644 index 0000000000000..6cfebf995602e --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/node.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +node.environment=development +node.id=6606f0b3-6ae7-4152-a4b1-ddadb6345fe6 +node.data-dir=/var/trino/data diff --git a/docker/hoodie/hadoop/trinoworker/pom.xml b/docker/hoodie/hadoop/trinoworker/pom.xml new file mode 100644 index 0000000000000..bc008151beb19 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/pom.xml @@ -0,0 +1,96 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-trinoworker-docker + Trino Worker Docker Image with Hudi + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + org.apache.hudi + hudi-hadoop-trinobase-docker + ${project.version} + pom + + + + + + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + ${docker.build.skip} + false + + yihua/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} + + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + + yihua/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} + + true + ${project.version} + + + + + + + From 0c9dfe03889971ea6f0bf095ef582a8a6749132b Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Tue, 11 Jan 2022 19:35:39 -0800 Subject: [PATCH 2/3] Update docker account and remove unnecessary configs --- ...ker-compose_hadoop284_hive233_spark244.yml | 8 ++++---- docker/hoodie/hadoop/base_java11/pom.xml | 4 ++-- docker/hoodie/hadoop/pom.xml | 2 +- docker/hoodie/hadoop/sparkadhoc/Dockerfile | 2 +- docker/hoodie/hadoop/sparkadhoc/pom.xml | 4 ++-- 
docker/hoodie/hadoop/trinobase/Dockerfile | 4 ++-- docker/hoodie/hadoop/trinobase/pom.xml | 4 ++-- .../hoodie/hadoop/trinocoordinator/Dockerfile | 4 ++-- .../etc/catalog/hive.properties | 15 +------------- .../etc/catalog/jmx.properties | 19 ------------------ .../etc/catalog/memory.properties | 20 ------------------- .../etc/catalog/tpcds.properties | 19 ------------------ .../etc/catalog/tpch.properties | 19 ------------------ docker/hoodie/hadoop/trinocoordinator/pom.xml | 4 ++-- docker/hoodie/hadoop/trinoworker/Dockerfile | 4 ++-- .../trinoworker/etc/catalog/hive.properties | 15 +------------- .../trinoworker/etc/catalog/jmx.properties | 19 ------------------ .../trinoworker/etc/catalog/memory.properties | 20 ------------------- .../trinoworker/etc/catalog/tpcds.properties | 19 ------------------ .../trinoworker/etc/catalog/tpch.properties | 19 ------------------ docker/hoodie/hadoop/trinoworker/pom.xml | 4 ++-- 21 files changed, 24 insertions(+), 204 deletions(-) delete mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties delete mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties delete mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties delete mode 100644 docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties delete mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties delete mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties delete mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties delete mode 100644 docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml index 7c74f8e34ebd7..933f9eb27eef8 100644 --- a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml +++ b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml @@ -224,7 +224,7 @@ services: 
trino-coordinator-1: container_name: trino-coordinator-1 hostname: trino-coordinator-1 - image: yihua/hudi-hadoop_2.8.4-trinocoordinator_365:latest + image: apachehudi/hudi-hadoop_2.8.4-trinocoordinator_368:latest ports: - '8091:8091' links: @@ -236,7 +236,7 @@ services: trino-worker-1: container_name: trino-worker-1 hostname: trino-worker-1 - image: yihua/hudi-hadoop_2.8.4-trinoworker_365:latest + image: apachehudi/hudi-hadoop_2.8.4-trinoworker_368:latest depends_on: [ "trino-coordinator-1" ] ports: - '8092:8092' @@ -259,7 +259,7 @@ services: - 8126:8126 adhoc-1: - image: yihua/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest hostname: adhoc-1 container_name: adhoc-1 env_file: @@ -281,7 +281,7 @@ services: - ${HUDI_WS}:/var/hoodie/ws adhoc-2: - image: yihua/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest hostname: adhoc-2 container_name: adhoc-2 env_file: diff --git a/docker/hoodie/hadoop/base_java11/pom.xml b/docker/hoodie/hadoop/base_java11/pom.xml index 5596d7e7249ba..b5d5a3ec16b94 100644 --- a/docker/hoodie/hadoop/base_java11/pom.xml +++ b/docker/hoodie/hadoop/base_java11/pom.xml @@ -68,7 +68,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-base-java11 + apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11 true latest @@ -84,7 +84,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-base-java11 + apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11 true ${project.version} diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml index 795646a24e1c9..2d0d75283dcd1 100644 --- a/docker/hoodie/hadoop/pom.xml +++ b/docker/hoodie/hadoop/pom.xml @@ -58,7 +58,7 @@ 2.3.3 2.8.4 0.217 - 365 + 368 1.4.13 true ${project.parent.basedir} diff --git a/docker/hoodie/hadoop/sparkadhoc/Dockerfile b/docker/hoodie/hadoop/sparkadhoc/Dockerfile index 
ff2f4444614e3..1f9eab26187ef 100644 --- a/docker/hoodie/hadoop/sparkadhoc/Dockerfile +++ b/docker/hoodie/hadoop/sparkadhoc/Dockerfile @@ -21,7 +21,7 @@ ARG SPARK_VERSION=2.4.4 FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION} ARG PRESTO_VERSION=0.217 -ARG TRINO_VERSION=365 +ARG TRINO_VERSION=368 COPY adhoc.sh /opt/spark ENV SPARK_WORKER_WEBUI_PORT 8081 diff --git a/docker/hoodie/hadoop/sparkadhoc/pom.xml b/docker/hoodie/hadoop/sparkadhoc/pom.xml index edf059d12b7e0..e554313ace23e 100644 --- a/docker/hoodie/hadoop/sparkadhoc/pom.xml +++ b/docker/hoodie/hadoop/sparkadhoc/pom.xml @@ -65,7 +65,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} true latest @@ -83,7 +83,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} true ${project.version} diff --git a/docker/hoodie/hadoop/trinobase/Dockerfile b/docker/hoodie/hadoop/trinobase/Dockerfile index 4e8e984223d47..9d7c23010fbb8 100644 --- a/docker/hoodie/hadoop/trinobase/Dockerfile +++ b/docker/hoodie/hadoop/trinobase/Dockerfile @@ -20,9 +20,9 @@ ARG HADOOP_VERSION=2.8.4 ARG HIVE_VERSION=2.3.3 -FROM yihua/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest as hadoop-base +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest as hadoop-base -ENV TRINO_VERSION=365 +ENV TRINO_VERSION=368 ENV TRINO_HOME=/usr/local/trino ENV BASE_URL=https://repo1.maven.org/maven2 diff --git a/docker/hoodie/hadoop/trinobase/pom.xml b/docker/hoodie/hadoop/trinobase/pom.xml index 7c506e791bbc7..5a290556e495b 100644 --- a/docker/hoodie/hadoop/trinobase/pom.xml +++ 
b/docker/hoodie/hadoop/trinobase/pom.xml @@ -85,7 +85,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} true latest @@ -103,7 +103,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} true ${project.version} diff --git a/docker/hoodie/hadoop/trinocoordinator/Dockerfile b/docker/hoodie/hadoop/trinocoordinator/Dockerfile index dc3dacaee5396..67a31448d7a65 100644 --- a/docker/hoodie/hadoop/trinocoordinator/Dockerfile +++ b/docker/hoodie/hadoop/trinocoordinator/Dockerfile @@ -19,8 +19,8 @@ # Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster ARG HADOOP_VERSION=2.8.4 -ARG TRINO_VERSION=365 -FROM yihua/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base +ARG TRINO_VERSION=368 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base ADD etc /usr/local/trino/etc EXPOSE 8091 diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties index 3a70496a1f4bb..ed7fce1b3e640 100644 --- a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties +++ b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties @@ -16,20 +16,7 @@ # specific language governing permissions and limitations # under the License. 
# -connector.name=hive-hadoop2 -hive.metastore-cache-ttl=1s -hive.metastore-refresh-interval=1m -hive.metastore-timeout=20s +connector.name=hive hive.metastore.uri=thrift://hivemetastore:9083 -hive.storage-format=PARQUET -hive.parquet.use-column-names=true -hive.max-split-size=128MB -hive.recursive-directories=true hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml hive.hdfs.authentication.type=NONE -hive.hdfs.impersonation.enabled=false -hive.bucket-execution=false -hive.table-statistics-enabled=true -hive.max-partitions-per-writers=3000 -hive.split-loader-concurrency=1 -hive.orc.bloom-filters.enabled=true diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties deleted file mode 100644 index 81174f6a855d2..0000000000000 --- a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/jmx.properties +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -connector.name=jmx diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties deleted file mode 100644 index 4dd3233bd925c..0000000000000 --- a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/memory.properties +++ /dev/null @@ -1,20 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -connector.name=memory -memory.max-data-per-node=1024MB diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties deleted file mode 100644 index 3e88ca95b1f6d..0000000000000 --- a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpcds.properties +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -connector.name=tpcds diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties deleted file mode 100644 index a193e3ec6ff03..0000000000000 --- a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/tpch.properties +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -connector.name=tpch diff --git a/docker/hoodie/hadoop/trinocoordinator/pom.xml b/docker/hoodie/hadoop/trinocoordinator/pom.xml index 7727d91c5b633..564a2083e7dd9 100644 --- a/docker/hoodie/hadoop/trinocoordinator/pom.xml +++ b/docker/hoodie/hadoop/trinocoordinator/pom.xml @@ -65,7 +65,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} true latest @@ -83,7 +83,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} true ${project.version} diff --git a/docker/hoodie/hadoop/trinoworker/Dockerfile b/docker/hoodie/hadoop/trinoworker/Dockerfile index 06eb90e40b653..ae5b2766dc9d9 100644 --- a/docker/hoodie/hadoop/trinoworker/Dockerfile +++ b/docker/hoodie/hadoop/trinoworker/Dockerfile @@ -19,8 +19,8 @@ # Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster ARG HADOOP_VERSION=2.8.4 -ARG TRINO_VERSION=365 -FROM yihua/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base +ARG TRINO_VERSION=368 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base ADD etc /usr/local/trino/etc EXPOSE 8092 diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties index 3a70496a1f4bb..ed7fce1b3e640 100644 --- a/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties +++ b/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties @@ -16,20 +16,7 @@ # specific language governing permissions and limitations # under the License. 
# -connector.name=hive-hadoop2 -hive.metastore-cache-ttl=1s -hive.metastore-refresh-interval=1m -hive.metastore-timeout=20s +connector.name=hive hive.metastore.uri=thrift://hivemetastore:9083 -hive.storage-format=PARQUET -hive.parquet.use-column-names=true -hive.max-split-size=128MB -hive.recursive-directories=true hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml hive.hdfs.authentication.type=NONE -hive.hdfs.impersonation.enabled=false -hive.bucket-execution=false -hive.table-statistics-enabled=true -hive.max-partitions-per-writers=3000 -hive.split-loader-concurrency=1 -hive.orc.bloom-filters.enabled=true diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties deleted file mode 100644 index 81174f6a855d2..0000000000000 --- a/docker/hoodie/hadoop/trinoworker/etc/catalog/jmx.properties +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -connector.name=jmx diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties deleted file mode 100644 index 4dd3233bd925c..0000000000000 --- a/docker/hoodie/hadoop/trinoworker/etc/catalog/memory.properties +++ /dev/null @@ -1,20 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -connector.name=memory -memory.max-data-per-node=1024MB diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties deleted file mode 100644 index 3e88ca95b1f6d..0000000000000 --- a/docker/hoodie/hadoop/trinoworker/etc/catalog/tpcds.properties +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -connector.name=tpcds diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties deleted file mode 100644 index a193e3ec6ff03..0000000000000 --- a/docker/hoodie/hadoop/trinoworker/etc/catalog/tpch.properties +++ /dev/null @@ -1,19 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -connector.name=tpch diff --git a/docker/hoodie/hadoop/trinoworker/pom.xml b/docker/hoodie/hadoop/trinoworker/pom.xml index bc008151beb19..54f56b9e11309 100644 --- a/docker/hoodie/hadoop/trinoworker/pom.xml +++ b/docker/hoodie/hadoop/trinoworker/pom.xml @@ -65,7 +65,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} true latest @@ -83,7 +83,7 @@ ${docker.build.skip} false - yihua/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} true ${project.version} From b8ff46627b9bded68f3d3caf3acdea26b991f5ad Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Tue, 11 Jan 2022 22:09:43 -0800 Subject: [PATCH 3/3] Adjust sparkadhoc Dockerfile --- docker/hoodie/hadoop/sparkadhoc/Dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docker/hoodie/hadoop/sparkadhoc/Dockerfile b/docker/hoodie/hadoop/sparkadhoc/Dockerfile index 1f9eab26187ef..19af7351b73d0 100644 --- a/docker/hoodie/hadoop/sparkadhoc/Dockerfile +++ b/docker/hoodie/hadoop/sparkadhoc/Dockerfile @@ -31,6 +31,20 @@ ENV PRESTO_VERSION ${PRESTO_VERSION} ENV TRINO_VERSION ${TRINO_VERSION} ENV BASE_URL=https://repo1.maven.org/maven2 +RUN apt-get update +RUN apt-get install -y \ + curl \ + tar \ + sudo \ + rsync \ + python \ + wget \ + python3-pip \ + python-dev \ + build-essential \ + uuid-runtime \ + less + RUN set -x \ ## presto-client && wget -q -O /usr/local/bin/presto ${BASE_URL}/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \