Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions docker/compose/docker-compose_hadoop284_hive233_spark244.yml
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,34 @@ services:
- ${HUDI_WS}:/var/hoodie/ws
command: worker

# Trino coordinator container. Publishes port 8091 on the host and mounts the
# Hudi workspace at /var/hoodie/ws.
# NOTE(review): the two command arguments look like the coordinator URL and
# this node's name - confirm against the image entrypoint.
trino-coordinator-1:
  image: "apachehudi/hudi-hadoop_2.8.4-trinocoordinator_368:latest"
  container_name: "trino-coordinator-1"
  hostname: "trino-coordinator-1"
  command: "http://trino-coordinator-1:8091 trino-coordinator-1"
  ports:
    - "8091:8091"
  links:
    - hivemetastore
  volumes:
    - "${HUDI_WS}:/var/hoodie/ws"


# Trino worker container. Started after trino-coordinator-1, publishes port
# 8092 on the host and mounts the Hudi workspace at /var/hoodie/ws.
# NOTE(review): the first command argument is the coordinator URL, the second
# looks like this node's name - confirm against the image entrypoint.
trino-worker-1:
  image: "apachehudi/hudi-hadoop_2.8.4-trinoworker_368:latest"
  container_name: "trino-worker-1"
  hostname: "trino-worker-1"
  command: "http://trino-coordinator-1:8091 trino-worker-1"
  depends_on:
    - trino-coordinator-1
  ports:
    - "8092:8092"
  links:
    - hivemetastore
    - hiveserver
    - hive-metastore-postgresql
    - namenode
  volumes:
    - "${HUDI_WS}:/var/hoodie/ws"


graphite:
container_name: graphite
hostname: graphite
Expand Down Expand Up @@ -248,6 +276,7 @@ services:
- "hive-metastore-postgresql"
- "namenode"
- "presto-coordinator-1"
- "trino-coordinator-1"
volumes:
- ${HUDI_WS}:/var/hoodie/ws

Expand All @@ -267,6 +296,7 @@ services:
- "hive-metastore-postgresql"
- "namenode"
- "presto-coordinator-1"
- "trino-coordinator-1"
volumes:
- ${HUDI_WS}:/var/hoodie/ws

Expand Down
60 changes: 60 additions & 0 deletions docker/hoodie/hadoop/base_java11/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Base image for the Java 11 flavoured Hadoop containers: a JDK 11 Debian image
# with a Hadoop distribution unpacked under /opt and the shared entrypoint.
FROM openjdk:11-jdk-slim-bullseye
# MAINTAINER is deprecated; the same metadata is carried by a label.
LABEL maintainer="Hoodie"
USER root

# Default to UTF-8 file.encoding
ENV LANG=C.UTF-8

# Both values can be overridden with --build-arg; they are re-exported as ENV
# so that they are also visible inside running containers.
ARG HADOOP_VERSION=2.8.4
ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ENV HADOOP_VERSION=${HADOOP_VERSION}
ENV HADOOP_URL=${HADOOP_URL}

# Install tooling and unpack Hadoop.
#  - DEBIAN_FRONTEND is exported so it applies to the install step as well as
#    to the update step.
#  - net-tools provides ifconfig, which export_container_ip.sh relies on.
#  - NOTE(review): the .asc signature is downloaded (so a missing signature
#    fails the build) but it is never verified before being deleted.
RUN set -x \
    && export DEBIAN_FRONTEND=noninteractive \
    && apt-get -yq update \
    && apt-get -yq install curl wget netcat procps net-tools \
    && echo "Fetch URL2 is : ${HADOOP_URL}" \
    && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
    && curl -fSL "${HADOOP_URL}.asc" -o /tmp/hadoop.tar.gz.asc \
    && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
    && rm /tmp/hadoop.tar.gz* \
    && ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
    && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
    && mkdir /hadoop-data

ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=/etc/hadoop
# Read by entrypoint.sh: "1" makes it add the 0.0.0.0 bind-host properties.
ENV MULTIHOMED_NETWORK=1
ENV HADOOP_HOME=${HADOOP_PREFIX}
ENV HADOOP_INSTALL=${HADOOP_HOME}
ENV USER=root
ENV PATH=/usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH

# Exposing a union of ports across hadoop versions
# Well known ports including ssh
EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042

# COPY is sufficient for plain local files (no URL fetch or archive
# extraction is wanted here).
COPY entrypoint.sh /entrypoint.sh
COPY export_container_ip.sh /usr/bin/
RUN chmod a+x /usr/bin/export_container_ip.sh \
    && chmod a+x /entrypoint.sh

ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

107 changes: 107 additions & 0 deletions docker/hoodie/hadoop/base_java11/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#######################################################################################
## COPIED FROM ##
## https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh ##
# ##
#######################################################################################

# Sensible default: unless the caller already provided a non-empty value,
# point fs.defaultFS at this host's fully qualified name on port 8020.
: "${CORE_CONF_fs_defaultFS:=hdfs://$(hostname -f):8020}"
export CORE_CONF_fs_defaultFS

# Insert a <property> element into a Hadoop site XML file, in place.
#   $1 - path of the XML file to edit
#   $2 - property name
#   $3 - property value
# The element is written on its own line directly above every line that
# contains the closing </configuration> tag.
function addProperty() {
  local path=$1
  local name=$2
  local value=$3

  local entry="<property><name>$name</name><value>${value}</value></property>"
  # Escape everything that is special in a sed replacement: "\" and "/" (the
  # delimiter used below) and "&" (which would expand to the matched line).
  # printf with a quoted argument keeps whitespace and glob characters in the
  # value intact.
  local escapedEntry
  escapedEntry=$(printf '%s\n' "$entry" | sed 's|[\\/&]|\\&|g')
  sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" "$path"
}

# Copy every environment variable named <envPrefix>_<key> into a Hadoop site
# XML file as a property.
#   $1 - path of the site XML file to update
#   $2 - module name (used in the log line)
#   $3 - environment variable prefix, e.g. CORE_CONF
# The <key> part is translated into a property name as follows:
#   "___" -> "-",  "__" -> "_",  "_" -> "."
# so CORE_CONF_fs_defaultFS becomes fs.defaultFS.
function configure() {
  local path=$1
  local module=$2
  local envPrefix=$3

  # Loop variables are local so they do not leak into the rest of the script.
  local c
  local name
  local var
  local value

  echo "Configuring $module"
  for c in $(printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix="$envPrefix"); do
    name=$(echo "${c}" | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;')
    var="${envPrefix}_${c}"
    # Indirect expansion: read the variable whose name is stored in $var.
    value=${!var}
    echo " - Setting $name=$value"
    # Write to the file the caller asked for rather than re-deriving the path
    # from the module name (the two agree for every call in this script).
    addProperty "$path" "$name" "$value"
  done
}

# Apply <MODULE>_CONF_* environment overrides to each module's site file,
# e.g. module "core" -> /etc/hadoop/core-site.xml with prefix CORE_CONF.
for siteModule in core hdfs yarn httpfs kms; do
  configure "/etc/hadoop/${siteModule}-site.xml" "${siteModule}" "${siteModule^^}_CONF"
done

# When MULTIHOMED_NETWORK is "1", add bind-host properties set to 0.0.0.0 and
# switch the HDFS client/datanode to hostname-based addressing.
if [ "$MULTIHOMED_NETWORK" = "1" ]; then
  echo "Configuring for multihomed network"

  # HDFS
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
  addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true

  # YARN (yarn.nodemanager.bind-host used to be added twice; once is enough)
  addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0
  addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
  addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0

  # MAPRED
  # NOTE(review): this writes a yarn.* key into mapred-site.xml; kept as is,
  # confirm whether that is intentional.
  addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
fi

# When GANGLIA_HOST is set, keep the stock metrics configuration as *.orig and
# generate replacements that send metrics to $GANGLIA_HOST on port 8649.
if [ -n "$GANGLIA_HOST" ]; then
  for metricsFile in hadoop-metrics hadoop-metrics2; do
    mv "/etc/hadoop/${metricsFile}.properties" "/etc/hadoop/${metricsFile}.properties.orig"
  done

  # metrics (v1) contexts
  for module in mapred jvm rpc ugi; do
    printf '%s\n' \
      "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31" \
      "$module.period=10" \
      "$module.servers=$GANGLIA_HOST:8649"
  done > /etc/hadoop/hadoop-metrics.properties

  # metrics2 sinks, one set of settings per daemon
  for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do
    printf '%s\n' \
      "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31" \
      "$module.sink.ganglia.period=10" \
      "$module.sink.ganglia.supportsparse=true" \
      "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both" \
      "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40" \
      "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
  done > /etc/hadoop/hadoop-metrics2.properties
fi

# Save Container IP in ENV variable.
# The helper has to be sourced: when it is run as a child process, its
# `export MY_CONTAINER_IP` is lost as soon as that process exits, so the
# command started below would never see the variable.
. /usr/bin/export_container_ip.sh

# Hand over to the command passed as arguments, replacing this shell process.
exec "$@"
30 changes: 30 additions & 0 deletions docker/hoodie/hadoop/base_java11/export_container_ip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Determine this container's IPv4 address and export it as MY_CONTAINER_IP.
# Interfaces are probed in the order listed; the first one that reports a
# non-loopback address wins. MY_CONTAINER_IP is empty when none does.
interfaces=( "en0" "eth0" )

ipAddr=""
if ! command -v ifconfig > /dev/null 2>&1; then
  echo "ifconfig not found; cannot determine container IP" >&2
fi

for interface in "${interfaces[@]}"
do
  # - stderr is dropped because probing an interface that does not exist is
  #   expected here and would otherwise print an error on every start.
  # - The loopback address is filtered as an exact fixed string, not a regex.
  # - head -n 1 guarantees a single address even if several are reported.
  ipAddr=$(ifconfig "$interface" 2> /dev/null | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -vFx '127.0.0.1' | head -n 1)
  if [ -n "$ipAddr" ]; then
    break
  fi
done

echo "Container IP is set to : $ipAddr"
export MY_CONTAINER_IP=$ipAddr
96 changes: 96 additions & 0 deletions docker/hoodie/hadoop/base_java11/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
Child module of hudi-hadoop-docker with pom packaging. Its build section
runs the dockerfile-maven-plugin to build and tag the image
apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11.
-->
<parent>
<artifactId>hudi-hadoop-docker</artifactId>
<groupId>org.apache.hudi</groupId>
<version>0.11.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<artifactId>hudi-hadoop-base-java11-docker</artifactId>

<description>Base Docker Image with Hoodie</description>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Checkstyle is switched off for this module. -->
<checkstyle.skip>true</checkstyle.skip>
<!-- Resolves to the base directory of this module's grandparent project. -->
<main.basedir>${project.parent.parent.basedir}</main.basedir>
</properties>


<!-- Imports the dependency management of the hudi-hadoop-docker pom at this project's version. -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-docker</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>


<build>
<finalName>hudi</finalName>
<plugins>
<!-- Build Docker image -->
<!--
Two executions, both bound to pre-integration-test and both running the
build and tag goals; they differ only in the tag they apply (latest vs.
${project.version}). The push goal is commented out in both.
dockerfile.maven.version, docker.build.skip and docker.hadoop.version are
not defined in this file.
-->
<plugin>
<groupId>com.spotify</groupId>
<artifactId>dockerfile-maven-plugin</artifactId>
<version>${dockerfile.maven.version}</version>
<executions>
<execution>
<!-- Tags the image as "latest". -->
<id>tag-latest</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11</repository>
<force>true</force>
<tag>latest</tag>
</configuration>
</execution>
<execution>
<!-- Tags the image with the Maven project version. -->
<id>tag-version</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11</repository>
<force>true</force>
<tag>${project.version}</tag>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
5 changes: 5 additions & 0 deletions docker/hoodie/hadoop/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<packaging>pom</packaging>
<modules>
<module>base</module>
<module>base_java11</module>
<module>namenode</module>
<module>datanode</module>
<module>historyserver</module>
Expand All @@ -37,6 +38,9 @@
<module>sparkworker</module>
<module>sparkadhoc</module>
<module>prestobase</module>
<module>trinobase</module>
<module>trinocoordinator</module>
<module>trinoworker</module>
</modules>

<dependencies>
Expand All @@ -54,6 +58,7 @@
<docker.hive.version>2.3.3</docker.hive.version>
<docker.hadoop.version>2.8.4</docker.hadoop.version>
<docker.presto.version>0.217</docker.presto.version>
<docker.trino.version>368</docker.trino.version>
<dockerfile.maven.version>1.4.13</dockerfile.maven.version>
<checkstyle.skip>true</checkstyle.skip>
<main.basedir>${project.parent.basedir}</main.basedir>
Expand Down
Loading