Skip to content

Commit

Permalink
add start_hdfs_fuse.sh
Browse files Browse the repository at this point in the history
  • Loading branch information
chaohengstudent committed Feb 5, 2023
1 parent 26f1f76 commit b4071eb
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ spark.dockerfile
deps.dockerfile
worker.dockerfile
etl.dockerfile
hdfs-fuse.dockerfile
# we don't put binary file to git repo
gradle-wrapper.jar
VersionUtils.java
152 changes: 152 additions & 0 deletions docker/start_hdfs_fuse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Absolute path of the directory that holds this script, so sibling files are
# found no matter which working directory the caller starts from.
# The assignment is kept apart from `declare -r` on purpose: combined, declare's
# own exit status would hide a failing cd/pwd and leave DOCKER_FOLDER empty.
DOCKER_FOLDER=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) || {
  echo "error: unable to resolve the directory of ${BASH_SOURCE[0]}" >&2
  exit 1
}
declare -r DOCKER_FOLDER

# Load the shared helpers. The path is quoted so a checkout located under a
# directory whose name contains whitespace still works.
# NOTE(review): checkDocker, buildImageIfNeed and checkNetwork, called further
# down, are presumably defined by this file - confirm.
source "$DOCKER_FOLDER/docker_build_common.sh"

# ===============================[global variables]===============================
# Hadoop release to download and build; the caller may override it via VERSION.
readonly VERSION="${VERSION:-3.3.4}"
# Image repository; the caller may override it via REPO.
readonly REPO="${REPO:-ghcr.io/skiptests/astraea/hdfs_fuse}"
# Fully qualified image reference (repository:tag).
readonly IMAGE_NAME="${REPO}:${VERSION}"
# Location of the dockerfile written by generateDockerfile.
readonly DOCKERFILE="${DOCKER_FOLDER}/hdfs-fuse.dockerfile"
# Prefix of the container name; the main section appends "-<port>" to it.
readonly CONTAINER_NAME="hdfs-fuse"

# ===================================[functions]===================================

#######################################
# Print the usage text of this script.
# The listed values mirror the defaults declared in the global-variables
# section (REPO, VERSION); the positional argument is the HDFS address that
# the main section forwards to fuse_dfs_wrapper.sh.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
function showHelp() {
  cat <<'EOF'
Usage: [ENV] start_hdfs_fuse.sh [HDFS_ADDRESS]
HDFS_ADDRESS:
    address of the HDFS to mount on /mnt/hdfs, e.g. hdfs://192.168.1.1:12345
    (required unless RUN=false)
ENV:
    REPO=ghcr.io/skiptests/astraea/hdfs_fuse    set the docker repo
    VERSION=3.3.4                               set version of hadoop distribution
    BUILD=false                                 set true if you want to build image locally
    RUN=false                                   set false if you want to build/pull image only
EOF
}

#######################################
# Write the multi-stage dockerfile for the hdfs-fuse image to $DOCKERFILE.
#
# Stages of the generated dockerfile:
#   build    - downloads the hadoop source and binary tarballs of $VERSION
#   buildsrc - installs the toolchain (JDK, maven, protobuf 3.7.1, boost 1.80.0,
#              libfuse-dev) and packages hadoop-hdfs-native-client with
#              -Pnative -Drequire.fuse=true
#   final    - copies both trees into /opt/hadoop, patches fuse_dfs_wrapper.sh
#              with sed, and switches to the unprivileged account
#
# The text is one double-quoted string, so the escapes are resolved by bash
# when this function runs: \\ becomes \ (dockerfile line continuation),
# \$ and \` become a literal $ and ` for the shell inside the image, and
# \" becomes ". Unescaped ${VERSION} and $USER are substituted right here.
#
# The account is created from $USER as well, so the name given to
# groupadd/useradd always matches the one used by chown and the USER
# instruction.
#
# Globals:   VERSION (read), USER (read), DOCKERFILE (read; file is overwritten)
#            NOTE(review): USER is presumably set by docker_build_common.sh
#            rather than inherited from the login shell - confirm.
# Arguments: none
# Outputs:   none on stdout; the dockerfile is written to $DOCKERFILE
#######################################
function generateDockerfile() {
  echo "#this dockerfile is generated dynamically
FROM ubuntu:22.04 AS build
#install tools
RUN apt-get update && apt-get install -y wget
#download hadoop
WORKDIR /tmp
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}-src.tar.gz
RUN mkdir /opt/hadoop-src
RUN tar -zxvf hadoop-${VERSION}-src.tar.gz -C /opt/hadoop-src --strip-components=1
RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}.tar.gz
RUN mkdir /opt/hadoop
RUN tar -zxvf hadoop-${VERSION}.tar.gz -C /opt/hadoop --strip-components=1
FROM ubuntu:22.04 AS buildsrc
#install tools
RUN apt-get update \\
&& apt-get install -y openjdk-11-jdk \\
maven \\
build-essential \\
autoconf \\
automake \\
libtool \\
cmake \\
zlib1g-dev \\
pkg-config \\
libssl-dev \\
libsasl2-dev \\
g++ \\
curl \\
libfuse-dev
WORKDIR /tmp
RUN curl -L -s -S https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz -o protobuf-3.7.1.tar.gz \\
&& mkdir /opt/protobuf-3.7-src \\
&& tar -zxf protobuf-3.7.1.tar.gz --strip-components 1 -C /opt/protobuf-3.7-src && cd /opt/protobuf-3.7-src \\
&& ./configure --prefix=/usr/ \\
&& make -j\$(nproc) \\
&& make install
WORKDIR /tmp
RUN curl -L https://sourceforge.net/projects/boost/files/boost/1.80.0/boost_1_80_0.tar.bz2/download > boost_1_80_0.tar.bz2 \\
&& tar --bzip2 -xf boost_1_80_0.tar.bz2 -C /opt && cd /opt/boost_1_80_0 \\
&& ./bootstrap.sh --prefix=/usr/ \\
&& ./b2 --without-python \\
&& ./b2 --without-python install
ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64
#copy hadoop
COPY --from=build /opt/hadoop-src /opt/hadoop
WORKDIR /opt/hadoop
RUN mvn clean package -pl hadoop-hdfs-project/hadoop-hdfs-native-client -Pnative -DskipTests -Drequire.fuse=true
FROM ubuntu:22.04
#install tools
RUN apt-get update && apt-get install -y openjdk-11-jre fuse
#copy hadoop
COPY --from=build /opt/hadoop /opt/hadoop
COPY --from=buildsrc /opt/hadoop /opt/hadoop
ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64
ENV HADOOP_HOME /opt/hadoop
RUN echo \"user_allow_other\" >> /etc/fuse.conf
WORKDIR /opt/hadoop/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs
RUN sed -i -e '18aexport CLASSPATH=\\\${HADOOP_HOME}/etc/hadoop:\`find \\\${HADOOP_HOME}/share/hadoop/ | awk '\"'\"'{path=path\":\"\\\$0}END{print path}'\"'\"'\`' \\
-i -e '18aexport LD_LIBRARY_PATH=\\\${HADOOP_HOME}/lib/native:\\\$LD_LIBRARY_PATH' \\
-i -e 's#export LIBHDFS_PATH=.*#export LIBHDFS_PATH=\\\${HADOOP_HOME}/hadoop-hdfs-project/hadoop-hdfs-native-client/target/native/target/usr/local/lib#' \\
-i -e 's/find \"\\\$HADOOP_HOME\/hadoop-client\" -name \"\\*.jar\"/find \"\\\$HADOOP_HOME\/hadoop-client-modules\/hadoop-client\" -name \"\\*.jar\"/g' fuse_dfs_wrapper.sh
#add user
RUN groupadd $USER && useradd -ms /bin/bash -g $USER $USER
RUN mkdir /mnt/hdfs
#change user
RUN chown -R $USER:$USER /opt/hadoop /mnt/hdfs
USER $USER
" >"$DOCKERFILE"
}

# ===================================[main]===================================

# Entry point: build or pull the image and, when RUN is "true", start a
# detached container that mounts the HDFS address given as $1 on /mnt/hdfs
# through fuse_dfs_wrapper.sh.
# NOTE(review): checkDocker, buildImageIfNeed, checkNetwork and the RUN
# variable are presumably provided by docker_build_common.sh - confirm.

# Make the usage text reachable before any docker work is attempted.
case "${1:-}" in
  help | -h | --help)
    showHelp
    exit 0
    ;;
esac

checkDocker
buildImageIfNeed "$IMAGE_NAME"
if [[ "$RUN" != "true" ]]; then
  echo "docker image: $IMAGE_NAME is created"
  exit 0
fi

# Without an address the container would start with an empty mount source and
# a name ending in a bare "-", so refuse early with the usage text instead.
if [[ $# -eq 0 || -z "$1" ]]; then
  echo "error: the HDFS address to mount is required" >&2
  showHelp >&2
  exit 2
fi

checkNetwork

HDFS=$1
# The container name suffix is the port of the address: the digits after the
# last host/port colon, ignoring an optional trailing path. This handles ports
# of any length; addresses without a port fall back to the previous behaviour
# (their last five characters).
if [[ "$HDFS" =~ :([0-9]+)(/.*)?$ ]]; then
  HDFS_PORT="${BASH_REMATCH[1]}"
else
  HDFS_PORT="${HDFS: -5}"
fi

# /dev/fuse plus SYS_ADMIN and an unconfined apparmor profile are requested so
# the FUSE mount can be created inside the container. The address is passed to
# the inner shell as a positional parameter rather than spliced into the
# command string, so it is never re-parsed as shell code.
docker run -d --init \
  --name "$CONTAINER_NAME-$HDFS_PORT" \
  --device /dev/fuse \
  --cap-add SYS_ADMIN \
  --security-opt apparmor:unconfined \
  "$IMAGE_NAME" /bin/bash -c './fuse_dfs_wrapper.sh -d "$1" /mnt/hdfs' bash "$HDFS" || exit $?

echo "================================================="
echo "run docker exec -it -w /mnt/hdfs $CONTAINER_NAME-$HDFS_PORT /bin/bash to access fuse"
echo "================================================="

0 comments on commit b4071eb

Please sign in to comment.