From b4071eb9e17a344bdc8d2630acaf2910b8bad62e Mon Sep 17 00:00:00 2001 From: Chao-Heng Lee Date: Sun, 5 Feb 2023 23:39:39 +0800 Subject: [PATCH 1/3] add start_hdfs_fuse.sh --- .gitignore | 1 + docker/start_hdfs_fuse.sh | 152 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100755 docker/start_hdfs_fuse.sh diff --git a/.gitignore b/.gitignore index 3936902b34..9655493b80 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ spark.dockerfile deps.dockerfile worker.dockerfile etl.dockerfile +hdfs-fuse.dockerfile # we don't put binary file to git repo gradle-wrapper.jar VersionUtils.java \ No newline at end of file diff --git a/docker/start_hdfs_fuse.sh b/docker/start_hdfs_fuse.sh new file mode 100755 index 0000000000..d81d18334b --- /dev/null +++ b/docker/start_hdfs_fuse.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +declare -r DOCKER_FOLDER=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +source $DOCKER_FOLDER/docker_build_common.sh + +# ===============================[global variables]=============================== +declare -r VERSION=${VERSION:-3.3.4} +declare -r REPO=${REPO:-ghcr.io/skiptests/astraea/hdfs_fuse} +declare -r IMAGE_NAME="$REPO:$VERSION" +declare -r DOCKERFILE=$DOCKER_FOLDER/hdfs-fuse.dockerfile +declare -r CONTAINER_NAME="hdfs-fuse" + +# ===================================[functions]=================================== + +function showHelp() { + echo "Usage: [ENV] start_hdfs_fuse.sh" + echo "ENV: " + echo " REPO=astraea/datanode set the docker repo" + echo " VERSION=3.3.4 set version of hadoop distribution" + echo " BUILD=false set true if you want to build image locally" + echo " RUN=false set false if you want to build/pull image only" +} + +function generateDockerfile() { + echo "#this dockerfile is generated dynamically +FROM ubuntu:22.04 AS build + +#install tools +RUN apt-get update && apt-get install -y wget + +#download hadoop +WORKDIR /tmp +RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}-src.tar.gz +RUN mkdir /opt/hadoop-src +RUN tar -zxvf hadoop-${VERSION}-src.tar.gz -C /opt/hadoop-src --strip-components=1 +RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}.tar.gz +RUN mkdir /opt/hadoop +RUN tar -zxvf hadoop-${VERSION}.tar.gz -C /opt/hadoop --strip-components=1 + +FROM ubuntu:22.04 AS buildsrc + +#install tools +RUN apt-get update \\ + && apt-get install -y openjdk-11-jdk \\ + maven \\ + build-essential \\ + autoconf \\ + automake \\ + libtool \\ + cmake \\ + zlib1g-dev \\ + pkg-config \\ + libssl-dev \\ + libsasl2-dev \\ + g++ \\ + curl \\ + libfuse-dev + +WORKDIR /tmp +RUN curl -L -s -S 
https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz -o protobuf-3.7.1.tar.gz \\ + && mkdir /opt/protobuf-3.7-src \\ + && tar -zxf protobuf-3.7.1.tar.gz --strip-components 1 -C /opt/protobuf-3.7-src && cd /opt/protobuf-3.7-src \\ + && ./configure --prefix=/usr/ \\ + && make -j\$(nproc) \\ + && make install + +WORKDIR /tmp +RUN curl -L https://sourceforge.net/projects/boost/files/boost/1.80.0/boost_1_80_0.tar.bz2/download > boost_1_80_0.tar.bz2 \\ + && tar --bzip2 -xf boost_1_80_0.tar.bz2 -C /opt && cd /opt/boost_1_80_0 \\ + && ./bootstrap.sh --prefix=/usr/ \\ + && ./b2 --without-python \\ + && ./b2 --without-python install + +ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 + +#copy hadoop +COPY --from=build /opt/hadoop-src /opt/hadoop +WORKDIR /opt/hadoop +RUN mvn clean package -pl hadoop-hdfs-project/hadoop-hdfs-native-client -Pnative -DskipTests -Drequire.fuse=true + +FROM ubuntu:22.04 + +#install tools +RUN apt-get update && apt-get install -y openjdk-11-jre fuse + +#copy hadoop +COPY --from=build /opt/hadoop /opt/hadoop +COPY --from=buildsrc /opt/hadoop /opt/hadoop + +ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 +ENV HADOOP_HOME /opt/hadoop + +RUN echo \"user_allow_other\" >> /etc/fuse.conf + +WORKDIR /opt/hadoop/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs +RUN sed -i -e '18aexport CLASSPATH=\\\${HADOOP_HOME}/etc/hadoop:\`find \\\${HADOOP_HOME}/share/hadoop/ | awk '\"'\"'{path=path\":\"\\\$0}END{print path}'\"'\"'\`' \\ + -i -e '18aexport LD_LIBRARY_PATH=\\\${HADOOP_HOME}/lib/native:\\\$LD_LIBRARY_PATH' \\ + -i -e 's#export LIBHDFS_PATH=.*#export LIBHDFS_PATH=\\\${HADOOP_HOME}/hadoop-hdfs-project/hadoop-hdfs-native-client/target/native/target/usr/local/lib#' \\ + -i -e 's/find \"\\\$HADOOP_HOME\/hadoop-client\" -name \"\\*.jar\"/find \"\\\$HADOOP_HOME\/hadoop-client-modules\/hadoop-client\" -name \"\\*.jar\"/g' fuse_dfs_wrapper.sh + +#add user +RUN groupadd astraea && useradd -ms /bin/bash -g astraea astraea + +RUN mkdir /mnt/hdfs + +#change user +RUN chown -R $USER:$USER /opt/hadoop /mnt/hdfs +USER $USER + +" >"$DOCKERFILE" +} + +# ===================================[main]=================================== + +checkDocker +buildImageIfNeed "$IMAGE_NAME" +if [[ "$RUN" != "true" ]]; then + echo "docker image: $IMAGE_NAME is created" + exit 0 +fi + +checkNetwork + +if [[ $# -gt 0 ]]; then + HDFS=$1 + HDFS_PORT="${HDFS: -5}" +fi + +docker run -d --init \ + --name $CONTAINER_NAME-$HDFS_PORT \ + --device /dev/fuse \ + --cap-add SYS_ADMIN \ + --security-opt apparmor:unconfined \ + "$IMAGE_NAME" /bin/bash -c "./fuse_dfs_wrapper.sh -d $HDFS /mnt/hdfs" + +echo "=================================================" +echo "run docker exec -it -w /mnt/hdfs $CONTAINER_NAME-$HDFS_PORT /bin/bash to access fuse" +echo "=================================================" \ No newline at end of file From ba1edac6ff6c4fc603acc4bb93196e3445252f8b Mon Sep 17 00:00:00 2001 From: Chao-Heng Lee Date: Tue, 7 Feb 2023 22:07:28 +0800 Subject: [PATCH 2/3] exec into container directly. 
--- docker/start_hdfs_fuse.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker/start_hdfs_fuse.sh b/docker/start_hdfs_fuse.sh index d81d18334b..8dfd0f12dc 100755 --- a/docker/start_hdfs_fuse.sh +++ b/docker/start_hdfs_fuse.sh @@ -147,6 +147,4 @@ docker run -d --init \ --security-opt apparmor:unconfined \ "$IMAGE_NAME" /bin/bash -c "./fuse_dfs_wrapper.sh -d $HDFS /mnt/hdfs" -echo "=================================================" -echo "run docker exec -it -w /mnt/hdfs $CONTAINER_NAME-$HDFS_PORT /bin/bash to access fuse" -echo "=================================================" \ No newline at end of file +docker exec -it -w /mnt/hdfs $CONTAINER_NAME-$HDFS_PORT /bin/bash \ No newline at end of file From 2ce66f63fdae8a1ad54d95727ab9a89113887ad1 Mon Sep 17 00:00:00 2001 From: Chao-Heng Lee Date: Sat, 11 Feb 2023 09:12:13 +0800 Subject: [PATCH 3/3] use start-build-env.sh to build src. --- docker/start_hdfs_fuse.sh | 155 +++++++++++++++++++++----------------- 1 file changed, 87 insertions(+), 68 deletions(-) diff --git a/docker/start_hdfs_fuse.sh b/docker/start_hdfs_fuse.sh index 8dfd0f12dc..71b42f55a7 100755 --- a/docker/start_hdfs_fuse.sh +++ b/docker/start_hdfs_fuse.sh @@ -19,10 +19,13 @@ source $DOCKER_FOLDER/docker_build_common.sh # ===============================[global variables]=============================== declare -r VERSION=${VERSION:-3.3.4} -declare -r REPO=${REPO:-ghcr.io/skiptests/astraea/hdfs_fuse} +declare -r REPO=${REPO:-ghcr.io/skiptests/astraea/hdfs-fuse} declare -r IMAGE_NAME="$REPO:$VERSION" -declare -r DOCKERFILE=$DOCKER_FOLDER/hdfs-fuse.dockerfile +declare -r TMP_DOCKER_FOLDER=/tmp/docker +declare -r DOCKERFILE=$TMP_DOCKER_FOLDER/hdfs-fuse.dockerfile declare -r CONTAINER_NAME="hdfs-fuse" +declare -r HADOOP_SRC_PATH=$TMP_DOCKER_FOLDER/hadoop-src +declare -r FUSE_DFS_WRAPPER_SH=$HADOOP_SRC_PATH/fuse_dfs_wrapper.sh # ===================================[functions]=================================== @@ -37,80 +40,19 @@ function showHelp() { function generateDockerfile() { echo "#this dockerfile is generated dynamically -FROM ubuntu:22.04 AS build - -#install tools -RUN apt-get update && apt-get install -y wget - -#download hadoop -WORKDIR /tmp -RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}-src.tar.gz -RUN mkdir /opt/hadoop-src -RUN tar -zxvf hadoop-${VERSION}-src.tar.gz -C /opt/hadoop-src --strip-components=1 -RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-${VERSION}/hadoop-${VERSION}.tar.gz -RUN mkdir /opt/hadoop -RUN tar -zxvf hadoop-${VERSION}.tar.gz -C /opt/hadoop --strip-components=1 - -FROM ubuntu:22.04 AS buildsrc - -#install tools -RUN apt-get update \\ - && apt-get install -y openjdk-11-jdk \\ - maven \\ - build-essential \\ - autoconf \\ - automake \\ - libtool \\ - cmake \\ - zlib1g-dev \\ - pkg-config \\ - libssl-dev \\ - libsasl2-dev \\ - g++ \\ - curl \\ - libfuse-dev - -WORKDIR /tmp -RUN curl -L -s -S https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-java-3.7.1.tar.gz -o protobuf-3.7.1.tar.gz \\ - && mkdir /opt/protobuf-3.7-src \\ - && tar -zxf protobuf-3.7.1.tar.gz --strip-components 1 -C /opt/protobuf-3.7-src && cd /opt/protobuf-3.7-src \\ - && ./configure --prefix=/usr/ \\ - && make -j\$(nproc) \\ - && make install - -WORKDIR /tmp -RUN curl -L https://sourceforge.net/projects/boost/files/boost/1.80.0/boost_1_80_0.tar.bz2/download > boost_1_80_0.tar.bz2 \\ - && tar --bzip2 -xf boost_1_80_0.tar.bz2 -C /opt && cd /opt/boost_1_80_0 \\ - && 
./bootstrap.sh --prefix=/usr/ \\ - && ./b2 --without-python \\ - && ./b2 --without-python install - -ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 +FROM ubuntu:22.04 #copy hadoop -COPY --from=build /opt/hadoop-src /opt/hadoop -WORKDIR /opt/hadoop -RUN mvn clean package -pl hadoop-hdfs-project/hadoop-hdfs-native-client -Pnative -DskipTests -Drequire.fuse=true - -FROM ubuntu:22.04 +COPY hadoop-src/ /opt/hadoop/ #install tools RUN apt-get update && apt-get install -y openjdk-11-jre fuse -#copy hadoop -COPY --from=build /opt/hadoop /opt/hadoop -COPY --from=buildsrc /opt/hadoop /opt/hadoop - -ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 -ENV HADOOP_HOME /opt/hadoop - RUN echo \"user_allow_other\" >> /etc/fuse.conf -WORKDIR /opt/hadoop/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs -RUN sed -i -e '18aexport CLASSPATH=\\\${HADOOP_HOME}/etc/hadoop:\`find \\\${HADOOP_HOME}/share/hadoop/ | awk '\"'\"'{path=path\":\"\\\$0}END{print path}'\"'\"'\`' \\ - -i -e '18aexport LD_LIBRARY_PATH=\\\${HADOOP_HOME}/lib/native:\\\$LD_LIBRARY_PATH' \\ - -i -e 's#export LIBHDFS_PATH=.*#export LIBHDFS_PATH=\\\${HADOOP_HOME}/hadoop-hdfs-project/hadoop-hdfs-native-client/target/native/target/usr/local/lib#' \\ - -i -e 's/find \"\\\$HADOOP_HOME\/hadoop-client\" -name \"\\*.jar\"/find \"\\\$HADOOP_HOME\/hadoop-client-modules\/hadoop-client\" -name \"\\*.jar\"/g' fuse_dfs_wrapper.sh +ENV HADOOP_HOME /opt/hadoop +ENV JAVA_HOME /usr/lib/jvm/java-11-openjdk-amd64 +WORKDIR /opt/hadoop #add user RUN groupadd astraea && useradd -ms /bin/bash -g astraea astraea @@ -119,11 +61,88 @@ RUN mkdir /mnt/hdfs #change user RUN chown -R $USER:$USER /opt/hadoop /mnt/hdfs +RUN chmod 755 fuse_dfs_wrapper.sh USER $USER " >"$DOCKERFILE" } +function checkGit() { + if [[ "$(which git)" == "" ]]; then + echo "you have to install git" + exit 2 + fi +} + +function cloneSrcIfNeed() { + if [[ ! -d "$HADOOP_SRC_PATH" ]]; then + mkdir -p $HADOOP_SRC_PATH + git clone https://github.com/apache/hadoop.git $HADOOP_SRC_PATH + fi +} + +function replaceLine() { + local line_number=$1 + local text=$2 + local file=$3 + + if [[ "$(uname)" == "Darwin" ]]; then + sed -i "" "${line_number}s/.*/${text}/" $file + else + sed -i "${line_number}s/.*/${text}/" $file + fi +} + +function buildSrc() { + checkGit + cloneSrcIfNeed + cd $HADOOP_SRC_PATH + git checkout rel/release-${VERSION} + replaceLine 17 USER=\$\(whoami\) start-build-env.sh + ./start-build-env.sh mvn clean package -Pnative -DskipTests -Drequire.fuse=true -Dmaven.javadoc.skip=true +} + +function generateFuseDfsWrapper() { + cat > "$FUSE_DFS_WRAPPER_SH" << 'EOF' +#!/usr/bin/env bash + +export FUSEDFS_PATH="$HADOOP_HOME/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs" +export LIBHDFS_PATH="$HADOOP_HOME/hadoop-hdfs-project/hadoop-hdfs-native-client/target/native/target/usr/local/lib" +export PATH=$FUSEDFS_PATH:$PATH +export LD_LIBRARY_PATH=$LIBHDFS_PATH:$JAVA_HOME/lib/server +while IFS= read -r -d '' file +do + export CLASSPATH=$CLASSPATH:$file +done < <(find "$HADOOP_HOME/hadoop-tools" -name "*.jar" -print0) + +fuse_dfs "$@" +EOF +} + +function buildImageIfNeed() { + local imageName="$1" + if [[ "$(docker images -q "$imageName" 2>/dev/null)" == "" ]]; then + local needToBuild="true" + if [[ "$BUILD" == "false" ]]; then + docker pull "$imageName" 2>/dev/null + if [[ "$?" == "0" ]]; then + needToBuild="false" + else + echo "Can't find $imageName from repo. 
Will build $imageName on the local" + fi + fi + if [[ "$needToBuild" == "true" ]]; then + buildSrc + generateFuseDfsWrapper + generateDockerfile + docker build --no-cache -t "$imageName" -f "$DOCKERFILE" "$TMP_DOCKER_FOLDER" + if [[ "$?" != "0" ]]; then + exit 2 + fi + fi + fi +} + # ===================================[main]=================================== checkDocker
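
A minimal usage sketch (not taken from the patches themselves; the NameNode address and the five-digit port are hypothetical, chosen so that the script's "${HDFS: -5}" suffix extraction yields the port used in the container name):

    # build the image locally and mount the given HDFS namespace at /mnt/hdfs inside the container
    VERSION=3.3.4 BUILD=true ./docker/start_hdfs_fuse.sh dfs://192.168.103.44:19000

    # the second patch makes the script exec into the container right away; this re-enters the mount later
    docker exec -it -w /mnt/hdfs hdfs-fuse-19000 /bin/bash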