diff --git a/Dockerfile b/Dockerfile index 2800a7e..dcbe126 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ # 适配Hadoop 3.3+ FROM bitnami/spark:3.5.0 -LABEL maintainer="somebottle " +LABEL maintainer="somebottle " LABEL description="Docker image with Spark 3.5.0 and Hadoop 3.3.6, based on bitnami/spark image. For my graduation project." # 环境变量配置 @@ -25,6 +25,9 @@ ENV HADOOP_CONF_DIR="/opt/hadoop/etc/hadoop" ENV HADOOP_LOG_DIR="/opt/hadoop/logs" # 把Hadoop目录加入环境变量 ENV PATH="$HADOOP_HOME/bin:/opt/somebottle/haspark/tools:$ZOOKEEPER_HOME/bin:$PATH" +# Add Hadoop's native libraries to the dynamic linker search path so that Spark and Hadoop can find the Hadoop Native Library +# ${VAR:+...} avoids a trailing ':' when LD_LIBRARY_PATH is unset (an empty entry makes the linker search the current directory) +ENV LD_LIBRARY_PATH="$HADOOP_HOME/lib/native${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" # 临时密码文件路径加入环境变量 ENV TEMP_PASS_FILE="/root/temp.pass" # 用户.ssh配置目录 @@ -48,8 +51,11 @@ ENV HA_YARN_SETUP_ON_STARTUP="false" # 以Root用户完成 USER root -# 将环境变量写入/etc/profile.d/container_env.sh -RUN echo -e '#!/bin/bash\nexport PATH='$PATH > /etc/profile.d/container_env.sh +# Write the path-related environment variables to /etc/profile.d/path_env.sh (printf is used because echo -e is not portable across shells) +RUN printf '%s\n' '#!/bin/bash' "export PATH=$PATH" "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH" > /etc/profile.d/path_env.sh + +# Write selected Hadoop environment variables to /etc/profile.d/hadoop.sh (truncating, so a rebuild never duplicates entries) +RUN printf '%s\n' '#!/bin/bash' "export HADOOP_HOME=$HADOOP_HOME" "export HADOOP_CONF_DIR=$HADOOP_CONF_DIR" > /etc/profile.d/hadoop.sh # 创建容器启动标识文件 RUN touch $INIT_FLAG_FILE @@ -71,7 +77,7 @@ RUN mv /tmp/sources.list /etc/apt/sources.list # 更新apt-get以及openssh-server, wget, vim, sshpass, net-tools, psmisc # psmisc包含Hadoop HA - sshfence所需的fuser工具 -RUN apt-get update && apt-get install -y openssh-server wget vim sshpass lsof net-tools psmisc +RUN apt-get update && apt-get install -y openssh-server wget vim sshpass lsof net-tools psmisc rsync zip # 建立haspark脚本目录 RUN mkdir -p /opt/somebottle/haspark diff --git a/README.md b/README.md index c6db89e..55ac5c0 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,14 @@ docker pull somebottle/haspark 脚本实际位于`/opt/somebottle/haspark/tools/test-wordcount.sh`。 +### 4.5 文件同步脚本 + 
+本脚本用于将某个节点上的文件同步到其他所有节点上(根据上面配置的 `$SH_HOSTS` 环境变量)。 + +命令行: `xsync <文件名列表>` + +脚本实际位于`/opt/somebottle/haspark/tools/xsync`。 + ## 5. 容器部署 ### 5.1. 拉取 @@ -152,7 +160,7 @@ version: '3' services: haspark-main: - image: somebottle/haspark:3.1.1 + image: somebottle/haspark:3.1.2 hostname: shmain env_file: ./conf.env environment: @@ -170,7 +178,7 @@ services: - '9870:9870' - '19888:19888' haspark-worker-1: - image: somebottle/haspark:3.1.1 + image: somebottle/haspark:3.1.2 hostname: shworker1 env_file: ./conf.env environment: @@ -186,7 +194,7 @@ services: ports: - '8081:8081' haspark-worker-2: - image: somebottle/haspark:3.1.1 + image: somebottle/haspark:3.1.2 hostname: shworker2 env_file: ./conf.env environment: diff --git a/docker-compose.yml b/docker-compose.yml index ae21990..caa0a30 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: haspark-main: - image: somebottle/haspark:3.1.1 + image: somebottle/haspark:3.1.2 hostname: shmain env_file: ./conf.env environment: @@ -20,7 +20,7 @@ services: - '9870:9870' - '19888:19888' haspark-worker-1: - image: somebottle/haspark:3.1.1 + image: somebottle/haspark:3.1.2 hostname: shworker1 env_file: ./conf.env environment: @@ -36,7 +36,7 @@ services: ports: - '8081:8081' haspark-worker-2: - image: somebottle/haspark:3.1.1 + image: somebottle/haspark:3.1.2 hostname: shworker2 env_file: ./conf.env environment: diff --git a/scripts/entry.sh b/scripts/entry.sh index aa759a5..35c6004 100644 --- a/scripts/entry.sh +++ b/scripts/entry.sh @@ -1,8 +1,8 @@ #!/bin/bash # 容器启动时执行的脚本 -# 修正家目录,bitnami不知道怎么想的,把文件系统根目录当家目录 -# 不修正的话,ssh-copy-id没法正常运作 +# Set the home directory explicitly +# Otherwise ssh-copy-id cannot work properly export HOME="$(eval echo ~$(whoami))" # 各组件的守护进程启动顺序 export HDFS_DAEMON_SEQ_FILE=/opt/somebottle/haspark/daemon_sequence/hdfs.seq diff --git a/tools/xsync b/tools/xsync new file mode 100644 index 0000000..97a7d71 --- /dev/null +++ b/tools/xsync @@ -0,0 +1,39 @@ +#!/bin/bash + +# File sync script: copies the given paths to every node listed in $SH_HOSTS + +# Source /etc/profile first so its exported environment variables take effect +source /etc/profile + 
+# Fail fast: at least one path argument is required (one path per argument).
+if [ $# -lt 1 ]; then
+    echo 'Please input file path(s) ! '
+    exit 1
+fi
+
+# While the temporary password file exists, the SSH public-key exchange is unfinished: wait.
+# Quoted on purpose: an unset TEMP_PASS_FILE would turn this into `[ -e ]`, which is always true.
+while [ -e "$TEMP_PASS_FILE" ]; do
+    sleep 3
+done
+
+echo "Syncing files..."
+
+# Push every given path to each cluster node listed (whitespace-separated) in SH_HOSTS.
+for host in $SH_HOSTS; do
+    echo "==================== Transfering files to $host ===================="
+    for file in "$@"; do
+        if [ -e "$file" ]; then
+            # Physical (symlink-resolved) absolute path of the parent directory.
+            parent_dir=$(cd -P -- "$(dirname -- "$file")" && pwd)
+            # Last path component, i.e. the name of the file or directory itself.
+            file_name=$(basename -- "$file")
+            # Create the parent directory remotely; %q quoting assumes a bash remote login shell.
+            ssh "$host" "mkdir -p -- $(printf '%q' "$parent_dir")"
+            # Copy to the same absolute location on the remote node.
+            rsync -av -- "$parent_dir/$file_name" "$host:$parent_dir"
+        else
+            echo "$file not found."
+        fi
+    done
+done