3.1.2
SomeBottle committed Feb 6, 2024
1 parent 7bb7608 commit ae263e2
Showing 5 changed files with 65 additions and 12 deletions.
14 changes: 10 additions & 4 deletions Dockerfile
@@ -3,7 +3,7 @@
# Compatible with Hadoop 3.3+
FROM bitnami/spark:3.5.0

LABEL maintainer="somebottle <somebottle@gmail.com>"
LABEL maintainer="somebottle <somebottle@outlook.com>"
LABEL description="Docker image with Spark 3.5.0 and Hadoop 3.3.6, based on bitnami/spark image. For my graduation project."

# Environment variable configuration
@@ -25,6 +25,9 @@ ENV HADOOP_CONF_DIR="/opt/hadoop/etc/hadoop"
ENV HADOOP_LOG_DIR="/opt/hadoop/logs"
# Add the Hadoop directories to PATH
ENV PATH="$HADOOP_HOME/bin:/opt/somebottle/haspark/tools:$ZOOKEEPER_HOME/bin:$PATH"
# Add the Hadoop native libraries to the dynamic linker path
# so that Spark and Hadoop can find the Hadoop Native Library
ENV LD_LIBRARY_PATH="$HADOOP_HOME/lib/native:$LD_LIBRARY_PATH"
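# A hedged aside, not part of the original build: once Hadoop is installed in the image,
# running `hadoop checknative` inside a container should list libhadoop as loaded,
# which confirms that this LD_LIBRARY_PATH setting is being picked up.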
# Add the temporary password file path to the environment
ENV TEMP_PASS_FILE="/root/temp.pass"
# User .ssh configuration directory
@@ -48,8 +51,11 @@ ENV HA_YARN_SETUP_ON_STARTUP="false"
# Run the following steps as root
USER root

# Write environment variables to /etc/profile.d/container_env.sh
RUN echo -e '#!/bin/bash\nexport PATH='$PATH > /etc/profile.d/container_env.sh
# Write the path environment variables to /etc/profile.d/path_env.sh
RUN echo -e "#!/bin/bash\nexport PATH=$PATH\nexport LD_LIBRARY_PATH=$LD_LIBRARY_PATH" > /etc/profile.d/path_env.sh

# Write some of the Hadoop environment variables to /etc/profile.d/hadoop.sh
RUN echo -e "#!/bin/bash\nexport HADOOP_HOME=$HADOOP_HOME\nexport HADOOP_CONF_DIR=$HADOOP_CONF_DIR" >> /etc/profile.d/hadoop.sh

# Create the container startup flag file
RUN touch $INIT_FLAG_FILE
@@ -71,7 +77,7 @@ RUN mv /tmp/sources.list /etc/apt/sources.list

# Update apt-get and install openssh-server, wget, vim, sshpass, net-tools, psmisc
# psmisc provides the fuser tool required by Hadoop HA sshfence
RUN apt-get update && apt-get install -y openssh-server wget vim sshpass lsof net-tools psmisc
RUN apt-get update && apt-get install -y openssh-server wget vim sshpass lsof net-tools psmisc rsync zip

# Create the haspark script directory
RUN mkdir -p /opt/somebottle/haspark
14 changes: 11 additions & 3 deletions README.md
@@ -126,6 +126,14 @@ docker pull somebottle/haspark

The script is located at `/opt/somebottle/haspark/tools/test-wordcount.sh`

### 4.5 File Sync Script

This script syncs files from one node to all the other nodes in the cluster (according to the `$SH_HOSTS` environment variable configured above).

Command line: `xsync <file path(s)>`

The script is located at `/opt/somebottle/haspark/tools/xsync`
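
A minimal usage sketch (the config file paths here are only illustrative; any readable paths work):

```bash
# Push an edited Hadoop config file to every host listed in $SH_HOSTS
xsync /opt/hadoop/etc/hadoop/core-site.xml

# Several paths can be passed in a single call
xsync /opt/hadoop/etc/hadoop/core-site.xml /opt/hadoop/etc/hadoop/yarn-site.xml
```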

## 5. Container Deployment

### 5.1. Pull
@@ -152,7 +160,7 @@ version: '3'

services:
  haspark-main:
    image: somebottle/haspark:3.1.1
    image: somebottle/haspark:3.1.2
    hostname: shmain
    env_file: ./conf.env
    environment:
@@ -170,7 +178,7 @@ services:
      - '9870:9870'
      - '19888:19888'
  haspark-worker-1:
    image: somebottle/haspark:3.1.1
    image: somebottle/haspark:3.1.2
    hostname: shworker1
    env_file: ./conf.env
    environment:
@@ -186,7 +194,7 @@ services:
    ports:
      - '8081:8081'
  haspark-worker-2:
    image: somebottle/haspark:3.1.1
    image: somebottle/haspark:3.1.2
    hostname: shworker2
    env_file: ./conf.env
    environment:
6 changes: 3 additions & 3 deletions docker-compose.yml
@@ -2,7 +2,7 @@ version: '3'

services:
  haspark-main:
    image: somebottle/haspark:3.1.1
    image: somebottle/haspark:3.1.2
    hostname: shmain
    env_file: ./conf.env
    environment:
@@ -20,7 +20,7 @@ services:
      - '9870:9870'
      - '19888:19888'
  haspark-worker-1:
    image: somebottle/haspark:3.1.1
    image: somebottle/haspark:3.1.2
    hostname: shworker1
    env_file: ./conf.env
    environment:
@@ -36,7 +36,7 @@ services:
    ports:
      - '8081:8081'
  haspark-worker-2:
    image: somebottle/haspark:3.1.1
    image: somebottle/haspark:3.1.2
    hostname: shworker2
    env_file: ./conf.env
    environment:
4 changes: 2 additions & 2 deletions scripts/entry.sh
@@ -1,8 +1,8 @@
#!/bin/bash
# Script executed when the container starts

# Fix the home directory; for some reason the bitnami image treats the filesystem root as the home directory
# Without this fix, ssh-copy-id cannot work properly
# Set the home directory explicitly
# Without it, ssh-copy-id cannot work properly
export HOME="$(eval echo ~$(whoami))"
# Daemon startup order for each component
export HDFS_DAEMON_SEQ_FILE=/opt/somebottle/haspark/daemon_sequence/hdfs.seq
39 changes: 39 additions & 0 deletions tools/xsync
@@ -0,0 +1,39 @@
#!/bin/bash

# File sync script

# Load environment variables first
source /etc/profile

# While the temporary password file still exists, SSH public key exchange has not finished; wait for it to complete
while [ -e "$TEMP_PASS_FILE" ]; do
    sleep 3
done

echo "Syncing files..."

# At least one argument (space-separated file paths) is required
if [ $# -lt 1 ]; then
    echo 'Please input file path(s)!'
    exit 1
fi

# Iterate over all cluster nodes
for host in $SH_HOSTS; do
    echo "==================== Transferring files to $host ===================="
    for file in "$@"; do
        if [ -e "$file" ]; then
            # Get the absolute path of the file's parent directory
            parent_dir=$(
                cd -P "$(dirname "$file")"
                pwd
            )
            # Get the name of the current file
            file_name=$(basename "$file")
            ssh "$host" "mkdir -p $parent_dir"
            rsync -av "$parent_dir/$file_name" "$host:$parent_dir"
        else
            echo "$file not found."
        fi
    done
done
