Skip to content

Commit

Permalink
3.1.4
Browse files Browse the repository at this point in the history
  • Loading branch information
SomeBottle committed Feb 16, 2024
1 parent 7fbb38a commit 0535ca9
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 16 deletions.
11 changes: 9 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ ENV HADOOP_LAUNCH_MODE="general" \
GN_NODEMANAGER_WITH_RESOURCEMANAGER="false" \
GN_HDFS_SETUP_ON_STARTUP="false" \
GN_YARN_SETUP_ON_STARTUP="false" \
GN_ZOOKEEPER_START_ON_STARTUP="false" \
HA_HDFS_NAMESERVICE="hacluster" \
HA_HDFS_SETUP_ON_STARTUP="false" \
HA_YARN_SETUP_ON_STARTUP="false"
HA_YARN_SETUP_ON_STARTUP="false"

# 以Root用户完成
USER root
Expand All @@ -57,7 +58,9 @@ COPY resources/sources.list /tmp/sources.list
# 将路径环境变量写入/etc/profile.d/path_env.sh
RUN echo -e "#!/bin/bash\nexport PATH=$PATH\nexport LD_LIBRARY_PATH=$LD_LIBRARY_PATH" > /etc/profile.d/path_env.sh && \
# 将Hadoop部分环境变量写入/etc/profile.d/hadoop.sh
echo -e "#!/bin/bash\nexport HADOOP_HOME=$HADOOP_HOME\nexport HADOOP_CONF_DIR=$HADOOP_CONF_DIR" >> /etc/profile.d/hadoop.sh && \
echo -e "#!/bin/bash\nexport HADOOP_HOME=$HADOOP_HOME\nexport HADOOP_CONF_DIR=$HADOOP_CONF_DIR\nexport HADOOP_LOG_DIR=$HADOOP_LOG_DIR\nexport HADOOP_VER=$HADOOP_VER" >> /etc/profile.d/hadoop.sh && \
# 将Zookeeper部分环境变量写入/etc/profile.d/zookeeper.sh
echo -e "#!/bin/bash\nexport ZOOKEEPER_HOME=$ZOOKEEPER_HOME\nexport ZOOKEEPER_CONF_DIR=$ZOOKEEPER_CONF_DIR\nexport ZOOKEEPER_VER=$ZOOKEEPER_VER\nexport ZOOKEEPER_DATA_DIR=$ZOOKEEPER_DATA_DIR" >> /etc/profile.d/zookeeper.sh && \
# 创建容器启动标识文件
touch $INIT_FLAG_FILE && \
# 先生成一个临时SSH密码,用于首次启动时交换ssh密钥
Expand Down Expand Up @@ -99,6 +102,8 @@ RUN wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-${HADO
tar -zxf hadoop-${HADOOP_VER}.tar.gz && \
mv hadoop-${HADOOP_VER} hadoop && \
rm -f hadoop-${HADOOP_VER}.tar.gz && \
# 删除hadoop的docs,可以省下很多空间
rm -rf ${HADOOP_HOME}/share/doc && \
# 移动配置文件到对应目录
mv /tmp/tmp_configs/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml && \
mv /tmp/tmp_configs/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml && \
Expand All @@ -114,6 +119,8 @@ RUN wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-${HADO
tar -zxf apache-zookeeper-${ZOOKEEPER_VER}-bin.tar.gz && \
mv apache-zookeeper-${ZOOKEEPER_VER}-bin zookeeper && \
rm -f apache-zookeeper-${ZOOKEEPER_VER}-bin.tar.gz && \
# 删除zookeeper的docs
rm -rf ${ZOOKEEPER_HOME}/docs && \
# 拷贝Zookeeper基础配置文件
cp /opt/zookeeper/conf/zoo_sample.cfg /opt/zookeeper/conf/zoo.cfg && \
# 修改Zookeeper数据目录
Expand Down
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ docker pull somebottle/haspark
| `GN_NODEMANAGER_WITH_RESOURCEMANAGER` | 在ResourceManager所在节点是否启动NodeManager | `"false"` |
| `GN_HDFS_SETUP_ON_STARTUP` | 是否在容器启动时自动启动HDFS各个节点的守护进程 | `"false"` |
| `GN_YARN_SETUP_ON_STARTUP` | 是否在容器启动时自动启动Yarn各个节点的守护进程 | `"false"` |
| `GN_ZOOKEEPER_START_ON_STARTUP` | 是否在容器启动时自动启动Zookeeper各个节点的守护进程 | `"false"` |

### 3.4. Hadoop高可用(HA)分布式

Expand All @@ -83,6 +84,8 @@ docker pull somebottle/haspark

除了 `bitnami/spark` 提供的只读环境变量外,本镜像还提供了:

(可以调用 `source /etc/profile` 来载入这些环境变量到当前 Shell 中)

| 名称 | 说明 |
| --- | --- |
|`ZOOKEEPER_VER` | Zookeeper版本 |
Expand All @@ -93,8 +96,8 @@ docker pull somebottle/haspark
|`HADOOP_HOME` | Hadoop安装目录 |
|`HADOOP_CONF_DIR` | Hadoop配置文件目录 |
|`HADOOP_LOG_DIR` | Hadoop日志目录 |


|`HDFS_SERVICE_ADDR`| HDFS 服务地址。示例: 普通分布式-> `host:port`; HA 分布式-> `hacluster` |
|`ZOOKEEPER_QUORUM`| Zookeeper集群各节点地址,逗号分隔。示例: `host1:2181,host2:2181,host3:2181` |

## 4. 提供的脚本

Expand Down Expand Up @@ -160,7 +163,7 @@ version: '3'

services:
haspark-main:
image: somebottle/haspark:3.1.3
image: somebottle/haspark:3.1.4
hostname: shmain
env_file: ./conf.env
environment:
Expand All @@ -178,7 +181,7 @@ services:
- '9870:9870'
- '19888:19888'
haspark-worker-1:
image: somebottle/haspark:3.1.3
image: somebottle/haspark:3.1.4
hostname: shworker1
env_file: ./conf.env
environment:
Expand All @@ -194,7 +197,7 @@ services:
ports:
- '8081:8081'
haspark-worker-2:
image: somebottle/haspark:3.1.3
image: somebottle/haspark:3.1.4
hostname: shworker2
env_file: ./conf.env
environment:
Expand Down
4 changes: 4 additions & 0 deletions conf.env
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ GN_HDFS_SETUP_ON_STARTUP=true
# 容器集群启动时顺带启动Yarn集群
GN_YARN_SETUP_ON_STARTUP=true

# Whether to start Zookeeper on container startup
# 容器集群启动时是否启动Zookeeper集群
GN_ZOOKEEPER_START_ON_STARTUP=false



# ***********Hadoop High Availability Section - Hadoop高可用分布式配置部分***********
Expand Down
6 changes: 3 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: '3'

services:
haspark-main:
image: somebottle/haspark:3.1.3
image: somebottle/haspark:3.1.4
hostname: shmain
env_file: ./conf.env
environment:
Expand All @@ -20,7 +20,7 @@ services:
- '9870:9870'
- '19888:19888'
haspark-worker-1:
image: somebottle/haspark:3.1.3
image: somebottle/haspark:3.1.4
hostname: shworker1
env_file: ./conf.env
environment:
Expand All @@ -36,7 +36,7 @@ services:
ports:
- '8081:8081'
haspark-worker-2:
image: somebottle/haspark:3.1.3
image: somebottle/haspark:3.1.4
hostname: shworker2
env_file: ./conf.env
environment:
Expand Down
8 changes: 7 additions & 1 deletion scripts/entry.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
#!/bin/bash
# 容器启动时执行的脚本

. /opt/somebottle/haspark/utils.sh # 导入工具函数

# 指定家目录
# 不指定的话,ssh-copy-id没法正常运作
export HOME="$(eval echo ~$(whoami))"
# 各组件的守护进程启动顺序
export HDFS_DAEMON_SEQ_FILE=/opt/somebottle/haspark/daemon_sequence/hdfs.seq
export YARN_DAEMON_SEQ_FILE=/opt/somebottle/haspark/daemon_sequence/yarn.seq

# Zookeeper Quorum列表
export ZOOKEEPER_QUORUM=$(join_by "$SH_HOSTS" ',' ':2181')

# 创建容器部署日志目录
mkdir -p /opt/somebottle/haspark/logs
# 创建守护进程启动记录目录
Expand All @@ -23,7 +28,8 @@ export HOME='$HOME'\n\
export HDFS_DAEMON_SEQ_FILE='$HDFS_DAEMON_SEQ_FILE'\n\
export YARN_DAEMON_SEQ_FILE='$YARN_DAEMON_SEQ_FILE'\n\
export TEMP_PASS_FILE='$TEMP_PASS_FILE'\n\
export INIT_FLAG_FILE='$INIT_FLAG_FILE'\n" >/etc/profile.d/sh_basics.sh
export INIT_FLAG_FILE='$INIT_FLAG_FILE'\n\
export ZOOKEEPER_QUORUM='$ZOOKEEPER_QUORUM'\n" >/etc/profile.d/sh_basics.sh

# 把JAVA_HOME也输出到/etc/profile.d/java.sh
echo "export JAVA_HOME=$JAVA_HOME" >/etc/profile.d/java.sh
Expand Down
7 changes: 7 additions & 0 deletions scripts/hadoop-general-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

. /opt/somebottle/haspark/utils.sh # 导入工具函数

# Optionally start the local Zookeeper daemon at container startup.
# Runs only when GN_ZOOKEEPER_START_ON_STARTUP is exactly "true".
if [[ "$GN_ZOOKEEPER_START_ON_STARTUP" == "true" ]]; then
  # Quote the path so a ZOOKEEPER_HOME containing whitespace or glob
  # characters is not word-split before the command is executed.
  "${ZOOKEEPER_HOME}/bin/zkServer.sh" start
fi

if [ -e $INIT_FLAG_FILE ]; then
# 仅在容器初次启动时执行
echo "Initializing Hadoop (General)."
Expand All @@ -15,6 +20,8 @@ if [ -e $INIT_FLAG_FILE ]; then
remove_ha_conf $HADOOP_CONF_DIR/mapred-site.xml
# 修改core-site.xml
sed -i "s/%%HDFS_DEF_HOST%%/$GN_NAMENODE_HOST:8020/g" $HADOOP_CONF_DIR/core-site.xml
# 将HDFS服务地址加入持久环境变量
echo "export HDFS_SERVICE_ADDR='${GN_NAMENODE_HOST}:8020'" >>/etc/profile.d/sh_basics.sh
# 修改hdfs-site.xml
sed -i "s/%%HDFS_REPLICATION%%/$HADOOP_HDFS_REPLICATION/g" $HADOOP_CONF_DIR/hdfs-site.xml
# 修改mapred-site.xml
Expand Down
9 changes: 4 additions & 5 deletions scripts/hadoop-ha-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ $ZOOKEEPER_HOME/bin/zkServer.sh start
# 协调: 等待所有结点的Zookeeper守护进程启动
wait_for_java_process_on_specified_nodes QuorumPeerMain "$SH_HOSTS"

# Zookeeper Quorum列表
zookeeper_nodes=$(join_by "$SH_HOSTS" ',' ':2181')

# **************************************************** 如果需要HDFS高可用
if [[ "$HA_HDFS_SETUP_ON_STARTUP" == "true" ]]; then

Expand All @@ -33,9 +30,11 @@ if [[ "$HA_HDFS_SETUP_ON_STARTUP" == "true" ]]; then
# ***********修改core-site.xml***********
# HDFS的NameNode的NameService名
sed -i "s/%%HDFS_DEF_HOST%%/$HA_HDFS_NAMESERVICE/g" $HADOOP_CONF_DIR/core-site.xml
# 将HDFS服务地址加入持久环境变量
echo "export HDFS_SERVICE_ADDR='$HA_HDFS_NAMESERVICE'" >>/etc/profile.d/sh_basics.sh
# 修改hdfs-site.xml
sed -i "s/%%HDFS_NAMESERVICE%%/$HA_HDFS_NAMESERVICE/g" $HADOOP_CONF_DIR/hdfs-site.xml
sed -i "s/%%ZK_ADDRS%%/$zookeeper_nodes/g" $HADOOP_CONF_DIR/core-site.xml
sed -i "s/%%ZK_ADDRS%%/$ZOOKEEPER_QUORUM/g" $HADOOP_CONF_DIR/core-site.xml

# ***********修改hdfs-site.xml***********
# HDFS副本数
Expand Down Expand Up @@ -201,7 +200,7 @@ if [[ "$HA_YARN_SETUP_ON_STARTUP" == "true" ]]; then
# 处理完成后把HA_REPEAT_XXX_START/END部分用生成的配置替换
replace_repeat_conf 'RESOURCEMANAGER' "$generated_rm_conf" $HADOOP_CONF_DIR/yarn-site.xml
# Zookeeper节点地址
sed -i "s/%%ZK_ADDRS%%/$zookeeper_nodes/g" $HADOOP_CONF_DIR/yarn-site.xml
sed -i "s/%%ZK_ADDRS%%/$ZOOKEEPER_QUORUM/g" $HADOOP_CONF_DIR/yarn-site.xml
fi

# ################# 容器每次启动都执行的部分 SECTION-START #################
Expand Down

0 comments on commit 0535ca9

Please sign in to comment.