diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..e69de29
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..b32f1e6
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,101 @@
+# Base: bitnami/spark, which the author describes as a minimal Debian 11 system.
+# NOTE(review): resources/sources.list targets the "bookworm" (Debian 12) suite;
+# confirm it matches the Debian release of this base image.
+# Spark 3.5.0, compatible with Hadoop 3.3+.
+FROM bitnami/spark:3.5.0
+
+LABEL maintainer="somebottle "
+LABEL description="Docker image with Spark 3.5.0 and Hadoop 3.3.6, based on bitnami/spark image. For my graduation project."
+
+# Environment configuration.
+# Hostnames of every node; written to /root/exchange_hosts for the SSH key exchange.
+ENV SH_HOSTS="shmain shworker1 shworker2"
+# Hadoop version to download.
+ENV HADOOP_VER="3.3.6"
+# Hadoop installation directory.
+ENV HADOOP_HOME="/opt/hadoop"
+# Hadoop configuration directory.
+ENV HADOOP_CONF_DIR="/opt/hadoop/etc/hadoop"
+# Hadoop log directory.
+ENV HADOOP_LOG_DIR="/var/log/hadoop"
+# Put the Hadoop sbin/ and bin/ directories on PATH.
+ENV PATH="$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$PATH"
+
+# Every following step runs as root.
+USER root
+
+# Generate a temporary root password; the containers use it on first start to
+# exchange SSH keys (scripts/ssh_key_exchange.sh deletes the file afterwards).
+# chpasswd takes "user:password" on stdin, so this does not rely on `echo -e`.
+# NOTE(review): the password is baked into an image layer - anyone who can pull
+# the image can read it.
+RUN openssl rand -base64 32 > /root/temp.pass \
+    && chmod 600 /root/temp.pass \
+    && echo "root:$(cat /root/temp.pass)" | chpasswd
+# Record the node hostnames for the key exchange script.
+RUN echo "$SH_HOSTS" > /root/exchange_hosts
+
+# Create /root/.ssh together with the directory holding the exchange flag files.
+RUN mkdir -p /root/.ssh/exchange_flags
+
+# Replace the apt sources with the mirror list.
+COPY resources/sources.list /etc/apt/sources.list
+
+# Install the SSH server plus wget, vim and sshpass.
+# The apt lists are removed in the same layer so they do not bloat the image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        openssh-server \
+        sshpass \
+        vim \
+        wget \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install under /opt.
+WORKDIR /opt
+# Download Hadoop from the Tsinghua mirror and unpack it to /opt/hadoop.
+RUN wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-${HADOOP_VER}/hadoop-${HADOOP_VER}.tar.gz \
+    && tar -zxf hadoop-${HADOOP_VER}.tar.gz \
+    && mv hadoop-${HADOOP_VER} hadoop \
+    && rm -f hadoop-${HADOOP_VER}.tar.gz
+
+# Stage the configuration files (COPY creates the destination directory).
+COPY configs/* /tmp/tmp_configs/
+
+# Move each configuration file to its final location.
+RUN mv /tmp/tmp_configs/core-site.xml ${HADOOP_CONF_DIR}/core-site.xml \
+    && mv /tmp/tmp_configs/hdfs-site.xml ${HADOOP_CONF_DIR}/hdfs-site.xml \
+    && mv /tmp/tmp_configs/mapred-site.xml ${HADOOP_CONF_DIR}/mapred-site.xml \
+    && mv /tmp/tmp_configs/yarn-site.xml ${HADOOP_CONF_DIR}/yarn-site.xml \
+    && mv /tmp/tmp_configs/hadoop-env.sh ${HADOOP_CONF_DIR}/hadoop-env.sh \
+    && mv /tmp/tmp_configs/workers ${HADOOP_CONF_DIR}/workers \
+    && mv /tmp/tmp_configs/ssh_config /root/.ssh/config \
+    && mv /tmp/tmp_configs/sshd_config /etc/ssh/sshd_config \
+    && rm -rf /tmp/tmp_configs
+
+# Tighten permissions on the .ssh directory and its config file.
+RUN chmod 600 /root/.ssh/config \
+    && chmod 700 /root/.ssh
+
+# Copy the startup scripts.
+COPY scripts/* /opt/
+
+# Make the scripts executable.
+RUN chmod +x /opt/start-hadoop.sh \
+    && chmod +x /opt/stop-hadoop.sh \
+    && chmod +x /opt/entry.sh \
+    && chmod +x /opt/ssh_key_exchange.sh \
+    && chmod +x $HADOOP_HOME/sbin/start-dfs.sh \
+    && chmod +x $HADOOP_HOME/sbin/start-yarn.sh \
+    && chmod +x $HADOOP_HOME/sbin/stop-dfs.sh \
+    && chmod +x $HADOOP_HOME/sbin/stop-yarn.sh
+
+# Create the HDFS NameNode and DataNode directories.
+RUN mkdir -p /root/hdfs/name /root/hdfs/data
+
+# Format the NameNode at build time.
+RUN hdfs namenode -format
+
+# Script executed when the container starts.
+ENTRYPOINT [ "/opt/entry.sh" ]
\ No newline at end of file
diff --git a/README.md b/README.md
index 3c20ec0..c89a7f4 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,203 @@
-# haspark
-Hadoop + Spark 伪分布式容器化部署
+# Hadoop + Spark 伪分布式容器化部署
+
+本镜像基于`bitnami/spark:3.5.0`镜像,系统为`Debian 11`,执行用户为`root`。
+
+面向本地集群环境测试,即**伪分布式**。
+
+* 本镜像配置完成,用docker compose上线容器后,能**自动交换SSH公钥实现节点间SSH免密登录**。
+* 本镜像在**WSL**上测试完成。
+* [Docker hub](https://hub.docker.com/r/somebottle/haspark)
+
+## 版本
+
+* Hadoop `3.3.6`
+* Spark `3.5.0`
+
+## 节点分配
+
+1 master + 2 workers.
+
+> 如果需要修改则需要[编辑多个文件](#修改节点数)进行重新构建。
+
+## 特殊环境变量
+
+在`bitnami/spark`的基础上添加如下环境变量:
+
+| 名称 | 说明 | 默认值 |
+| --- | --- | --- |
+| HADOOP_MODE | Hadoop模式,若设为`master`则会在此容器中执行启动集群的指令 | 空 |
+
+## 容器部署
+
+### 1. 拉取
+
+```bash
+docker pull somebottle/haspark[:tag]
+```
+
+### 2. 
编写Docker Compose配置 + +**首次上线**时,会创建几个Docker卷,并且将镜像内格式化过的Namenode数据复制过来。 + +随后这些Docker卷会保持映射到HDFS的`NameNode`和`DataNode`目录,实现HDFS数据持久化(除非你移除了这些卷)。 + +> Docker Compose Volume配置文档: +> https://docs.docker.com/storage/volumes/#use-a-volume-with-docker-compose + +在某个新目录下建立`docker-compose.yml`。 + +示例配置如下,1 master + 2 worker的分配。 + +
+展开查看 + +```yaml +version: '3' + +services: + haspark-main: + image: somebottle/haspark:3.0.1 + hostname: shmain + environment: + - SPARK_MODE=master + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + - HADOOP_MODE=master # 在主容器中启动Hadoop集群 + volumes: + - haspark-hdfs-name-data:/root/hdfs/name:copy # 映射docker卷到主容器的/root/hdfs/name,创建卷时复制镜像中初始化过的namenode数据 + - ~/docker/spark/share:/opt/share # 三个容器映射到相同的共享目录 + ports: + - '8080:8080' + - '4040:4040' + - '8088:8088' + - '8042:8042' + - '9870:9870' + - '19888:19888' + haspark-worker-1: + image: somebottle/haspark:3.0.1 + hostname: shworker1 + environment: + - SPARK_MODE=worker + - SPARK_MASTER_URL=spark://shmain:7077 + - SPARK_WORKER_MEMORY=1G + - SPARK_WORKER_CORES=1 + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + volumes: + - ~/docker/spark/share:/opt/share + - haspark-hdfs-worker1-data:/root/hdfs/data # datanode数据 + ports: + - '8081:8081' + haspark-worker-2: + image: somebottle/haspark:3.0.1 + hostname: shworker2 + environment: + - SPARK_MODE=worker + - SPARK_MASTER_URL=spark://shmain:7077 + - SPARK_WORKER_MEMORY=1G + - SPARK_WORKER_CORES=1 + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + volumes: + - ~/docker/spark/share:/opt/share + - haspark-hdfs-worker2-data:/root/hdfs/data # datanode数据 + ports: + - '8082:8081' + +volumes: + haspark-hdfs-name-data: + haspark-hdfs-worker1-data: + haspark-hdfs-worker2-data: +``` + +
+ +**当然你也可以直接用本仓库的`docker-compose.yml`配置**。 + +### 3. 上线容器 + +在`docker-compose.yml`所在目录中执行。 + +```bash +docker compose up -d +``` + +### 4. 下线容器 + +在`docker-compose.yml`所在目录中执行。 + +下线容器,保留HDFS数据: + +```bash +docker compose down +``` + +如果你想把HDFS的数据连带清空: + +(这个操作会把相关的Docker卷全部移除) + +```bash +docker compose down -v # v代表volumes +``` + +### 5. 启动与停止Hadoop + +按理说容器启动后,**在完成免密登录配置后会自动执行**Hadoop集群启动脚本,如果没有的话你可以手动执行: + +```bash +/opt/start-hadoop.sh +``` + +Hadoop集群停止脚本: + +```bash +/opt/stop-hadoop.sh +``` + +## 重构建容器镜像 + +### 修改节点数 + +默认的节点主机名是: + +- `shmain` (master) +- `shworker1` (worker1) +- `shworker2` (worker2) + +如果你要修改节点主机名或者新增工人(worker)节点: + +1. 修改`docker-compose.yml`的`hostname`, `SPARK_MASTER_URL`,目录挂载等配置。 +2. 修改`Dockerfile`头部的`SH_HOSTS`环境变量。 +3. 修改`Hadoop`相关配置。主要是`core-site.xml`, `workers`文件,可能也要改动`yarn-site.xml`。 +4. 修改`ssh_config`配置文件。 +5. 重新构建镜像。 + + ```bash + docker build -t somebottle/haspark[:tag] . --network host + ``` + + > `--network host` 在WSL平台上很有效,采用和宿主机相同的网络,否则可能在容器内无法联网。 + +### 修改目录 + +如果你想以非root用户来运行容器,那么就需要进行比较大面积的改动。 + +你可能需要改动的文件: + +1. `docker-compose.yml` +2. `Dockerfile` +3. Hadoop配置: `hdfs-site.xml` +4. 脚本`ssh_key_exchange.sh` +5. 
脚本`start-hadoop.sh` + +然后重新构建镜像即可。 + +## 感谢 + +* [使用 Docker 快速部署 Spark + Hadoop 大数据集群 - s1mple的文章 - 知乎](https://zhuanlan.zhihu.com/p/421375012) diff --git a/configs/core-site.xml b/configs/core-site.xml new file mode 100644 index 0000000..f9cd04b --- /dev/null +++ b/configs/core-site.xml @@ -0,0 +1,9 @@ + + + + + + fs.defaultFS + hdfs://shmain:9000 + + \ No newline at end of file diff --git a/configs/hadoop-env.sh b/configs/hadoop-env.sh new file mode 100644 index 0000000..410bd14 --- /dev/null +++ b/configs/hadoop-env.sh @@ -0,0 +1,14 @@ +export JAVA_HOME=/opt/bitnami/java # 指定为bitnami镜像自带的jdk +export HADOOP_HOME=/opt/hadoop # 安装目录 +export HADOOP_MAPRED_HOME=/opt/hadoop +export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop +export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native +# JDK17环境下的Hadoop启动参数 +export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib --add-opens java.base/java.lang=ALL-UNNAMED" + +# 以root用户运行 +export HDFS_NAMENODE_USER="root" +export HDFS_DATANODE_USER="root" +export HDFS_SECONDARYNAMENODE_USER="root" +export YARN_RESOURCEMANAGER_USER="root" +export YARN_NODEMANAGER_USER="root" \ No newline at end of file diff --git a/configs/hdfs-site.xml b/configs/hdfs-site.xml new file mode 100644 index 0000000..17e9cf0 --- /dev/null +++ b/configs/hdfs-site.xml @@ -0,0 +1,21 @@ + + + + + + + dfs.namenode.name.dir + file:///root/hdfs/name + NameNode directory for namespace and transaction logs storage. 
+ + + dfs.datanode.data.dir + file:///root/hdfs/data + DataNode directory + + + dfs.replication + 2 + + \ No newline at end of file diff --git a/configs/mapred-site.xml b/configs/mapred-site.xml new file mode 100644 index 0000000..86d7039 --- /dev/null +++ b/configs/mapred-site.xml @@ -0,0 +1,25 @@ + + + + + + mapreduce.framework.name + yarn + + + yarn.app.mapreduce.am.env + HADOOP_MAPRED_HOME=/opt/hadoop + + + mapreduce.map.env + HADOOP_MAPRED_HOME=/opt/hadoop + + + mapreduce.reduce.env + HADOOP_MAPRED_HOME=/opt/hadoop + + + mapreduce.application.classpath + $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/common/*,$HADOOP_MAPRED_HOME/share/hadoop/common/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/yarn/*,$HADOOP_MAPRED_HOME/share/hadoop/yarn/lib/*,$HADOOP_MAPRED_HOME/share/hadoop/hdfs/*,$HADOOP_MAPRED_HOME/share/hadoop/hdfs/lib/* + + \ No newline at end of file diff --git a/configs/ssh_config b/configs/ssh_config new file mode 100644 index 0000000..3e86204 --- /dev/null +++ b/configs/ssh_config @@ -0,0 +1,14 @@ +Host localhost + StrictHostKeyChecking no + +Host 0.0.0.0 + StrictHostKeyChecking no + +Host shmain + StrictHostKeyChecking no + +Host shworker1 + StrictHostKeyChecking no + +Host shworker2 + StrictHostKeyChecking no \ No newline at end of file diff --git a/configs/sshd_config b/configs/sshd_config new file mode 100644 index 0000000..0d1bcea --- /dev/null +++ b/configs/sshd_config @@ -0,0 +1,122 @@ + +# This is the sshd server system-wide configuration file. See +# sshd_config(5) for more information. + +# This sshd was compiled with PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games + +# The strategy used for options in the default sshd_config shipped with +# OpenSSH is to specify options with their default value where +# possible, but leave them commented. Uncommented options override the +# default value. 
+ +Include /etc/ssh/sshd_config.d/*.conf + +#Port 22 +#AddressFamily any +#ListenAddress 0.0.0.0 +#ListenAddress :: + +#HostKey /etc/ssh/ssh_host_rsa_key +#HostKey /etc/ssh/ssh_host_ecdsa_key +#HostKey /etc/ssh/ssh_host_ed25519_key + +# Ciphers and keying +#RekeyLimit default none + +# Logging +#SyslogFacility AUTH +#LogLevel INFO + +# Authentication: + +#LoginGraceTime 2m +PermitRootLogin yes +#StrictModes yes +#MaxAuthTries 6 +#MaxSessions 10 + +PubkeyAuthentication yes + +# Expect .ssh/authorized_keys2 to be disregarded by default in future. +#AuthorizedKeysFile .ssh/authorized_keys .ssh/authorized_keys2 + +#AuthorizedPrincipalsFile none + +#AuthorizedKeysCommand none +#AuthorizedKeysCommandUser nobody + +# For this to work you will also need host keys in /etc/ssh/ssh_known_hosts +#HostbasedAuthentication no +# Change to yes if you don't trust ~/.ssh/known_hosts for +# HostbasedAuthentication +#IgnoreUserKnownHosts no +# Don't read the user's ~/.rhosts and ~/.shosts files +#IgnoreRhosts yes + +# To disable tunneled clear text passwords, change to no here! +PasswordAuthentication yes +#PermitEmptyPasswords no + +# Change to yes to enable challenge-response passwords (beware issues with +# some PAM modules and threads) +KbdInteractiveAuthentication no + +# Kerberos options +#KerberosAuthentication no +#KerberosOrLocalPasswd yes +#KerberosTicketCleanup yes +#KerberosGetAFSToken no + +# GSSAPI options +#GSSAPIAuthentication no +#GSSAPICleanupCredentials yes +#GSSAPIStrictAcceptorCheck yes +#GSSAPIKeyExchange no + +# Set this to 'yes' to enable PAM authentication, account processing, +# and session processing. If this is enabled, PAM authentication will +# be allowed through the KbdInteractiveAuthentication and +# PasswordAuthentication. Depending on your PAM configuration, +# PAM authentication via KbdInteractiveAuthentication may bypass +# the setting of "PermitRootLogin without-password". 
+# If you just want the PAM account and session checks to run without +# PAM authentication, then enable this but set PasswordAuthentication +# and KbdInteractiveAuthentication to 'no'. +UsePAM yes + +#AllowAgentForwarding yes +#AllowTcpForwarding yes +#GatewayPorts no +X11Forwarding yes +#X11DisplayOffset 10 +#X11UseLocalhost yes +#PermitTTY yes +PrintMotd no +#PrintLastLog yes +#TCPKeepAlive yes +#PermitUserEnvironment no +#Compression delayed +#ClientAliveInterval 0 +#ClientAliveCountMax 3 +#UseDNS no +#PidFile /run/sshd.pid +#MaxStartups 10:30:100 +#PermitTunnel no +#ChrootDirectory none +#VersionAddendum none + +# no default banner path +#Banner none + +# Allow client to pass locale environment variables +AcceptEnv LANG LC_* + +# override default of no subsystems +Subsystem sftp /usr/lib/openssh/sftp-server + +# Example of overriding settings on a per-user basis +#Match User anoncvs +# X11Forwarding no +# AllowTcpForwarding no +# PermitTTY no +# ForceCommand cvs server diff --git a/configs/workers b/configs/workers new file mode 100644 index 0000000..d19adc1 --- /dev/null +++ b/configs/workers @@ -0,0 +1,2 @@ +shworker1 +shworker2 \ No newline at end of file diff --git a/configs/yarn-site.xml b/configs/yarn-site.xml new file mode 100644 index 0000000..c0e26b7 --- /dev/null +++ b/configs/yarn-site.xml @@ -0,0 +1,21 @@ + + + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + yarn.nodemanager.aux-services.mapreduce_shuffle.class + org.apache.hadoop.mapred.ShuffleHandler + + + yarn.resourcemanager.hostname + shmain + + + yarn.nodemanager.env-whitelist + + JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME + + \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..29ab027 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,62 @@ +version: '3' + +services: + haspark-main: + image: somebottle/haspark:3.0.1 + hostname: shmain 
+ environment: + - SPARK_MODE=master + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + - HADOOP_MODE=master # 在主容器中启动Hadoop集群 + volumes: + - haspark-hdfs-name-data:/root/hdfs/name:copy # 映射docker卷到主容器的/root/hdfs/name,创建卷时复制镜像中初始化过的namenode数据 + - ~/docker/spark/share:/opt/share # 三个容器映射到相同的共享目录 + ports: + - '8080:8080' + - '4040:4040' + - '8088:8088' + - '8042:8042' + - '9870:9870' + - '19888:19888' + haspark-worker-1: + image: somebottle/haspark:3.0.1 + hostname: shworker1 + environment: + - SPARK_MODE=worker + - SPARK_MASTER_URL=spark://shmain:7077 + - SPARK_WORKER_MEMORY=1G + - SPARK_WORKER_CORES=1 + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + volumes: + - ~/docker/spark/share:/opt/share + - haspark-hdfs-worker1-data:/root/hdfs/data # datanode数据 + ports: + - '8081:8081' + haspark-worker-2: + image: somebottle/haspark:3.0.1 + hostname: shworker2 + environment: + - SPARK_MODE=worker + - SPARK_MASTER_URL=spark://shmain:7077 + - SPARK_WORKER_MEMORY=1G + - SPARK_WORKER_CORES=1 + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + volumes: + - ~/docker/spark/share:/opt/share + - haspark-hdfs-worker2-data:/root/hdfs/data # datanode数据 + ports: + - '8082:8081' + +volumes: + haspark-hdfs-name-data: + haspark-hdfs-worker1-data: + haspark-hdfs-worker2-data: \ No newline at end of file diff --git a/resources/sources.list b/resources/sources.list new file mode 100644 index 0000000..d827972 --- /dev/null +++ b/resources/sources.list @@ -0,0 +1,13 @@ +# 替换apt源 https://mirrors.tuna.tsinghua.edu.cn/help/debian/ + +deb https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm main contrib non-free non-free-firmware +# deb-src https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm main contrib 
non-free non-free-firmware
+
+deb https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm-updates main contrib non-free non-free-firmware
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm-updates main contrib non-free non-free-firmware
+
+deb https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm-backports main contrib non-free non-free-firmware
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/debian/ bookworm-backports main contrib non-free non-free-firmware
+
+deb https://mirrors.tuna.tsinghua.edu.cn/debian-security bookworm-security main contrib non-free non-free-firmware
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/debian-security bookworm-security main contrib non-free non-free-firmware
\ No newline at end of file
diff --git a/scripts/entry.sh b/scripts/entry.sh
new file mode 100644
index 0000000..49843a5
--- /dev/null
+++ b/scripts/entry.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Script executed when the container starts.
+
+# Fix HOME: the bitnami image uses the filesystem root as the home directory.
+export HOME="$(eval echo ~$(whoami))"
+
+# Start the SSH daemon.
+/etc/init.d/ssh start
+# Run the SSH key exchange script in the background to set up passwordless login.
+nohup /opt/ssh_key_exchange.sh > exchange.log 2>&1 &
+
+# Start the Hadoop cluster only when HADOOP_MODE is "master".
+if [ "$HADOOP_MODE" = "master" ]; then
+    # Launch Hadoop from the main container.
+    nohup /opt/start-hadoop.sh > hadoop_launch.log 2>&1 &
+else
+    echo "Hadoop will not automatically start in this container. Set HADOOP_MODE to 'master' to start."
+fi
+
+# Hand over to the bitnami entrypoint script.
+
+source /opt/bitnami/scripts/spark/entrypoint.sh /opt/bitnami/scripts/spark/run.sh
\ No newline at end of file
diff --git a/scripts/ssh_key_exchange.sh b/scripts/ssh_key_exchange.sh
new file mode 100644
index 0000000..8e5b496
--- /dev/null
+++ b/scripts/ssh_key_exchange.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+HOSTS_FILE="/root/exchange_hosts"
+FLAG_DIR="/root/.ssh/exchange_flags"
+TEMP_PASS_FILE="/root/temp.pass"
+# Maximum number of attempts to push the public key to one host.
+# Other containers' SSH daemons may not be fully up yet, so a push can fail at first.
+MAX_RETRY=5
+
+# No temporary password file means the keys were already exchanged.
+if [ ! -e "$TEMP_PASS_FILE" ]; then
+    echo "SSH KEY has been exchanged before, exit."
+    exit 0
+fi
+
+# Create the RSA key pair unless one already exists (avoids the overwrite prompt on a re-run).
+[ -f /root/.ssh/id_rsa ] || ssh-keygen -t rsa -f /root/.ssh/id_rsa -N ''
+
+retryCnt=0
+# Push this host's public key to every other container with sshpass + ssh-copy-id.
+# sshpass -f reads the password from the file, keeping it off the command line.
+for i in $(cat "$HOSTS_FILE"); do
+    retryCnt=0
+    if [ "$i" != "$(hostname)" ]; then
+        while [ "$retryCnt" -lt "$MAX_RETRY" ]; do
+            # Push the public key,
+            # then drop a flag file on the remote host marking this host's key as delivered.
+            # StrictHostKeyChecking must be set in .ssh/config; otherwise the first-connection warning hides the prompt from sshpass.
+            sshpass -f "$TEMP_PASS_FILE" ssh-copy-id -i /root/.ssh/id_rsa.pub "root@$i" && \
+                sshpass -f "$TEMP_PASS_FILE" ssh "root@$i" "touch $FLAG_DIR/$(hostname)" && \
+                echo "Key sent: $(hostname) -> $i"
+            if [ $? -eq 0 ]; then
+                break
+            else
+                # The push failed; count the attempt and retry.
+                ((retryCnt++))
+                echo "Failed to send key. Will retry $retryCnt/$MAX_RETRY after 5 seconds..."
+                sleep 5 # wait 5 seconds between attempts
+            fi
+        done
+        if [ "$retryCnt" -ge "$MAX_RETRY" ]; then # every attempt failed
+            echo "Failed to send key to $i !"
+            exit 1
+        fi
+    fi
+done
+
+# Authorize this host's own key too: Hadoop also connects to the local host over SSH on startup.
+cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
+# Flag the local host as done.
+touch "$FLAG_DIR/$(hostname)"
+
+# Wait until the whole exchange has converged.
+while true; do
+    # Each container leaves a flag file named after its hostname in $FLAG_DIR once it has pushed its key here.
+    finished=true
+    for i in $(cat "$HOSTS_FILE"); do
+        if [ ! -e "$FLAG_DIR/$i" ]; then # a hostname is still missing, so not converged yet
+            finished=false
+            break
+        fi
+    done
+    # Converged.
+    if $finished; then
+        break
+    fi
+    sleep 1
+done
+
+# The exchange is complete; remove the temporary password file.
+rm -f "$TEMP_PASS_FILE"
+
+# Disable password logins.
+sed -i 's/PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config
+/etc/init.d/ssh restart
\ No newline at end of file
diff --git a/scripts/start-hadoop.sh b/scripts/start-hadoop.sh
new file mode 100644
index 0000000..0555c73
--- /dev/null
+++ b/scripts/start-hadoop.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+TEMP_PASS_FILE="/root/temp.pass"
+
+# While the temporary password file exists the SSH key exchange is still running; wait for it before starting Hadoop.
+while [ -e "$TEMP_PASS_FILE" ]; do
+    sleep 3
+done
+
+"$HADOOP_HOME"/sbin/start-dfs.sh
+"$HADOOP_HOME"/sbin/start-yarn.sh
diff --git a/scripts/stop-hadoop.sh b/scripts/stop-hadoop.sh
new file mode 100644
index 0000000..cf9890e
--- /dev/null
+++ b/scripts/stop-hadoop.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+"$HADOOP_HOME"/sbin/stop-dfs.sh
+"$HADOOP_HOME"/sbin/stop-yarn.sh