This repository has been archived by the owner on Feb 17, 2023. It is now read-only.

Make the deploy ceph script repeatable and fault tolerant
JayjeetAtGithub committed Jul 24, 2021
1 parent b12d02d commit 93c008c
Showing 2 changed files with 133 additions and 36 deletions.
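
The changes boil down to one pattern applied throughout deploy_ceph.sh: optional arguments fall back to defaults, and every cleanup step is allowed to fail on a machine where the resource it removes does not exist, so the script can be re-run after a partial or failed deployment. Below is a minimal sketch of that pattern; the hostnames, device path, and the particular commands shown are illustrative, not lifted verbatim from the script.

    #!/usr/bin/env bash
    # Illustrative sketch of the repeatability pattern used in this commit.
    set -eu

    # Optional arguments with single-node defaults.
    MON=${1:-node1}
    BLKDEV=${2:-/dev/sdb}

    # Teardown commands may fail harmlessly on a clean machine; "|| true"
    # keeps "set -e" from aborting the whole script.
    ceph fs fail cephfs || true
    ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it || true
    pkill ceph-osd || true

    echo "previous state cleaned; redeploying on ${MON} using ${BLKDEV}"
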
117 changes: 104 additions & 13 deletions cpp/src/arrow/adapters/arrow-rados-cls/scripts/deploy_ceph.sh
@@ -16,45 +16,120 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-set -e
+set -eu

-if [[ $# -lt 4 ]] ; then
-  echo "usage: ./deploy_ceph.sh [mon hosts] [osd hosts] [mds hosts] [mgr hosts]"
+if [[ $# -lt 6 ]] ; then
+  echo "usage: ./deploy_ceph.sh [mon hosts] [osd hosts] [mds hosts] [mgr hosts] [blkdevice] [pool size]"
  echo " "
-  echo "for example: ./deploy_ceph.sh node1,node2,node3 node4,node5,node6 node1 node1"
+  echo "for example: ./deploy_ceph.sh node1,node2,node3 node4,node5,node6 node1 node1 /dev/sdb 3"
  exit 1
fi

-MON=$1
-OSD=$2
-MDS=$3
-MGR=$4
+# in default mode (without any arguments), deploy a single OSD Ceph cluster
+MON=${1:-node1}
+OSD=${2:-node1}
+MDS=${3:-node1}
+MGR=${4:-node1}
+BLKDEV=${5:-/dev/nvme0n1p4}
+POOL_SIZE=${6:-1}

# split the comma separated nodes into a list
IFS=',' read -ra MON_LIST <<< "$MON"; unset IFS
IFS=',' read -ra OSD_LIST <<< "$OSD"; unset IFS
IFS=',' read -ra MDS_LIST <<< "$MDS"; unset IFS
IFS=',' read -ra MGR_LIST <<< "$MGR"; unset IFS

MON_LIST=${MON_LIST[@]}
OSD_LIST=${OSD_LIST[@]}
MDS_LIST=${MDS_LIST[@]}
MGR_LIST=${MGR_LIST[@]}

# disable host key checking
cat > ~/.ssh/config << EOF
Host *
StrictHostKeyChecking no
EOF

# delete CephFS application along with any mountpoint
function delete_cephfs {
  echo "deleting cephfs"
  fusermount -uz /mnt/cephfs || true
  rm -rf /mnt/cephfs
  ceph fs fail cephfs || true
  ceph fs rm cephfs --yes-i-really-mean-it || true
}

# delete the leftover pools
function delete_pools {
  echo "deleting pools"
  ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it || true
  ceph osd pool delete cephfs_metadata cephfs_metadata --yes-i-really-really-mean-it || true
  ceph osd pool delete device_health_metrics device_health_metrics --yes-i-really-really-mean-it || true
}

# delete leftover OSD daemons
function delete_osds {
  # kill the daemon and format the LVM partition
  for node in ${OSD_LIST}; do
    ssh $node pkill ceph-osd || true
    # try to zap the block devices.
    ssh $node ceph-volume lvm zap $BLKDEV --destroy || true
    ssh $node rm -rf /etc/ceph/*
  done

  # remove all the OSDs
  NUM_OSDS=${#OSD_LIST[@]}
  for ((i=0; i<$NUM_OSDS; i++)); do
    # mark an OSD down and remove it
    ceph osd down osd.${i} || true
    ceph osd out osd.${i} || true
    ceph osd rm osd.${i} || true

    # remove entry from crush map
    ceph osd crush rm osd.${i} || true

    # remove auth entry
    ceph auth del osd.${i} || true
  done
}

# kill the MONs, MGRs and MDSs.
function delete_mon_mgr_mds {
  for node in ${MON_LIST}; do
    ssh $node rm -rf /etc/ceph/*
    ssh $node pkill ceph-mon || true
  done
  for node in ${MGR_LIST}; do
    ssh $node rm -rf /etc/ceph/*
    ssh $node pkill ceph-mgr || true
  done
  for node in ${MDS_LIST}; do
    ssh $node rm -rf /etc/ceph/*
    ssh $node pkill ceph-mds || true
  done
}

echo "[0] cleaning up a previous Ceph installation"
# clean the cluster
delete_cephfs
delete_pools
delete_osds
delete_mon_mgr_mds

# clean the old workspace
rm -rf /tmp/deployment
rm -rf /tmp/ceph-deploy
rm -rf /etc/ceph/*

echo "[1] installing common packages"
apt update
-apt install -y python3-venv python3-pip ceph-fuse ceph-common
+apt install -y python3-venv python3-pip ceph-fuse ceph-common attr

echo "[2] installing ceph-deploy"
git clone https://github.com/ceph/ceph-deploy /tmp/ceph-deploy
pip3 install --upgrade /tmp/ceph-deploy

mkdir /tmp/deployment
-cd /tmp/deployment/
+cd /tmp/deployment

echo "[3] initializng Ceph config"
ceph-deploy new $MON_LIST
Expand All @@ -69,23 +144,32 @@ ceph-deploy admin $MON_LIST
echo "[6] deploying MGRs"
ceph-deploy mgr create $MGR_LIST

# update the Ceph config to allow pool deletion and to recognize object class libs.
cat >> ceph.conf << EOF
mon allow pool delete = true
osd class load list = *
osd op threads = 16
EOF

# deploy the updated Ceph config and restart the MONs for the config to take effect
ceph-deploy --overwrite-conf config push $MON_LIST $OSD_LIST $MDS_LIST $MGR_LIST
for node in ${MON_LIST}; do
  ssh $node systemctl restart ceph-mon.target
done

# copy the config to the default location on the admin node
cp ceph.conf /etc/ceph/ceph.conf
cp ceph.client.admin.keyring /etc/ceph/ceph.client.admin.keyring

# pause and let the user take a quick look to check that everything is fine before deploying OSDs
ceph -s
sleep 5

echo "[7] deploying OSDs"
for node in ${OSD_LIST}; do
  scp /tmp/deployment/ceph.bootstrap-osd.keyring $node:/etc/ceph/ceph.keyring
  scp /tmp/deployment/ceph.bootstrap-osd.keyring $node:/var/lib/ceph/bootstrap-osd/ceph.keyring
-  ceph-deploy osd create --data /dev/nvme0n1p4 $node
+  ceph-deploy osd create --data $BLKDEV $node
done

echo "[8] deploying MDSs"
Expand All @@ -94,8 +178,15 @@ ceph-deploy mds create $MDS_LIST
echo "[9] creating pools for deploying CephFS"
ceph osd pool create cephfs_data 128
ceph osd pool create cephfs_metadata 16

# turn off pg autoscale
ceph osd pool set cephfs_data pg_autoscale_mode off

# set the pool sizes based on commandline arguments
ceph osd pool set cephfs_data size $POOL_SIZE
ceph osd pool set cephfs_metadata size $POOL_SIZE
ceph osd pool set device_health_metrics size $POOL_SIZE

echo "[9] deploying CephFS"
ceph fs new cephfs cephfs_metadata cephfs_data
mkdir -p /mnt/cephfs
Expand All @@ -104,5 +195,5 @@ echo "[10] mounting CephFS at /mnt/cephfs"
sleep 5
ceph-fuse /mnt/cephfs

echo "Done."
echo "Ceph deployed successfully !"
ceph -s
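
After deploy_ceph.sh finishes, a few standard Ceph commands can be used to double-check the result. These checks are not part of the script (only the final ceph -s above is); the mount point matches the one created in step [10].

    # cluster health, OSD layout, and pool replication sizes
    ceph -s
    ceph osd tree
    ceph osd pool ls detail

    # the CephFS mount created by the script
    df -h /mnt/cephfs
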
52 changes: 29 additions & 23 deletions cpp/src/arrow/adapters/arrow-rados-cls/scripts/deploy_skyhook.sh
@@ -20,12 +20,14 @@
set -eu

if [[ $# -lt 2 ]] ; then
echo "./deploy_skyhook.sh [nodes] [skyhook-branch]"
echo "./deploy_skyhook.sh [nodes] [skyhook-branch] [deploy cls libs] [build python bindings]"
exit 1
fi

NODES=$1
BRANCH=$2
DEPLOY_CLS_LIBS=${3:-true}
BUILD_PYTHON_BINDINGS=${4:-true}

IFS=',' read -ra NODE_LIST <<< "$NODES"; unset IFS

Expand All @@ -44,32 +46,36 @@ cd cpp/release
cmake -DARROW_CLS=ON -DARROW_PARQUET=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_ZLIB=ON -DARROW_BUILD_EXAMPLES=ON -DPARQUET_BUILD_EXAMPLES=ON -DARROW_PYTHON=ON -DARROW_DATASET=ON -DARROW_CSV=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_ZSTD=ON ..
make -j4 install

-export WORKDIR=${WORKDIR:-$HOME}
-export ARROW_HOME=$WORKDIR/dist
-export PYARROW_WITH_DATASET=1
-export PYARROW_WITH_PARQUET=1
-export PYARROW_WITH_RADOS=1
+if [[ "${BUILD_PYTHON_BINDINGS}" == "true" ]]; then
+  export WORKDIR=${WORKDIR:-$HOME}
+  export ARROW_HOME=$WORKDIR/dist
+  export PYARROW_WITH_DATASET=1
+  export PYARROW_WITH_PARQUET=1
+  export PYARROW_WITH_RADOS=1

-mkdir -p /root/dist/lib
-mkdir -p /root/dist/include
+  mkdir -p /root/dist/lib
+  mkdir -p /root/dist/include

-cp -r /usr/local/lib/. /root/dist/lib
-cp -r /usr/local/include/. /root/dist/include
+  cp -r /usr/local/lib/. /root/dist/lib
+  cp -r /usr/local/include/. /root/dist/include

-cd /tmp/arrow/python
-pip3 install -r requirements-build.txt -r requirements-test.txt
-pip3 install wheel
-rm -rf dist/*
-python3 setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel
-pip3 install --upgrade dist/*.whl
+  cd /tmp/arrow/python
+  pip3 install -r requirements-build.txt -r requirements-test.txt
+  pip3 install wheel
+  rm -rf dist/*
+  python3 setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel
+  pip3 install --upgrade dist/*.whl
+fi

-cd /tmp/arrow/cpp/release/release
-for node in ${NODE_LIST[@]}; do
-  scp libcls* $node:/usr/lib/rados-classes/
-  scp libarrow* $node:/usr/lib/
-  scp libparquet* $node:/usr/lib/
-  ssh $node systemctl restart ceph-osd.target
-done
+if [[ "${DEPLOY_CLS_LIBS}" == "true" ]]; then
+  cd /tmp/arrow/cpp/release/release
+  for node in ${NODE_LIST[@]}; do
+    scp libcls* $node:/usr/lib/rados-classes/
+    scp libarrow* $node:/usr/lib/
+    scp libparquet* $node:/usr/lib/
+    ssh $node systemctl restart ceph-osd.target
+  done
+fi

export LD_LIBRARY_PATH=/usr/local/lib
cp /usr/local/lib/libparq* /usr/lib/
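
For reference, an invocation consistent with the updated argument handling in deploy_skyhook.sh might look like the following. The node names and the branch name are placeholders; the last two arguments (deploy cls libs, build python bindings) are optional and default to true.

    # build the "skyhook" branch (placeholder name), skip the Python bindings,
    # and push the object-class libraries to the listed OSD nodes
    ./deploy_skyhook.sh node4,node5,node6 skyhook true false
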
