From 4780f9fffe672718ef8120663008fab2598eccc6 Mon Sep 17 00:00:00 2001 From: Vadim Rutkovsky Date: Fri, 17 May 2019 17:33:23 +0200 Subject: [PATCH] templates/master/00-master: use etcd certs for snapshot save --- ...usr-local-bin-etcd-snapshot-backup-sh.yaml | 1 + ...local-bin-openshift-recovery-tools-sh.yaml | 100 +++++++++--------- 2 files changed, 53 insertions(+), 48 deletions(-) diff --git a/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-backup-sh.yaml b/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-backup-sh.yaml index 020cf34471..5dea276177 100644 --- a/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-backup-sh.yaml +++ b/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-backup-sh.yaml @@ -38,6 +38,7 @@ contents: function run { init dl_etcdctl + backup_etcd_client_certs backup_manifest snapshot_data_dir } diff --git a/templates/master/00-master/_base/files/usr-local-bin-openshift-recovery-tools-sh.yaml b/templates/master/00-master/_base/files/usr-local-bin-openshift-recovery-tools-sh.yaml index 034ab0e194..00321debd6 100644 --- a/templates/master/00-master/_base/files/usr-local-bin-openshift-recovery-tools-sh.yaml +++ b/templates/master/00-master/_base/files/usr-local-bin-openshift-recovery-tools-sh.yaml @@ -4,7 +4,7 @@ path: "/usr/local/bin/openshift-recovery-tools" contents: inline: | #!/usr/bin/env bash - + init() { ASSET_BIN=${ASSET_DIR}/bin if [ ! -d "$ASSET_BIN" ]; then @@ -14,12 +14,12 @@ contents: done fi } - + # download and test etcdctl from upstream release assets dl_etcdctl() { GOOGLE_URL=https://storage.googleapis.com/etcd DOWNLOAD_URL=${GOOGLE_URL} - + echo "Downloading etcdctl binary.." curl -s -L ${DOWNLOAD_URL}/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-amd64.tar.gz -o $ASSET_DIR/tmp/etcd-${ETCD_VERSION}-linux-amd64.tar.gz \ && tar -xzf $ASSET_DIR/tmp/etcd-${ETCD_VERSION}-linux-amd64.tar.gz -C $ASSET_DIR/shared --strip-components=1 \ @@ -27,7 +27,7 @@ contents: && rm $ASSET_DIR/shared/etcd \ && ETCDCTL_API=3 $ASSET_DIR/bin/etcdctl version } - + #backup etcd client certs backup_etcd_client_certs() { echo "Trying to backup etcd client certs.." @@ -48,7 +48,7 @@ contents: done fi } - + # backup current etcd-member pod manifest backup_manifest() { if [ -e "${ASSET_DIR}/backup/etcd-member.yaml" ]; then @@ -58,7 +58,7 @@ contents: cp ${ETCD_MANIFEST} ${ASSET_DIR}/backup/ fi } - + # backup etcd.conf backup_etcd_conf() { if [ -e "${ASSET_DIR}/backup/etcd.conf" ]; then @@ -68,7 +68,7 @@ contents: cp /etc/etcd/etcd.conf ${ASSET_DIR}/backup/ fi } - + backup_data_dir() { if [ -f "$ASSET_DIR/backup/etcd/member/snap/db" ]; then echo "etcd data-dir backup found $ASSET_DIR/backup/etcd.." @@ -81,7 +81,11 @@ contents: } snapshot_data_dir() { - ETCDCTL_API=3 ${ETCDCTL} snapshot save ${SNAPSHOT_FILE} + ETCDCTL_API=3 ${ETCDCTL} \ + --cert $ASSET_DIR/backup/etcd-client.crt \ + --key $ASSET_DIR/backup/etcd-client.key \ + --cacert $ASSET_DIR/backup/etcd-ca-bundle.crt \ + snapshot save ${SNAPSHOT_FILE} } # backup etcd peer, server and metric certs @@ -98,20 +102,20 @@ contents: cp $ETCD_STATIC_RESOURCES/system\:etcd-* $ASSET_DIR/backup/ fi } - + # stop etcd by moving the manifest out of /etcd/kubernetes/manifests # we wait for all etcd containers to die. stop_etcd() { echo "Stopping etcd.." - + if [ ! -d "$MANIFEST_STOPPED_DIR" ]; then mkdir $MANIFEST_STOPPED_DIR fi - + if [ -e "$ETCD_MANIFEST" ]; then mv $ETCD_MANIFEST $MANIFEST_STOPPED_DIR fi - + for name in {etcd-member,etcd-metric} do while [ ! -z "$(crictl pods -name $name --state Ready -q)" ]; do @@ -120,12 +124,12 @@ contents: done done } - + remove_data_dir() { echo "Removing etcd data-dir ${ETCD_DATA_DIR}" rm -rf ${ETCD_DATA_DIR} } - + remove_certs() { COUNT=$(ls $ETCD_STATIC_RESOURCES/system\:etcd-* 2>/dev/null | wc -l) if [ "$COUNT" -gt 1 ]; then @@ -133,7 +137,7 @@ contents: rm -f $ETCD_STATIC_RESOURCES/system\:etcd-* fi } - + restore_snapshot() { HOSTNAME=$(hostname) HOSTDOMAIN=$(hostname -d) @@ -143,38 +147,38 @@ contents: fi source /run/etcd/environment - + if [ ! -f "$SNAPSHOT_FILE" ]; then echo "Snapshot file not found, restore failed: $SNAPSHOT_FILE." exit 1 fi - + sleep 2 - + echo "Restoring etcd member $ETCD_NAME from snapshot.." - + env ETCDCTL_API=3 ${ETCDCTL} snapshot restore $SNAPSHOT_FILE \ --name $ETCD_NAME \ --initial-cluster ${ETCD_CONNSTRING} \ --initial-cluster-token etcd-cluster-1 \ --skip-hash-check=true \ --initial-advertise-peer-urls https://${ETCD_IPV4_ADDRESS}:2380 \ - --data-dir $ETCD_DATA_DIR + --data-dir $ETCD_DATA_DIR } - + patch_manifest() { echo "Patching etcd-member manifest.." cp $ASSET_DIR/backup/etcd-member.yaml $ASSET_DIR/tmp/etcd-member.yaml.template sed -i /' '--discovery-srv/d $ASSET_DIR/tmp/etcd-member.yaml.template mv $ASSET_DIR/tmp/etcd-member.yaml.template $MANIFEST_STOPPED_DIR/etcd-member.yaml } - + # generate a kubeconf like file for the cert agent to consume and contact signer. gen_config() { CA=$(base64 $ASSET_DIR/backup/etcd-ca-bundle.crt | tr -d '\n') CERT=$(base64 $ASSET_DIR/backup/etcd-client.crt | tr -d '\n') KEY=$(base64 $ASSET_DIR/backup/etcd-client.key | tr -d '\n') - + cat > $ETCD_STATIC_RESOURCES/.recoveryconfig << EOF clusters: - cluster: @@ -195,23 +199,23 @@ contents: client-key-data: ${KEY} EOF } - + # add member cluster etcd_member_add() { source /run/etcd/environment HOSTNAME=$(hostname) HOSTDOMAIN=$(hostname -d) ETCD_NAME=etcd-member-${HOSTNAME}.${HOSTDOMAIN} - + if [ -d "$ETCD_DATA_DIR" ]; then rm -rf $ETCD_DATA_DIR fi - + echo "Updating etcd membership.." - + RESPONSE=$(env ETCDCTL_API=3 $ETCDCTL --cert $ASSET_DIR/backup/etcd-client.crt --key $ASSET_DIR/backup/etcd-client.key --cacert $ASSET_DIR/backup/etcd-ca-bundle.crt \ --endpoints ${RECOVERY_SERVER_IP}:2379 member add $ETCD_NAME --peer-urls=https://${ETCD_DNS_NAME}:2380) - + if [ $? -eq 0 ]; then echo "$RESPONSE" APPEND_CONF=$(echo "$RESPONSE" | sed -e '1,2d') @@ -221,24 +225,24 @@ contents: exit 1 fi } - + start_etcd() { echo "Starting etcd.." mv ${MANIFEST_STOPPED_DIR}/etcd-member.yaml $MANIFEST_DIR } - + download_cert_recover_template() { curl -s https://raw.githubusercontent.com/hexfusion/openshift-recovery/master/manifests/etcd-generate-certs.yaml.template -o $ASSET_DIR/templates/etcd-generate-certs.yaml.template } - + populate_template() { FIND="$1" REPLACE="$2" TEMPLATE="$3" OUT="$4" - + echo "Populating template $TEMPLATE" - + if [ -z "$FIND" ] || [ -z "$REPLACE" ] || [ -z "$TEMPLATE" ] || [ -z "$OUT" ]; then echo "populate_template requires 4 arguments FIND, REPLACE, TEMPLATE and OUT" exit 1 @@ -246,33 +250,33 @@ contents: echo "template $TEMPLATE does not exist" exit 1 fi - + TMP_FILE=$(date +"%m-%d-%Y-%H%M") cp $TEMPLATE "$ASSET_DIR/tmp/${TMP_FILE}" - + sed -i "s|${FIND}|${REPLACE}|" "$ASSET_DIR/tmp/${TMP_FILE}" mv "$ASSET_DIR/tmp/${TMP_FILE}" "$OUT" } - + start_cert_recover() { echo "Starting etcd client cert recovery agent.." mv ${MANIFEST_STOPPED_DIR}/etcd-generate-certs.yaml $MANIFEST_DIR } - + verify_certs() { while [ "$(ls $ETCD_STATIC_RESOURCES | wc -l)" -lt 9 ]; do echo "Waiting for certs to generate.." sleep 10 done } - + stop_cert_recover() { echo "Stopping cert recover.." - + if [ -f "${CONFIG_FILE_DIR}/manifests/etcd-generate-certs.yaml" ]; then mv ${CONFIG_FILE_DIR}/manifests/etcd-generate-certs.yaml $MANIFEST_STOPPED_DIR fi - + for name in {generate-env,generate-certs}; do while [ ! -z "$(crictl pods -name $name --state Ready -q)" ]; do echo "Waiting for $name to stop" @@ -280,22 +284,22 @@ contents: done done } - + stop_static_pods() { echo "Stopping all static pods.." - + if [ ! -d "$MANIFEST_STOPPED_DIR" ]; then mkdir $MANIFEST_STOPPED_DIR fi - + find ${MANIFEST_DIR} -maxdepth 1 -type f -printf "%f\n" > $STOPPED_STATIC_PODS - + while read STATIC_POD; do echo "..stopping $STATIC_POD" mv ${MANIFEST_DIR}/${STATIC_POD} $MANIFEST_STOPPED_DIR done <$STOPPED_STATIC_PODS } - + start_static_pods() { echo "Starting static pods.." while read STATIC_POD; do @@ -303,17 +307,17 @@ contents: mv ${MANIFEST_STOPPED_DIR}/${STATIC_POD} $MANIFEST_DIR done <$STOPPED_STATIC_PODS } - + stop_kubelet() { echo "Stopping kubelet.." systemctl stop kubelet.service } - + start_kubelet() { echo "Starting kubelet.." systemctl start kubelet.service } - + stop_all_containers() { echo "Stopping all containers.." crictl ps -q | xargs -r crictl stop