diff --git a/templates/master/00-master/_base/files/usr-local-bin-etcd-member-recover-sh.yaml b/templates/master/00-master/_base/files/usr-local-bin-etcd-member-recover-sh.yaml index 7263d7c538..de4c1a1af9 100644 --- a/templates/master/00-master/_base/files/usr-local-bin-etcd-member-recover-sh.yaml +++ b/templates/master/00-master/_base/files/usr-local-bin-etcd-member-recover-sh.yaml @@ -33,6 +33,7 @@ contents: ASSET_DIR_TMP="$ASSET_DIR/tmp" CONFIG_FILE_DIR=/etc/kubernetes MANIFEST_DIR="${CONFIG_FILE_DIR}/manifests" + RUN_ENV=/run/etcd/environment MANIFEST_STOPPED_DIR="$ASSET_DIR/manifests-stopped" ETCD_MANIFEST="${MANIFEST_DIR}/etcd-member.yaml" @@ -51,6 +52,12 @@ contents: init dl_etcdctl backup_manifest + DISCOVERY_DOMAIN=$(grep -oP '(?<=discovery-srv=).*[^"]' $ASSET_DIR/backup/etcd-member.yaml ) + if [ -z "$DISCOVERY_DOMAIN" ]; then + echo "Discovery domain can not be extracted from $ASSET_DIR/backup/etcd-member.yaml" + exit 1 + fi + validate_environment backup_etcd_conf backup_etcd_client_certs stop_etcd @@ -58,11 +65,6 @@ contents: backup_certs remove_certs gen_config - DISCOVERY_DOMAIN=$(grep -oP '(?<=discovery-srv=).*[^"]' $ASSET_DIR/backup/etcd-member.yaml ) - if [ -z "$DISCOVERY_DOMAIN" ]; then - echo "Discovery domain can not be extracted from $ASSET_DIR/backup/etcd-member.yaml" - exit 1 - fi CLUSTER_NAME=$(echo ${DISCOVERY_DOMAIN} | grep -oP '^.*?(?=\.)') populate_template '__ETCD_DISCOVERY_DOMAIN__' "$DISCOVERY_DOMAIN" "$TEMPLATE" "$ASSET_DIR/tmp/etcd-generate-certs.stage1" populate_template '__SETUP_ETCD_ENVIRONMENT__' "$SETUP_ETCD_ENVIRONMENT" "$ASSET_DIR/tmp/etcd-generate-certs.stage1" "$ASSET_DIR/tmp/etcd-generate-certs.stage2" diff --git a/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-restore-sh.yaml b/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-restore-sh.yaml index f79e2d9f2a..89492b41f5 100644 --- a/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-restore-sh.yaml +++ 
b/templates/master/00-master/_base/files/usr-local-bin-etcd-snapshot-restore-sh.yaml @@ -4,47 +4,58 @@ path: "/usr/local/bin/etcd-snapshot-restore.sh" contents: inline: | #!/usr/bin/env bash - + set -o errexit set -o pipefail - + # example - # etcd-snapshot-restore.sh $path-to-snapshot $etcd-connection-string + if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" exit 1 fi - - ASSET_DIR=./assets - SNAPSHOT_FILE="${ASSET_DIR}/backup/etcd/member/snap/db" - - if [ "$1" != "" ]; then - SNAPSHOT_FILE="$1" + + usage () { + echo 'Path to snapshot is required: ./etcd-member-recover.sh $path-to-snapshot' + exit + } + + if [ "$1" == "" ]; then + usage fi - + SNAPSHOT_FILE="$1" ETCD_CONNSTRING="" if [ "$2" != "" ]; then ETCD_CONNSTRING="$2" fi + ASSET_DIR=./assets CONFIG_FILE_DIR=/etc/kubernetes MANIFEST_DIR="${CONFIG_FILE_DIR}/manifests" MANIFEST_STOPPED_DIR="${ASSET_DIR}/manifests-stopped" + RUN_ENV=/run/etcd/environment + ETCD_VERSION=v3.3.10 ETCDCTL="${ASSET_DIR}/bin/etcdctl" ETCD_DATA_DIR=/var/lib/etcd ETCD_MANIFEST="${MANIFEST_DIR}/etcd-member.yaml" ETCD_STATIC_RESOURCES="${CONFIG_FILE_DIR}/static-pod-resources/etcd-member" STOPPED_STATIC_PODS="${ASSET_DIR}/tmp/stopped-static-pods" - + source "/usr/local/bin/openshift-recovery-tools" - + function run { init dl_etcdctl backup_manifest + DISCOVERY_DOMAIN=$(grep -oP '(?<=discovery-srv=).*[^"]' $ASSET_DIR/backup/etcd-member.yaml ) + if [ -z "$DISCOVERY_DOMAIN" ]; then + echo "Discovery domain can not be extracted from $ASSET_DIR/backup/etcd-member.yaml" + exit 1 + fi + validate_environment stop_static_pods stop_etcd stop_kubelet @@ -55,5 +66,5 @@ contents: start_static_pods start_kubelet } - + run diff --git a/templates/master/00-master/_base/files/usr-local-bin-openshift-recovery-tools-sh.yaml b/templates/master/00-master/_base/files/usr-local-bin-openshift-recovery-tools-sh.yaml index 29138b82d8..385b53d9c9 100644 --- a/templates/master/00-master/_base/files/usr-local-bin-openshift-recovery-tools-sh.yaml +++ 
#######################################
# validate_environment performs the same actions as the discovery container in
# etcd-member init. Sometimes $RUN_ENV is not available if the node is rebooted,
# so it is recreated here.
#
# The SRV record _etcd-server-ssl._tcp.$DISCOVERY_DOMAIN is queried, every
# target is resolved, and the first target with an address equal to one of the
# host's IPv4 addresses is written to $RUN_ENV.
#
# Globals:
#   RUN_ENV          (read)  path of the environment file to validate/create
#   DISCOVERY_DOMAIN (read)  domain used for the SRV lookup
# Outputs:
#   Progress and error messages on stdout; writes $RUN_ENV on success.
# Returns:
#   0 when $RUN_ENV already exists and is non-empty, or was written.
#   Calls `exit 1` (terminating the calling script) when no SRV targets are
#   returned, no host IPv4 address is found, or no target matches the host.
#######################################
validate_environment() {
  local srv_targets host_ips target resolved addr host_ip

  # Nothing to do when the environment file is already populated.
  if [ -f "$RUN_ENV" ] && [ -s "$RUN_ENV" ]; then
    return 0
  fi

  # `|| true`: the calling scripts run with errexit and pipefail, so a grep that
  # matches nothing would otherwise abort at the assignment and the diagnostics
  # below would never be printed. Emptiness is checked explicitly instead.
  srv_targets=$(dig +noall +answer SRV "_etcd-server-ssl._tcp.${DISCOVERY_DOMAIN}" \
    | grep -oP '(?<=2380 ).*[^\.]' \
    | xargs) || true
  host_ips=$(ip -o addr | grep -oP '(?<=inet )(\d{1,3}\.){3}\d{1,3}') || true

  if [ -z "$srv_targets" ]; then
    echo "SRV A record query for ${DISCOVERY_DOMAIN} failed please update DNS"
    exit 1
  elif [ -z "$host_ips" ]; then
    echo "Unable to find any IPv4 addresses for host interfaces"
    exit 1
  fi

  # Word splitting of the three lists below is intentional: each is a
  # whitespace-separated list of host names or addresses.
  # shellcheck disable=SC2086
  for target in $srv_targets; do
    echo "checking against $target"

    # Resolve once per target rather than once per host address.
    resolved=$(dig +short "$target") || true
    if [ -z "$resolved" ]; then
      echo "No matching A record found for $target skipping"
      continue
    fi

    # dig +short may print several lines (a CNAME followed by addresses, or
    # multiple A records), so every returned entry is compared individually.
    for addr in $resolved; do
      for host_ip in $host_ips; do
        if [ "$addr" == "$host_ip" ]; then
          echo "dns name is $target"
          # The directory holding $RUN_ENV may be missing as well.
          mkdir -p "$(dirname "$RUN_ENV")"
          # printf instead of a here-doc: no dependence on how the terminator
          # line is indented once the script is embedded in YAML.
          printf '%s\n' \
            "ETCD_IPV4_ADDRESS=$addr" \
            "ETCD_DNS_NAME=$target" \
            "ETCD_WILDCARD_DNS_NAME=*.${DISCOVERY_DOMAIN}" > "$RUN_ENV"
          return 0
        fi
      done
    done
  done

  echo "SRV query failed no matching records found"
  exit 1
}