From 298fa32133875d4a6a8652e17349c6f240e88e39 Mon Sep 17 00:00:00 2001 From: ramr Date: Mon, 13 Apr 2015 14:11:18 -0700 Subject: [PATCH 1/2] Add new ha-config keepalived failover service container code, image and scripts. --- hack/build-images.sh | 2 + images/ha-config/keepalived/.gitignore | 2 + images/ha-config/keepalived/Dockerfile | 17 + images/ha-config/keepalived/README.md | 312 ++++++++++++++++++ images/ha-config/keepalived/bin/.gitignore | 2 + .../conf/hello-openshift-template.json | 63 ++++ images/ha-config/keepalived/conf/settings.sh | 78 +++++ .../keepalived/lib/config-generators.sh | 270 +++++++++++++++ .../keepalived/lib/failover-functions.sh | 39 +++ images/ha-config/keepalived/lib/utils.sh | 132 ++++++++ images/ha-config/keepalived/makefile.test | 11 + images/ha-config/keepalived/monitor.sh | 15 + .../ha-config/keepalived/tests/echoserver.py | 36 ++ .../keepalived/tests/verify_failover_image.sh | 90 +++++ 14 files changed, 1069 insertions(+) create mode 100644 images/ha-config/keepalived/.gitignore create mode 100644 images/ha-config/keepalived/Dockerfile create mode 100644 images/ha-config/keepalived/README.md create mode 100644 images/ha-config/keepalived/bin/.gitignore create mode 100644 images/ha-config/keepalived/conf/hello-openshift-template.json create mode 100644 images/ha-config/keepalived/conf/settings.sh create mode 100755 images/ha-config/keepalived/lib/config-generators.sh create mode 100755 images/ha-config/keepalived/lib/failover-functions.sh create mode 100755 images/ha-config/keepalived/lib/utils.sh create mode 100644 images/ha-config/keepalived/makefile.test create mode 100755 images/ha-config/keepalived/monitor.sh create mode 100644 images/ha-config/keepalived/tests/echoserver.py create mode 100755 images/ha-config/keepalived/tests/verify_failover_image.sh diff --git a/hack/build-images.sh b/hack/build-images.sh index f2e457517974..ac49ea01d0d3 100755 --- a/hack/build-images.sh +++ b/hack/build-images.sh @@ -35,6 +35,7 @@ tar xzf 
"${OS_IMAGE_RELEASE_TAR}" -C "${imagedir}" # Copy primary binaries to the appropriate locations. cp -f "${imagedir}/openshift" images/origin/bin cp -f "${imagedir}/openshift" images/router/haproxy/bin +cp -f "${imagedir}/openshift" images/ha-config/keepalived/bin # Copy image binaries to the appropriate locations. cp -f "${imagedir}/pod" images/pod/bin @@ -54,6 +55,7 @@ image openshift/origin-pod images/pod image openshift/origin images/origin image openshift/origin-haproxy-router images/router/haproxy image openshift/origin-docker-registry images/dockerregistry +image openshift/origin-keepalived-ha-config images/ha-config/keepalived # images that depend on openshift/origin image openshift/origin-deployer images/deployer image openshift/origin-docker-builder images/builder/docker/docker-builder diff --git a/images/ha-config/keepalived/.gitignore b/images/ha-config/keepalived/.gitignore new file mode 100644 index 000000000000..a87627daf9b9 --- /dev/null +++ b/images/ha-config/keepalived/.gitignore @@ -0,0 +1,2 @@ +settings.minion* +!.gitignore diff --git a/images/ha-config/keepalived/Dockerfile b/images/ha-config/keepalived/Dockerfile new file mode 100644 index 000000000000..6408353cf792 --- /dev/null +++ b/images/ha-config/keepalived/Dockerfile @@ -0,0 +1,17 @@ +# +# VIP failover monitoring container for OpenShift Origin. 
+# +# ImageName: openshift/origin-keepalived-ha-config +# +# FROM fedora +FROM openshift/origin-base + +RUN yum -y install kmod keepalived iproute psmisc nc net-tools + +ADD conf/ /var/lib/openshift/ha-config/keepalived/conf/ +ADD lib/ /var/lib/openshift/ha-config/keepalived/lib/ +ADD bin/ /var/lib/openshift/ha-config/keepalived/bin/ +ADD monitor.sh /var/lib/openshift/ha-config/keepalived/ + +EXPOSE 1985 +ENTRYPOINT ["/var/lib/openshift/ha-config/keepalived/monitor.sh"] diff --git a/images/ha-config/keepalived/README.md b/images/ha-config/keepalived/README.md new file mode 100644 index 000000000000..0a0b479b052c --- /dev/null +++ b/images/ha-config/keepalived/README.md @@ -0,0 +1,312 @@ +HA Router and Failover +====================== +This readme describes steps to add multiple HA OpenShift routers with +failover capability to achieve several nines of availability. + + +Build and Test +-------------- +1. Verify docker image build and run tests. + + $ make -f makefile.test # or make -f makefile.test image + $ make -f makefile.test test + + +Pre-requisites/Prep Time +------------------------ + +1. Launch a OpenShift cluster via whatever mechanism you use. The steps + below assume you are doing this on a dev machine using vagrant. + + $ export OPENSHIFT_DEV_CLUSTER=1 + $ cd $this-repo-git-co-dir # cloned from git@github.com:ramr/origin + $ vagrant up + + +2. Wait for the cluster to come up and then start the OpenShift router + with two (_2_) replicas. + + $ vagrant ssh minion-1 # (or master or minion-2). + # Ensure KUBECONFIG is set or else set it. + [ -n "$KUBECONFIG" ] || \ + export KUBECONFIG=/openshift.local.certificates/admin/.kubeconfig + # openshift kube get dc,rc,pods,se,mi,routes + openshift ex router arparp --create --replicas=2 \ + --credentials="${KUBECONFIG}" + + +3. Wait for the Router pods to get into running state (I'm just sitting + here watching the wheels go round and round). + + $ vagrant ssh minion-1 # (or master or minion-2). 
+ pods="openshift/origin-haproxy-router|openshift/origin-deployer" + while openshift kube get pods | egrep -e "$pods" | \ + grep "Pending" > /dev/null; do + echo -n "." + # "OkOk" + sleep 1 + done + echo "" + + +4. Check that the two OpenShift router replicas are up and serving. + + $ # This will be a bit slow, but it should return a 503 HTTP code + $ # indicating that haproxy is serving on port 80. + $ vagrant ssh minion-1 + sudo docker ps | grep "openshift/origin-haproxy-router" + curl -s -o /dev/null -w "%{http_code}\n" http://localhost/ + + $ # Repeat on minion-2: + $ vagrant ssh minion-2 + sudo docker ps | grep "openshift/origin-haproxy-router" + curl -s -o /dev/null -w "%{http_code}\n" http://localhost/ + + +5. Create an user, project and app. + + $ vagrant ssh minion-1 + # Add user and project. + openshift ex policy add-user view anypassword:test-admin + openshift ex new-project test --display-name="Failover Sample" \ + --description="Router Failover" --admin=anypassword:test-admin + # Create a test app using the template. + cd /vagrant/hack/exp/router-failover + openshift cli create -n test -f conf/hello-openshift-template.json + + echo "Wait for the app to startup and check app is reachable." + for ip in 10.245.2.3 10.245.2.4; do + curl -H "Host: hello.openshift.test" -o /dev/null -s -m 5 \ + -w "%{http_code}\n" http://$ip/ + done + echo "Ensure HTTP status code is 200 for both http://10.245.2.{3,4}" + # && echo "YAY" + + +6. Ensure you can get to the hello openshift app from inside/outside the vm. + + $ # minion-{1,2} use IPs 10.245.2.{3,4} in the dev environment. + for ip in 10.245.2.3 10.245.2.4; do + echo "$ip: $(curl -s --resolve hello.openshift.test:80:$ip \ + -m 5 http://hello.openshift.test)" + done + + +HA Routing Failover Setup +========================= + +1. Copy the router HA settings example config and edit it as needed. 
+ + $ cd /vagrant/hack/exp/router-failover + $ cp conf/settings.example settings.minion-1 + $ cp conf/settings.example settings.minion-2 + $ # + $ # And as per your environment, set/edit the values for + $ # ADMIN_EMAILS, EMAIL_FROM, SMTP_SERVER, + $ # PRIMARY_HA_VIPS, SLAVE_HA_VIPS and INTERFACE. + +2. For demo purposes, we are going to flip the PRIMARY and SLAVE groups + on minion-2 ... this allows both minions to serve in an Active-Active + fashion. + + $ # Flip PRIMARY+SLAVE groups on minion-2 ("Papoy?! Ah Papoy!!"). + $ sed -i "s/^PRIMARY_GROUPS=\(.*\)/PRIMARY_GROUPS_OLD=\1/g; + s/^SLAVE_GROUPS=\(.*\)/PRIMARY_GROUPS=\1/g; + s/^PRIMARY_GROUPS_OLD=\(.*\)/SLAVE_GROUPS=\1/g;" \ + settings.minion-2 + + $ # Check what the differences are on the minions. + $ diff conf/settings.example settings.minion-1 + $ diff conf/settings.example settings.minion-2 + + +3. Optionally clear the config - just so that we have a completely clean + slate. Step 4 below does this - but this is here just for my demo env + reuse purposes. + + $ # Run these commands on the minions via vagrant ssh minion-{1,2} + $ # sudo service keepalived stop + $ # sudo rm -f /etc/keepalived/keepalived.conf + + $ # OkOk + for m in minion-1 minion-2; do + vagrant ssh $m -c "sudo service keepalived stop; \ + sudo rm -f /etc/keepalived/keepalived.conf" + done + + +4. Setup router HA with failover using the 2 config files we created. + + $ # Run these commands on the minions via vagrant ssh minion-{1,2} + $ # cd /vagrant/hack/exp/router-failover + $ # sudo ./failover-setup.sh settings.minion-{1,2} + + $ # OkOk - minion-1 + for m in minion-1 minion-2; do + vagrant ssh $m -c "cd /vagrant/hack/exp/router-failover; \ + sudo ./failover-setup.sh settings.$m" + done + + +5. On each minion, you can check what VIPs are being serviced by that + minion via `ip a ls dev enp0s8`. Substitute the appropriate interface + name for `enp0s8` in your environment. + + $ # "minions laughing" ... 
+ for m in minion-1 minion-2; do + vagrant ssh $m -c "ip a ls dev enp0s8" + done + + +6. Check that you can get to the hello openshift app using the VIPs from + inside/outside the vms. + + for ip in 10.245.2.90 10.245.2.111 10.245.2.222 10.245.2.223; do + echo "$ip: $(curl -s --resolve hello.openshift.test:80:$ip \ + -m 5 http://hello.openshift.test)" + done + # && echo "YAY" + + +HA Routing Failover Demo +======================== +Whilst following the steps below, you can also monitor one of the VIPs on a +terminal on your host system. This just busy loops sending requests to a +specific VIP. + + tko="--connect-timeout 2" # Maybe use -m 2 instead. + resolver="--resolve hello.openshift.test:80:10.245.2.111" + while true; do + echo "$(date): $(curl -s $tko $resolver hello.openshift.test)" + done | tee /tmp/foo + + +HA Simple Failover Test (keepalived) +==================================== +The simplest test on VIP failover is to stop keepalived on one of the +minions. + + $ vagrant ssh minion-1 + + $ # Check which VIPs are served by this minion. + ip a ls dev enp0s8 + + $ # Make sure the VIP in the busy loop above 10.245.2.111 is + $ # "owned"/serviced by this minion. Or then use a VIP that's + $ # serviced by this minion in the above mentioned busy looper + $ # monitoring script (while true; curl ... done). + sudo service keepalived stop + + $ vagrant ssh minion-2 + # Check that the VIPs from minion-1 are taken over by this minion. + ip a ls dev enp0s8 + + $ vagrant ssh minion-1 + $ # Set things back to a "good" state by starting back keepalived. + sudo service keepalived start + + $ # Check the VIPs served by this minion. + ip a ls dev enp0s8 + + +HA Hard Failover Test (bring down the minion) +============================================= +The hard failover VIP test basically involves stopping the whole shebang +(keepalived, openshift-router and haproxy) by bringing down one of +the minions. + +1. Halt one of the minions ("Aww") ... 
+ + $ # If you are monitoring a specific VIP ala 10.245.2.111 in the + $ # example mentioned above, then bring down the minion that's + $ # "owns" that VIP. For now, bringing a random one down. + $ vagrant halt minion-$((RANDOM%2 + 1)) + + +2. Check that you can still get to the hello openshift app using the VIPs + from inside/outside the vms. + + for ip in 10.245.2.90 10.245.2.111 10.245.2.222 10.245.2.223; do + echo "$ip: $(curl -s --resolve hello.openshift.test:80:$ip \ + -m 5 http://hello.openshift.test)" + done + $ # && echo "YAY" + + +3. Bring back the minion ("YAY") ... + + $ vagrant up minion-{1,2} + + +4. Wait for the minion to come back online. + +5. Check how the VIPs are balanced between the 2 minions. + + for m in minion-1 minion-2; do + vagrant ssh $m -c "ip a ls dev enp0s8" + done + +6. Check that you can still get to the hello openshift app using the VIPs + from inside/outside the vms. + + for ip in 10.245.2.90 10.245.2.111 10.245.2.222 10.245.2.223; do + echo "$ip: $(curl -s --resolve hello.openshift.test:80:$ip \ + -m 5 http://hello.openshift.test)" + done + $ # && echo "YAY" + + + +HA Soft Failover Test +===================== + +1. Eventually this would test the keepalived process - but for now this + just shows how long the Kubernetes Replication Controller takes to + restart the services. + + $ # Stop the router on one of the minions ("Aaw"). + $ vagrant ssh minion-$((RANDOM%2 + 1)) + sudo kill -9 $(ps -e -opid,args | grep openshift-router | \ + grep -v grep | awk '{print $1}') + $ # OR: + sudo docker rm -f $(sudo docker ps | \ + grep openshift/origin-haproxy-router | \ + awk '{print $1}') + +2. Check that you can still get to the hello openshift app using the VIPs + from inside/outside the vms. 
+ + for ip in 10.245.2.90 10.245.2.111 10.245.2.222 10.245.2.223; do + echo "$ip: $(curl -s --resolve hello.openshift.test:80:$ip \ + -m 5 http://hello.openshift.test)" + done + $ # && echo "YAY" + $ # Wait for the router to come back up and run above check again. + + + +TODOs/Edge CASES: +----------------- + +## *Beware of the dog - it bites! You have been warned* +There's a 2 second delay (process existence check) as of now, we can +tune this up/down appropriately. +And it is pertinent to mention here that this solution is not true +fault-tolerance (100% availbility) - its just failover capability to +provide high availability (99.[9]{n}% availability - cheap but by no +means perfect). +So be aware of this and use it appropriately within your environment. + +One alternative to achieve several more 9s of availability is to + * stop keepalived immediately if the router or the docker container + running the router goes down. + * And start keepalived start it when the router comes back up because + the replication controller notices things ain't kosher. +But the bang for buck here is a low. 
+ + +## *Sound Effects* +Link quoted sound effects (ala "OkOk") to + + http://www.soundboard.com/sb/minions + diff --git a/images/ha-config/keepalived/bin/.gitignore b/images/ha-config/keepalived/bin/.gitignore new file mode 100644 index 000000000000..d6b7ef32c847 --- /dev/null +++ b/images/ha-config/keepalived/bin/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/images/ha-config/keepalived/conf/hello-openshift-template.json b/images/ha-config/keepalived/conf/hello-openshift-template.json new file mode 100644 index 000000000000..a0d9dde2940e --- /dev/null +++ b/images/ha-config/keepalived/conf/hello-openshift-template.json @@ -0,0 +1,63 @@ +{ + "metadata":{ + "name":"hello-service-pod-meta" + }, + "kind":"Config", + "apiVersion":"v1beta1", + "creationTimestamp":"2014-09-18T18:28:38-04:00", + "items":[ + { + "id": "hello-openshift-service", + "kind": "Service", + "apiVersion": "v1beta1", + "port": 6061, + "selector": { + "name": "hello-openshift-label" + } + }, + { + "id": "hello-openshift", + "kind": "ReplicationController", + "apiVersion": "v1beta1", + "desiredState": { + "podTemplate": { + "desiredState": { + "manifest": { + "version": "v1beta1", + "id": "", + "containers": [{ + "name": "hello-openshift-container", + "image": "openshift/hello-openshift", + "ports": [{ + "containerPort": 8080 + }] + }], + "dnsPolicy": "ClusterFirst", + "restartPolicy": { + "always": {} + }, + "volumes": null + } + }, + "labels": { + "name": "hello-openshift-label" + } + }, + "replicaSelector": { + "name": "hello-openshift-label" + }, + "replicas": 2 + } + }, + { + "kind": "Route", + "apiVersion": "v1beta1", + "metadata": { + "name": "hello-openshift-route" + }, + "id": "hello-openshift-route", + "host": "hello.openshift.test", + "serviceName": "hello-openshift-service" + } + ] +} diff --git a/images/ha-config/keepalived/conf/settings.sh b/images/ha-config/keepalived/conf/settings.sh new file mode 100644 index 000000000000..7338f8e306e7 --- /dev/null +++ 
#!/bin/bash


# ========================================================================
# Settings passed by the failover coordinator on OpenShift Origin.
# Every OPENSHIFT_HA_* value below is injected as a container environment
# variable; the right-hand defaults apply when a variable is unset.
# ========================================================================

# Name of this HA config instance.
HA_CONFIG_NAME=${OPENSHIFT_HA_CONFIG_NAME:-"OpenShift-HA-Config"}

# HA config selector (label selector identifying the participating nodes).
HA_SELECTOR=${OPENSHIFT_HA_SELECTOR:-""}


# List of virtual IP addresses.
#
# The value entries are comma-separated entries of the form:
#      <ipaddress>  or  <ipaddress-range>
#
# where:  ipaddress-range = <ipaddress>-<last-octet-of-end-address>
#
# Example:
#   OPENSHIFT_HA_VIRTUAL_IPS="10.42.42.42,10.100.1.20-24"
#
HA_VIPS=${OPENSHIFT_HA_VIRTUAL_IPS:-""}


# Interface (ethernet) to use - bound by vrrp.
NETWORK_INTERFACE=${OPENSHIFT_HA_NETWORK_INTERFACE:-""}  # "enp0s8"


# Service port to monitor for failover.
HA_MONITOR_PORT=${OPENSHIFT_HA_MONITOR_PORT:-"80"}



# ========================================================================
# Default settings - not currently exposed or overridden on OpenShift.
# ========================================================================

# If your environment doesn't support multicast, you can send VRRP adverts
# to a list of IPv{4,6} addresses using unicast.
# Example:
#   UNICAST_PEERS="5.6.7.8,9.10.11.12,13.14.15.16"
UNICAST_PEERS=${OPENSHIFT_HA_UNICAST_PEERS:-""}


# List of emails to send admin messages to. If the list of email ids is
# too long, you can use a DL (distribution list) ala:
#   ADMIN_EMAILS=("ramr@redhat.com" "cops@acme.org")
# NOTE(review): the expansion below is intentionally unquoted so that a
# space-separated OPENSHIFT_HA_ADMIN_EMAILS value splits into separate
# array elements - confirm before "fixing" the quoting.
ADMIN_EMAILS=(${OPENSHIFT_HA_ADMIN_EMAILS:-"root@localhost"})

# Email sender - the from address in the email headers.
EMAIL_FROM="ha-config@openshift.local"

# IP address of the SMTP server.
SMTP_SERVER=${OPENSHIFT_HA_SMTP_SERVER:-"127.0.0.1"}

# SMTP connect timeout (in seconds).
#
# Generate the keepalived global_defs configuration section.
#
# Globals:
#   ADMIN_EMAILS (read) - array of notification recipients.
#   EMAIL_FROM, SMTP_SERVER, SMTP_CONNECT_TIMEOUT (read) - SMTP settings.
# Arguments:
#   $1 - router name; scrubbed into a keepalived-safe router_id.
# Outputs:
#   The global_defs block on stdout.
#
# Example:
#   generate_global_config arparp
#
function generate_global_config() {
  # Split declaration from assignment so a scrub failure isn't masked.
  local routername
  routername=$(scrub "$1")

  echo "global_defs {"
  echo " notification_email {"

  # Quote the array expansion so an entry containing IFS characters
  # stays a single recipient line.
  local email
  for email in "${ADMIN_EMAILS[@]}"; do
    echo " $email"
  done

  echo " }"
  echo ""
  echo " notification_email_from ${EMAIL_FROM:-"ha-config@openshift.local"}"
  echo " smtp_server ${SMTP_SERVER:-"127.0.0.1"}"
  echo " smtp_connect_timeout ${SMTP_CONNECT_TIMEOUT:-"30"}"
  echo " router_id $routername"
  echo "}"
}
#
# Prepares the node for keepalived failover: loads the ip_vs kernel
# module and writes the generated keepalived configuration.
#
# Globals:
#   KEEPALIVED_CONFIG (read) - path the configuration is written to.
# Outputs:
#   Progress messages on stdout; the generated config is written to
#   $KEEPALIVED_CONFIG.
#
# NOTE(review): modprobe needs a privileged container with the host's
# /lib/modules available - confirm against the docker run flags used by
# the callers/tests.
#
function setup_failover() {
  echo " - Loading ip_vs module ..."
  modprobe ip_vs

  echo " - Checking if ip_vs module is available ..."
  # The matching lsmod line (grep's stdout) doubles as a log message.
  if lsmod | grep '^ip_vs'; then
    echo " - Module ip_vs is loaded."
  else
    # Deliberately logs and proceeds rather than aborting - the config is
    # still generated below. NOTE(review): confirm this best-effort
    # behavior is intended when ip_vs cannot be loaded.
    echo "ERROR: Module ip_vs is NOT available."
  fi

  echo " - Generating and writing config to $KEEPALIVED_CONFIG"
  # generate_failover_config is provided by lib/config-generators.sh,
  # sourced at the top of this file.
  generate_failover_config > "$KEEPALIVED_CONFIG"
}
#
# Expands a list of virtual IP addresses. List elements can be an IPv4
# address or an IP address range, and elements can be space or comma
# separated. A range only varies the LAST octet: "10.1.1.100-102" or
# "10.1.1.100-10.1.1.102" (only the final octet of the end address is
# used).
#
# Arguments:
#   $1 - comma/space separated list of IPs and ranges.
# Outputs:
#   Space-separated list of individual IP addresses on stdout.
#
# Examples:
#   expand_ip_ranges "1.1.1.1, 2.2.2.2,3.3.3.3-4 4.4.4.4"
#      # -> 1.1.1.1 2.2.2.2 3.3.3.3 3.3.3.4 4.4.4.4
#
#   expand_ip_ranges "10.1.1.100-102 10.1.1.200-200 10.42.42.42"
#      # -> 10.1.1.100 10.1.1.101 10.1.1.102 10.1.1.200 10.42.42.42
#
function expand_ip_ranges() {
  local vips=${1:-""}
  local expandedset=()
  local iprange

  # Turn commas into spaces FIRST, then strip anything that is not a
  # digit, dot, space or dash. The previous order deleted the spaces
  # themselves, merging adjacent space-separated entries (e.g.
  # "3.3.3.3-4 4.4.4.4" became "3.3.3.3-44.4.4.4" and dropped 4.4.4.4),
  # so the function failed its own documented examples.
  for iprange in $(echo "$vips" | tr "," " " | sed 's/[^0-9. -]//g'); do
    local ip1=${iprange%%-*}      # single address, or the range start
    local ip2=""
    if [[ "$iprange" == *-* ]]; then
      ip2=${iprange#*-}           # range end (possibly a full address)
    fi

    if [ -z "$ip2" ]; then
      expandedset+=("$ip1")
    else
      local base=${ip1%.*}        # first three octets of the range start
      local start=${ip1##*.}      # last octet of the range start
      local end=${ip2##*.}        # last octet of the range end
      local n
      # 10# forces decimal so an octet like "08" is not parsed as octal.
      for ((n = 10#$start; n <= 10#$end; n++)); do
        expandedset+=("${base}.${n}")
      done
    fi
  done

  echo "${expandedset[@]}"
}
#
# Returns the base name used for VRRP instances (scrubbed name + _VIP).
#
# Examples:
#   vrrp_instance_basename "arp"    # -> arp_VIP
#   vrrp_instance_basename "ha-1"   # -> ha_1_VIP
#
function vrrp_instance_basename() {
  local stem
  stem=$(scrub "$1")
  printf '%s_VIP\n' "$stem"
}


#
# Returns the full VRRP instance name: base name plus instance id.
#
# Examples:
#   generate_vrrp_instance_name arp 42   # -> arp_VIP_42
#   generate_vrrp_instance_name ha-1     # -> ha_1_VIP_0
#
function generate_vrrp_instance_name() {
  # The instance id defaults to 0 when not supplied.
  printf '%s_%s\n' "$(vrrp_instance_basename "$1")" "${2:-0}"
}


#
# Returns the network device name to use for VRRP: the first of the
# preferred device ($1) and the well-known VirtualBox interfaces that
# actually exists, else the device on the default route.
#
# Examples:
#   get_network_device
#   get_network_device "eth0"
#
function get_network_device() {
  local candidate
  for candidate in $1 ${VBOX_INTERFACES}; do
    if ip addr show dev "$candidate" &> /dev/null; then
      echo "$candidate"
      return
    fi
  done

  # Fall back to the device used to reach a public address: split the
  # route output into words and print the one following "dev".
  ip route get 8.8.8.8 | awk '/dev/ { f=NR }; f && (NR-1 == f)' RS=" "
}


#
# Returns the IPv4 address associated with a network device (defaults to
# the device selected by get_network_device).
#
# Examples:
#   get_device_ip_address
#   get_device_ip_address "docker0"
#
function get_device_ip_address() {
  local nic=${1:-"$(get_network_device)"}
  ifconfig "$nic" | awk '/inet / { print $2 }'
}


#
# Prints the number of OpenShift nodes matching the given label selector.
#
# Examples:
#   get_matching_node_count "region=ha"
#
function get_matching_node_count() {
  local bindir
  bindir=$(cd -P -- "$LIB_DIR/../bin/" && pwd)

  # Errors (e.g. master unreachable) are discarded; wc then reports 0.
  "$bindir/openshift" kube get nodes -l "$1" --no-headers 2> /dev/null \
    | wc -l
}
#!/usr/bin/env python

"""Echo server - reply back with the received message.

Listens on 0.0.0.0 at the port named by the PORT environment variable
(default 12345) and echoes each client's first message back to it.
Send SIGUSR1 to terminate the server cleanly.
"""

import os
import signal
import socket
import sys


def sigusr1_handler(signum, frame):
    """Signal handler: report the signal and exit with status 0."""
    # print() with a single argument is valid on both Python 2 and 3;
    # the original print statement was a SyntaxError under Python 3.
    print('signal %s received, exiting ...' % signum)
    sys.exit(0)


def setup():
    """Install the SIGUSR1 handler used to stop the server."""
    signal.signal(signal.SIGUSR1, sigusr1_handler)


def runserver():
    """Accept connections forever, echoing one message per connection."""
    sock = socket.socket()
    # Allow quick restarts of this test server without waiting for
    # TIME_WAIT sockets to expire.
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(('0.0.0.0', int(os.environ.get('PORT', '12345'))))
    sock.listen(10)

    while True:
        conn, _addr = sock.accept()
        try:
            data = conn.recv(4096)
            # sendall() retries partial writes, unlike send().  b'' keeps
            # this valid on Python 3, where sockets carry bytes - the
            # original sent '' (str), a TypeError under Python 3.
            conn.sendall(data if data else b'')
        finally:
            conn.close()


if "__main__" == __name__:
    setup()
    runserver()
readonly TEST_DIR=$(dirname "${BASH_SOURCE[0]}")
readonly FAILOVER_IMAGE="openshift/origin-keepalived-ha-config"
readonly TEST_VIPS="10.0.2.100-102"
readonly MONITOR_PORT="12345"


#
# Stops the test echo server.
#
# Arguments:
#   $1 - pid of the echo server (optional); when absent, it is looked up
#        from the process table.
# Returns:
#   0 always - "no server running" is not an error. The script runs under
#   'bash -e', so returning non-zero here would abort the whole run.
#
function stop_echo_server() {
  local pid=${1:-""}
  if [ -z "$pid" ]; then
    pid=$(ps -e -opid,args | grep echoserver.py | grep -v grep | awk '{print $1}')
  fi

  # Send SIGUSR1 to the echo server to terminate it cleanly. Use 'if'
  # rather than '[ -n ... ] && kill' - the && form returns 1 when $pid is
  # empty, which tripped 'bash -e' on the first (serverless) invocation.
  if [ -n "$pid" ]; then
    # Intentionally unquoted: $pid may hold several whitespace-separated
    # pids from the ps lookup above.
    kill -s USR1 $pid
  fi
}


#
# Starts the test echo server in the background on $MONITOR_PORT and
# prints its pid. Any previously running instance is stopped first.
#
function start_echo_server() {
  stop_echo_server

  export PORT=${MONITOR_PORT}
  nohup python "${TEST_DIR}/echoserver.py" &> /dev/null &
  echo $!
}


#
# Starts the failover container under test and prints its container id
# ('docker run -d' writes the id to stdout, which the caller captures).
#
function start_failover_container() {
  # Container environment; an array keeps each "-e NAME=value" pair a
  # single argument even if a value ever contains whitespace (the
  # original string-concatenated options relied on word splitting).
  local envopts=(
    -e "OPENSHIFT_HA_CONFIG_NAME=roto-r00ter"
    -e "OPENSHIFT_HA_VIRTUAL_IPS=${TEST_VIPS}"
    -e "OPENSHIFT_HA_NETWORK_INTERFACE=enp0s3"
    -e "OPENSHIFT_HA_MONITOR_PORT=${MONITOR_PORT}"
    # -e "OPENSHIFT_HA_USE_UNICAST=true"
    # -e "OPENSHIFT_HA_UNICAST_PEERS=127.0.0.1"
    -e "OPENSHIFT_HA_SELECTOR="
  )

  # --net=host + --privileged + /lib/modules are needed so keepalived can
  # manage VIPs and modprobe ip_vs inside the container. Note: the stray
  # trailing '&' after 'docker run -d' in the original served no purpose.
  docker run -dit --net=host --privileged=true \
    -v /lib/modules:/lib/modules "${envopts[@]}" "$FAILOVER_IMAGE"
}


#
# End-to-end verification: start an echo server (the monitored "service"),
# launch the failover image, and check that keepalived processes are
# running inside the container. On success the container is removed; on
# failure it is kept for debugging and its logs are dumped.
#
function run_image_verification_test() {
  echo " - starting echo server ..."
  local pid
  pid=$(start_echo_server)
  echo " - started echo server pid=$pid ..."

  # On interrupt, cleanup - stop echo server.
  trap "stop_echo_server $pid" INT

  local cname
  cname=$(start_failover_container)
  echo " - started docker container $cname ..."

  # Wait a bit for all the services to startup.
  sleep 10

  # Check container is up and has keepalived processes.
  local cmd="ps -ef | grep '/usr/sbin/keepalived' | grep -v grep | wc -l"
  local numprocs
  numprocs=$(echo "$cmd" | docker exec -i "$cname" /bin/bash)

  # Stop echo server.
  stop_echo_server "$pid"

  if [[ -n "$numprocs" && $numprocs -gt 0 ]]; then
    # Success - print info and kill the container.
    echo " - There are $numprocs keepalived processes running"
    echo " - Cleaning up docker containers ..."
    docker rm -f "$cname"
    echo " - All tests PASSED."
    return 0
  fi

  # Failure - print info and dump logs (keep the docker container around
  # for debugging).
  echo " - There are $numprocs keepalived processes running"
  echo " - logs from container $cname:"
  docker logs "$cname" || :
  echo " - Test FAILED."
  exit 1
}


#
# main():
#
run_image_verification_test
When these failure conditions arise, the above model +breaks down as it requires the caller (or something in the caller's +execution chain) to implement either a health checker or retry failed +requests and/or update the "discovery" mechanism to remove the failed +instance(s) from the traffic mix. Otherwise, a certain subset of requests +(failed/total) will fail via the upstream "discovery" mechanism (e.g. DNS). + + +## Use Cases + 1. As an administrator, I want my cluster to be assigned a resource set + and I want the cluster to automatically manage those resources. + 2. As an administrator, I want my cluster to be assigned a set of virtual + IP addresses that the cluster manages and migrates (with zero or + minimal downtime) on failure conditions. + 3. As an addendum to use case #2, the administrator should not be + required to perform any manual interaction to update the upstream + "discovery" sources (e.g. DNS). The cluster should service all the + assigned virtual IPs when atleast a single node is available - and + this should be inspite of the fact that the current available + resources are not sufficient to reach "critical mass" aka the + desired state. + + +## Goals +The goal here is to provide the OpenShift environment with one or more +floating Virtual IP addresses which can be automatically migrated across +the cluster when the target resource (the HAProxy router specific to the +above mentioned problem) is not available. + + +## Basic Concepts +This proposal adds a new admin command that allows an administrator the +ability to setup a high availability configuration on a selection of nodes. + +### Proposed Syntax (production): + + openshift admin ha-config [] + + where: + = Name of the HA configuration. + Default: generated name (e.g. ha-config-1) + = One or more of: + --type=keepalived # For now, always keepalived. + --create + --delete + --credentials= + --no-headers= + -o|--output= + --output-version= + -t, --template=