Run ssh bastion during openshift-tests #4161
```
@@ -144,9 +144,44 @@ objects:
  oc create -f /tmp/cluster/insights-live.yaml || true
fi

# set up cloud-provider-specific env vars
export KUBE_SSH_BASTION="$( oc --insecure-skip-tls-verify get node -l node-role.kubernetes.io/master -o 'jsonpath={.items[0].status.addresses[?(@.type=="ExternalIP")].address}' ):22"
# set up SSH for the e2e tests + for this script
function setup_ssh_bastion() {
  echo "Setting up ssh bastion"
  mkdir -p ~/.ssh
  cp "${KUBE_SSH_KEY_PATH}" ~/.ssh/id_rsa
  chmod 0600 ~/.ssh/id_rsa
  if ! whoami &> /dev/null; then
    if [[ -w /etc/passwd ]]; then
      echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd
    fi
  fi
  curl https://raw.githubusercontent.com/eparis/ssh-bastion/master/deploy/deploy.sh | bash
  for i in $(seq 0 60)
  do
    # AWS fills only .hostname of a service
    BASTION_HOST=$(oc get service -n "${SSH_BASTION_NAMESPACE}" ssh-bastion -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
    if [[ -n "${BASTION_HOST}" ]]; then break; fi
    # Azure fills only .ip of a service. Use it as bastion host.
    BASTION_HOST=$(oc get service -n "${SSH_BASTION_NAMESPACE}" ssh-bastion -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
    if [[ -n "${BASTION_HOST}" ]]; then break; fi
    echo "Waiting for SSH bastion load balancer service"
    sleep 10
  done
}

function bastion_ssh() {
  retry 60 \
    ssh -o LogLevel=error -o ConnectionAttempts=100 -o ConnectTimeout=30 -o StrictHostKeyChecking=no \
      -o ProxyCommand="ssh -A -o StrictHostKeyChecking=no -o LogLevel=error -o ServerAliveInterval=30 -o ConnectionAttempts=100 -o ConnectTimeout=30 -W %h:%p core@${BASTION_HOST} 2>/dev/null" \
      $@
}

export SSH_BASTION_NAMESPACE="testing-ssh-bastion"
export KUBE_SSH_KEY_PATH=/tmp/cluster/ssh-privatekey
setup_ssh_bastion
export KUBE_SSH_BASTION="${BASTION_HOST}:22"

# set up cloud-provider-specific env vars
if [[ "${CLUSTER_TYPE}" == "gcp" ]]; then
  export GOOGLE_APPLICATION_CREDENTIALS="/tmp/cluster/gce.json"
  export KUBE_SSH_USER=cloud-user
```
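For context, here is a minimal usage sketch of the helpers introduced above, once `setup_ssh_bastion` has populated `BASTION_HOST`. The master lookup and the `core` login user are assumptions for illustration, not lines from this PR:

```bash
# Hypothetical usage sketch (not part of the diff): reach a master node
# through the bastion and run a simple health check on it.
MASTER_IP=$(oc get node -l node-role.kubernetes.io/master= \
  -o 'jsonpath={.items[0].status.addresses[?(@.type=="InternalIP")].address}')
# bastion_ssh proxies through core@${BASTION_HOST}; the target user is assumed to be "core".
bastion_ssh "core@${MASTER_IP}" sudo systemctl is-active kubelet
```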
|
|
```
@@ -212,32 +247,6 @@ objects:
  if [ "${RETRY_IGNORE_EXIT_CODE}" != "" ]; then return 0; else return "${rc}"; fi
}

function setup_ssh_bastion() {
  echo "Setting up ssh bastion"
  mkdir -p ~/.ssh || true
  cp "${KUBE_SSH_KEY_PATH}" ~/.ssh/id_rsa
  chmod 0600 ~/.ssh/id_rsa
  if ! whoami &> /dev/null; then
    if [ -w /etc/passwd ]; then
      echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd
    fi
  fi
  curl https://raw.githubusercontent.com/eparis/ssh-bastion/master/deploy/deploy.sh | bash
  for i in $(seq 0 60)
  do
    BASTION_HOST=$(oc get service -n openshift-ssh-bastion ssh-bastion -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
    if [ ! -z "${BASTION_HOST}" ]; then break; fi
    sleep 10
  done
}

function bastion_ssh() {
  retry 60 \
    ssh -o LogLevel=error -o ConnectionAttempts=100 -o ConnectTimeout=30 -o StrictHostKeyChecking=no \
      -o ProxyCommand="ssh -A -o StrictHostKeyChecking=no -o LogLevel=error -o ServerAliveInterval=30 -o ConnectionAttempts=100 -o ConnectTimeout=30 -W %h:%p core@${BASTION_HOST} 2>/dev/null" \
      $@
}

function restore-cluster-state() {
  echo "Placing file /etc/rollback-test with contents A"
  cat > /tmp/machineconfig.yaml <<'EOF'
```
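The hunk above shows only the tail of the template's `retry` helper. For readers following along, here is a hypothetical sketch of a helper that is compatible with that visible tail (including the `RETRY_IGNORE_EXIT_CODE` escape hatch); the template's actual implementation is not shown in this diff and may differ:

```bash
# Hypothetical sketch of a retry helper; only its last line appears in the diff above.
function retry() {
  local attempts="$1"; shift
  local rc=0
  for _ in $(seq 1 "${attempts}"); do
    "$@" && return 0          # success: stop retrying
    rc=$?                     # remember the last failing exit code
    sleep 5                   # assumed back-off interval
  done
  # Mirrors the line visible in the diff: optionally swallow the failure.
  if [ "${RETRY_IGNORE_EXIT_CODE}" != "" ]; then return 0; else return "${rc}"; fi
}
```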
|
|
```
@@ -268,7 +277,6 @@ objects:
  wait_for_machineconfigpool_to_apply

  setup_ssh_bastion

  echo "Make etcd backup on first master - /usr/local/bin/etcd-snapshot-backup.sh"
  FIRST_MASTER=$(oc get node -l node-role.kubernetes.io/master= -o name | head -n1 | cut -d '/' -f 2)
```
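A plausible continuation of this backup step, shown here only as a hedged sketch: the snapshot script named in the echo would be run on `${FIRST_MASTER}` through the bastion. The `core` user and the exact invocation are assumptions, not lines from this PR:

```bash
# Hypothetical sketch: run the backup script on the first master via the bastion.
bastion_ssh "core@${FIRST_MASTER}" sudo /usr/local/bin/etcd-snapshot-backup.sh
```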
|
|
```
@@ -343,17 +351,12 @@ objects:
  if [[ "${rc}" == "1" ]]; then exit 1; fi

  echo "Removing ssh-bastion"
  oc delete project openshift-ssh-bastion

  echo "Remove existing openshift-apiserver pods"
  # This would ensure "Pod 'openshift-apiserver/apiserver-xxx' is not healthy: container openshift-apiserver has restarted more than 5 times" test won't fail
  oc delete pod --all -n openshift-apiserver
}

function recover-from-etcd-quorum-loss() {
  setup_ssh_bastion

  # Machine API won't let the user to destroy the node which runs the controller
  echo "Finding two masters to destroy"
  MAPI_POD=$(oc get pod -l k8s-app=controller -n openshift-machine-api --no-headers -o name)
```
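One way the "two masters to destroy" could be selected, sketched under the assumption that the node hosting the machine-api controller pod has to be excluded; this is illustrative only and not the template's actual code:

```bash
# Hypothetical sketch: exclude the node running the machine-api controller,
# then take two of the remaining masters.
MAPI_NODE=$(oc get "${MAPI_POD}" -n openshift-machine-api -o jsonpath='{.spec.nodeName}')
MASTERS_TO_DESTROY=$(oc get node -l node-role.kubernetes.io/master= -o name \
  | cut -d '/' -f 2 | grep -v "${MAPI_NODE}" | head -n 2)
```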
|
|
```
@@ -516,9 +519,6 @@ objects:
    retry 10 oc wait pod/etcd-member-${master} -n openshift-etcd --for condition=Ready
  done

  echo "Removing ssh-bastion"
  retry 10 oc delete project openshift-ssh-bastion

  echo "Scale etcd-quorum guard"
  retry 10 oc scale --replicas=3 deployment.apps/etcd-quorum-guard -n openshift-machine-config-operator
```
|
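After scaling the quorum guard back to three replicas, a test script would typically wait for the deployment to settle before moving on. A hedged sketch of such a follow-up check, not part of this diff:

```bash
# Hypothetical follow-up: wait for the quorum guard deployment to report Available.
retry 10 oc wait deployment/etcd-quorum-guard -n openshift-machine-config-operator \
  --for condition=Available --timeout=300s
```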
```
@@ -850,6 +850,9 @@ objects:
  mkdir -p /tmp/artifacts/must-gather
  queue /tmp/artifacts/must-gather/must-gather.log oc --insecure-skip-tls-verify adm must-gather --dest-dir /tmp/artifacts/must-gather

  echo "Removing ssh-bastion ..."
  queue /dev/null oc --insecure-skip-tls-verify --request-timeout=5s delete project testing-ssh-bastion
```
|
Contributor: Why are you doing this? Why isn't this being torn down by the cluster tear down?

Author (Contributor): It was requested in #4161 (review)

Contributor: Service load balancers aren't leaked.
```
  echo "Waiting for logs ..."
  wait
```
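The `queue` helper used above is not defined in this diff; from its call sites it appears to launch a command in the background with its output redirected to the given file, and the final `wait` collects those jobs. A hypothetical sketch of that pattern, with the helper's behavior assumed from context:

```bash
# Hypothetical sketch of the background-collection pattern; the template's real
# "queue" helper is not shown in this diff and may differ.
function queue() {
  local target="$1"; shift
  "$@" > "${target}" 2>&1 &   # run the artifact/teardown command in the background
}

queue /dev/null oc --insecure-skip-tls-verify --request-timeout=5s delete project testing-ssh-bastion
wait   # block until every queued background job has finished
```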
Review comment: This logic should be in the script; let's get Eric to fix his bastion.