diff --git a/ci-operator/jobs/openshift/release/openshift-release-release-4.3-periodics.yaml b/ci-operator/jobs/openshift/release/openshift-release-release-4.3-periodics.yaml
index 62ca30bb58d9d..0235fc91a7d93 100644
--- a/ci-operator/jobs/openshift/release/openshift-release-release-4.3-periodics.yaml
+++ b/ci-operator/jobs/openshift/release/openshift-release-release-4.3-periodics.yaml
@@ -6871,6 +6871,291 @@ periodics:
     - name: release-pull-secret
       secret:
         secretName: ci-pull-credentials
+- agent: kubernetes
+  cluster: api.ci
+  decorate: true
+  decoration_config:
+    skip_cloning: true
+  interval: 24h
+  labels:
+    ci.openshift.io/release-type: informing
+    job-env: openstack
+    job-release: "4.3"
+    pj-rehearse.openshift.io/can-be-rehearsed: "true"
+  name: release-openshift-ocp-installer-e2e-openstack-ppc64le-4.3
+  spec:
+    containers:
+    - args:
+      - --artifact-dir=$(ARTIFACTS)
+      - --give-pr-author-access-to-namespace=true
+      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+      - --input-hash=$(BUILD_ID)
+      - --input-hash=$(JOB_NAME)
+      - --kubeconfig=/etc/apici/kubeconfig
+      - --lease-server-password-file=/etc/boskos/password
+      - --lease-server-username=ci
+      - --lease-server=https://boskos-ci.svc.ci.openshift.org
+      - --secret-dir=/usr/local/e2e-openstack-cluster-profile
+      - --secret-dir=/usr/local/pull-secret
+      - --target=e2e-openstack
+      - --template=/usr/local/e2e-openstack
+      command:
+      - ci-operator
+      env:
+      - name: MULTIARCH_RELEASE_IMAGE_LATEST
+        value: registry.svc.ci.openshift.org/ocp-ppc64le/release-ppc64le:4.3
+      - name: RELEASE_IMAGE_LATEST
+        value: registry.svc.ci.openshift.org/ocp/release:4.3
+      - name: BRANCH
+        value: "4.3"
+      - name: CLUSTER_TYPE
+        value: openstack-ppc64le
+      - name: CONFIG_SPEC
+        value: |
+          resources:
+            '*':
+              limits:
+                memory: 4Gi
+              requests:
+                cpu: 100m
+                memory: 200Mi
+          tag_specification:
+            cluster: https://api.ci.openshift.org
+            name: "$(BRANCH)"
+            namespace: ocp
+          tests:
+          - as: e2e-$(CLUSTER_TYPE)
+            commands: TEST_SUITE=openshift/conformance/parallel run-tests
+            openshift_installer:
+              cluster_profile: "$(CLUSTER_TYPE)"
+      - name: JOB_NAME_SAFE
+        value: e2e-openstack
+      - name: KURYR_ENABLED
+        value: "false"
+      - name: TEST_COMMAND
+        value: |
+          cat > excluded_tests << EOF
+          EOF
+          cat > invert_excluded.py << EOF
+          import sys
+          all_tests = set()
+          excluded_tests = set()
+          for line in sys.stdin:
+              all_tests.add(line.strip())
+          with open(sys.argv[1], "r") as f:
+              for line in f:
+                  excluded_tests.add(line.strip())
+          for test in sorted(all_tests - excluded_tests):
+              print(test)
+          EOF
+          openshift-tests run openshift/conformance/parallel --dry-run | python invert_excluded.py excluded_tests > test-suite.txt
+          TEST_SUITE="--file=test-suite.txt" run-tests
+      image: ci-operator:latest
+      imagePullPolicy: Always
+      name: ""
+      resources:
+        requests:
+          cpu: 10m
+      volumeMounts:
+      - mountPath: /etc/apici
+        name: apici-ci-operator-credentials
+        readOnly: true
+      - mountPath: /etc/boskos
+        name: boskos
+        readOnly: true
+      - mountPath: /usr/local/e2e-openstack-cluster-profile
+        name: cluster-profile
+      - mountPath: /usr/local/e2e-openstack
+        name: job-definition
+        subPath: cluster-launch-installer-openstack-e2e-ppc64le.yaml
+      - mountPath: /etc/pull-secret
+        name: pull-secret
+        readOnly: true
+      - mountPath: /usr/local/pull-secret
+        name: release-pull-secret
+    serviceAccountName: ci-operator
+    volumes:
+    - name: apici-ci-operator-credentials
+      secret:
+        items:
+        - key: sa.ci-operator.apici.config
+          path: kubeconfig
+        secretName: apici-ci-operator-credentials
+    - name: boskos
+      secret:
+        items:
+        - key: password
+          path: password
+        secretName: boskos-credentials
+    - name: cluster-profile
+      projected:
+        sources:
+        - secret:
+            name: cluster-secrets-openstack-ppc64le
+    - configMap:
+        name: prow-job-cluster-launch-installer-openstack-e2e-ppc64le
+      name: job-definition
+    - name: pull-secret
+      secret:
+        secretName: regcred
+    - name: release-pull-secret
+      secret:
+        secretName: ci-pull-credentials
- 
agent: kubernetes cluster: api.ci decorate: true diff --git a/ci-operator/templates/openshift/installer/cluster-launch-installer-openstack-e2e-ppc64le.yaml b/ci-operator/templates/openshift/installer/cluster-launch-installer-openstack-e2e-ppc64le.yaml new file mode 100644 index 0000000000000..c651e5b15abb5 --- /dev/null +++ b/ci-operator/templates/openshift/installer/cluster-launch-installer-openstack-e2e-ppc64le.yaml @@ -0,0 +1,732 @@ +kind: Template +apiVersion: template.openshift.io/v1 + +parameters: +- name: JOB_NAME_SAFE + required: true +- name: JOB_NAME_HASH + required: true +- name: NAMESPACE + required: true +- name: IMAGE_TESTS + required: true +- name: IMAGE_OPENSTACK_INSTALLER + required: true +- name: CLUSTER_TYPE + value: "openstack-ppc64le" +- name: TEST_COMMAND + required: true +- name: RELEASE_IMAGE_LATEST + required: true +- name: BASE_DOMAIN + value: openshift.testing + required: true +- name: KURYR_ENABLED + value: "false" +- name: BUILD_ID + required: false +- name: USE_LEASE_CLIENT +- name: CLUSTER_NAME + value: "ci-op-ppc64le" +- name: MULTIARCH_RELEASE_IMAGE_LATEST + required: true + +objects: + +# We want the cluster to be able to access these images +- kind: RoleBinding + apiVersion: authorization.openshift.io/v1 + metadata: + name: ${JOB_NAME_SAFE}-image-puller + namespace: ${NAMESPACE} + roleRef: + name: system:image-puller + subjects: + - kind: SystemGroup + name: system:unauthenticated + - kind: SystemGroup + name: system:authenticated + +# Give admin access to a known bot +- kind: RoleBinding + apiVersion: authorization.openshift.io/v1 + metadata: + name: ${JOB_NAME_SAFE}-namespace-admins + namespace: ${NAMESPACE} + roleRef: + name: admin + subjects: + - kind: ServiceAccount + namespace: ci + name: ci-chat-bot + +# The e2e pod spins up a cluster, runs e2e tests, and then cleans up the cluster. +- kind: Pod + apiVersion: v1 + metadata: + name: ${JOB_NAME_SAFE} + namespace: ${NAMESPACE} + annotations: + # we want to gather the teardown logs no matter what + ci-operator.openshift.io/wait-for-container-artifacts: teardown + ci-operator.openshift.io/save-container-logs: "true" + ci-operator.openshift.io/container-sub-tests: "setup,test,teardown" + spec: + restartPolicy: Never + activeDeadlineSeconds: 18000 + terminationGracePeriodSeconds: 900 + volumes: + - name: artifacts + emptyDir: {} + - name: shared-tmp + emptyDir: {} + - name: cluster-profile + secret: + secretName: ${JOB_NAME_SAFE}-cluster-profile + initContainers: + - name: cp-shared + image: ${IMAGE_TESTS} + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + command: + - cp + - /usr/bin/openshift-tests + - /usr/bin/oc + - /usr/bin/kubectl + - /tmp/shared/ + containers: + # NOTE openshift-tests requires access to openstackclient/cinder + # so we re-use the install image here which already contains + # the required binaries. The initContainer above is used to copy over + # the latest openshift-tests/oc binaries. 
+    #
+    # Once the cluster is up, runs the shared tests
+    - name: test
+      image: registry.svc.ci.openshift.org/ppc64le-openstack-installer-43/ppc64le-ci:latest
+      terminationMessagePolicy: FallbackToLogsOnError
+      resources:
+        requests:
+          cpu: 3
+          memory: 600Mi
+        limits:
+          memory: 4Gi
+      volumeMounts:
+      - name: shared-tmp
+        mountPath: /tmp/shared
+      - name: cluster-profile
+        mountPath: /tmp/cluster
+      - name: artifacts
+        mountPath: /tmp/artifacts
+      env:
+      - name: ARTIFACT_DIR
+        value: /tmp/artifacts
+      - name: HOME
+        value: /tmp/home
+      - name: KUBECONFIG
+        value: /tmp/artifacts/installer/auth/kubeconfig
+      command:
+      - /bin/bash
+      - -c
+      - |
+        #!/bin/bash
+        set -euo pipefail
+
+        export PATH=/tmp/shared:/usr/libexec/origin:$PATH
+
+        trap 'touch /tmp/shared/exit' EXIT
+        trap 'jobs -p | xargs -r kill || true; exit 0' TERM
+
+        mkdir -p "${HOME}"
+
+        # wait for the API to come up
+        while true; do
+          if [[ -f /tmp/shared/exit ]]; then
+            echo "Another process exited" >&2
+            exit 1
+          fi
+          if [[ ! -f /tmp/shared/setup-success ]]; then
+            sleep 15 & wait
+            continue
+          fi
+          # don't let clients impact the global kubeconfig
+          cp "${KUBECONFIG}" /tmp/admin.kubeconfig
+          export KUBECONFIG=/tmp/admin.kubeconfig
+          break
+        done
+
+        # if the cluster profile included an insights secret, install it to the cluster to
+        # report support data from the support-operator
+        if [[ -f /tmp/cluster/insights-live.yaml ]]; then
+          oc create -f /tmp/cluster/insights-live.yaml || true
+        fi
+
+        # set up env vars
+        export KUBE_SSH_BASTION="$( oc --insecure-skip-tls-verify get node -l node-role.kubernetes.io/master -o 'jsonpath={.items[0].status.addresses[?(@.type=="ExternalIP")].address}' ):22"
+        export KUBE_SSH_KEY_PATH=/tmp/cluster/ssh-privatekey
+        export KUBE_SSH_USER=core
+        mkdir -p ~/.ssh
+        cp /tmp/cluster/ssh-privatekey ~/.ssh/kube_openstack_rsa || true
+
+        # set up openstack env vars for testing
+        function get_clouds_param() {
+          awk '/^\s+'$1':/ { print $2 }' /tmp/cluster/clouds.yaml | sed -e 's/^"//' -e 's/"$//'
+        }
+
+        XTRACE_ENABLED=0
+        if set -o | grep 'xtrace.*on' &>/dev/null; then
+          XTRACE_ENABLED=1
+        fi
+        set +x # make extra sure we aren't echoing these to stdout
+        export OS_AUTH_URL="$(get_clouds_param 'auth_url')"
+        export OS_PROJECT_ID="$(get_clouds_param 'project_id')"
+        export OS_PROJECT_NAME="$(get_clouds_param 'project_name')"
+        export OS_USERNAME="$(get_clouds_param 'username')"
+        export OS_PASSWORD="$(get_clouds_param 'password')"
+        export OS_ENDPOINT_TYPE="$(get_clouds_param 'interface')"
+        export OS_IDENTITY_API_VERSION="$(get_clouds_param 'identity_api_version')"
+        export OS_USER_DOMAIN_NAME="$(get_clouds_param 'user_domain_name')"
+        if [[ "$XTRACE_ENABLED" == 1 ]]; then
+          set -x
+        fi
+
+        mkdir -p /tmp/output
+        cd /tmp/output
+
+        cat << EOREGISTRY > /tmp/kube-test-repo-list
+        quayK8sCSI: quay.io/multiarch-k8s-e2e
+        quayIncubator: quay.io/multiarch-k8s-e2e
+        e2eRegistry: quay.io/multiarch-k8s-e2e
+        gcRegistry: quay.io/multiarch-k8s-e2e
+        EOREGISTRY
+        export KUBE_TEST_REPO_LIST=/tmp/kube-test-repo-list
+
+        function run-upgrade-tests() {
+          openshift-tests run-upgrade "${TEST_SUITE}" --to-image "${RELEASE_IMAGE_LATEST}" \
+            -o /tmp/artifacts/e2e.log --junit-dir /tmp/artifacts/junit
+          return 0
+        }
+
+        function run-tests() {
+          openshift-tests run "${TEST_SUITE}" \
+            -o /tmp/artifacts/e2e.log --junit-dir /tmp/artifacts/junit
+          return 0
+        }
+
+        function run-minimal-tests() {
+          # Run only the Smoke-tagged tests until the test infrastructure is
+          # ready to run the full suite reliably.
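+          # (openshift-tests --dry-run prints one test name per line; piping a
+          # filtered list back in with '-f -' runs exactly those tests, so the
+          # grep below selects just the Smoke subset.)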
+ openshift-tests run openshift/conformance/parallel --dry-run | + grep 'Smoke' | + openshift-tests run -o /tmp/artifacts/e2e.log \ + --junit-dir /tmp/artifacts/junit -f - + return 0 + } + + function run-no-tests() { + # This can be used if we just want to check the installer exits 0 + echo "WARNING: No tests were run against the installed cluster" + return 0 + } + + ${TEST_COMMAND} + + # Runs an install + - name: setup + image: registry.svc.ci.openshift.org/ppc64le-openstack-installer-43/ppc64le-ci:latest + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: shared-tmp + mountPath: /tmp + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ${CLUSTER_NAME} + - name: BASE_DOMAIN + value: ${BASE_DOMAIN} + - name: SSH_PUB_KEY_PATH + value: /etc/openshift-installer/ssh-publickey + - name: SSH_PRIV_KEY_PATH + value: /etc/openshift-installer/ssh-privatekey + - name: PULL_SECRET_PATH + value: /etc/openshift-installer/pull-secret + - name: CA_CERT_PEM_PATH + value: /etc/openshift-installer/ca-cert.pem + - name: OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE + value: ${MULTIARCH_RELEASE_IMAGE_LATEST} + - name: OPENSHIFT_INSTALL_INVOKER + value: openshift-internal-ci/${JOB_NAME_SAFE}/${BUILD_ID} + - name: OPENSTACK_FLAVOR + value: m1.s2.xlarge + - name: OPENSTACK_EXTERNAL_NETWORK + value: external + - name: OS_CLOUD + value: openstack + - name: OS_CLIENT_CONFIG_FILE + value: /etc/openshift-installer/clouds.yaml + - name: USER + value: test + - name: HOME + value: /tmp + - name: INSTALL_INITIAL_RELEASE + - name: RELEASE_IMAGE_INITIAL + command: + - /bin/sh + - -c + - | + #!/bin/sh + set -e + + trap 'rc=$?; if test "${rc}" -eq 0; then touch /tmp/setup-success; else touch /tmp/exit; fi; exit "${rc}"' EXIT + trap 'CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi' TERM + + # The bastion service can be incredibly flakey. + # Therefore, try calling it a number of times in a row in the hope + # that it will eventually succeed. + # The output of the openstack command is saved in a file because + # debugging information on stdout/stderr may be helpful. 
+ cat << '__EOF__' > /tmp/openstack_cmd.py; chmod u+x /tmp/openstack_cmd.py + #!/usr/bin/env python + import subprocess + import sys + import time + if __name__ == '__main__': + if len(sys.argv) <=3: + sys.exit(1) + filename = sys.argv[1] + args = sys.argv[2:] + print(args) + tries = 0 + while tries <= 15: + tries += 1 + try: + pop = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (output_stdout, output_stderr) = pop.communicate() + print('output_stdout=%s' % (output_stdout, )) + print('output_stderr=%s' % (output_stderr, )) + print('RC = %d' % (pop.returncode, )) + if pop.returncode == 0: + fp = open(filename, "w") + fp.write(output_stdout) + fp.close() + sys.exit(0) + except Exception as e: + print('ERROR: Caught %s' % (e,)) + time.sleep(5) + sys.exit(1) + __EOF__ + + mkdir /tmp/artifacts/installer + + echo "MULTIARCH_RELEASE_IMAGE_LATEST=${MULTIARCH_RELEASE_IMAGE_LATEST}" + echo "IMAGE_OPENSTACK_INSTALLER=${IMAGE_OPENSTACK_INSTALLER}" + echo "OPENSHIFT_INSTALL_OS_IMAGE_OVERRIDE=${OPENSHIFT_INSTALL_OS_IMAGE_OVERRIDE}" + echo "OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE=${OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE}" + echo "-----------------------------------------------------------" + + if [[ -n "${INSTALL_INITIAL_RELEASE}" && -n "${RELEASE_IMAGE_INITIAL}" ]]; then + echo "Installing from initial release ${RELEASE_IMAGE_INITIAL}" + OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE="${RELEASE_IMAGE_INITIAL}" + else + echo "Installing from release ${RELEASE_IMAGE_LATEST}" + fi + echo "-----------------------------------------------------------" + + oc adm release info --registry-config=${PULL_SECRET_PATH} ${OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE} | egrep '(Name: |Pull From: )' + echo "-----------------------------------------------------------" + + # Display the version of openshift-install and make sure we are using the special sauce version + PATH=/ppc64le-openstack-installer-43/:${PATH} + LOCATION=$(which openshift-install) + echo ${LOCATION} + openshift-install version + if [ "${LOCATION}" != "/ppc64le-openstack-installer-43/openshift-install" ] + then + echo "Error: expecting ${LOCATION} to be /ppc64le-openstack-installer-43/openshift-install" + exit 1 + fi + echo "-----------------------------------------------------------" + + sha1sum ${OS_CLIENT_CONFIG_FILE} + sha1sum ${CA_CERT_PEM_PATH} + sha1sum ${SSH_PUB_KEY_PATH} + sha1sum ${SSH_PRIV_KEY_PATH} + echo "-----------------------------------------------------------" + + grep 'Red Hat' /etc/pki/tls/certs/ca-bundle.crt || true + echo "-----------------------------------------------------------" + + # Run simple tests to see if the bastion is up! + curl -v https://sshd.bastion-ppc64le:13000; echo + echo "-----------------------------------------------------------" + + /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack server list -f value; echo $? 
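+        # Usage pattern for the wrapper above (command is illustrative): the wrapped
+        # command's stdout lands in the output file only when the wrapper exits 0,
+        # so callers check the status and then read the file:
+        #
+        #   if /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack network list -f value; then
+        #     NETWORKS=$(cat /tmp/artifacts/openstack_cmd.out)
+        #   fi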
+ echo "-----------------------------------------------------------" + + export EXPIRATION_DATE=$(date -d '4 hours' --iso=minutes --utc) + export SSH_PUB_KEY=$(cat "${SSH_PUB_KEY_PATH}") + export PULL_SECRET=$(cat "${PULL_SECRET_PATH}") + + # move private key to ~/.ssh/ so that installer can use it to gather logs on bootstrap failure + mkdir -p ~/.ssh + cp "${SSH_PRIV_KEY_PATH}" ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + + # create a new floating ip tagged with CLUSTER_NAME so it can be deleted later + /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack floating ip create --description $CLUSTER_NAME $OPENSTACK_EXTERNAL_NETWORK --format value -c 'floating_ip_address' + LB_FIP=$(cat /tmp/artifacts/openstack_cmd.out) + + echo "Creating DNS record for api.${CLUSTER_NAME}.${BASE_DOMAIN} -> ${LB_FIP}" + # create A record for the api + curl -v https://sshd.bastion-ppc64le:13053/add -d '{"cluster_name": "'${CLUSTER_NAME}.${BASE_DOMAIN}'", "lb43_fip": "'$LB_FIP'"}' + + cat > /tmp/artifacts/installer/install-config.yaml << EOF + apiVersion: v1beta4 + baseDomain: ${BASE_DOMAIN} + controlPlane: + architecture: ppc64le + hyperthreading: Enabled + name: master + replicas: 3 + compute: + - architecture: ppc64le + hyperthreading: Enabled + name: worker + replicas: 3 + metadata: + name: ${CLUSTER_NAME} + platform: + openstack: + cloud: ${OS_CLOUD} + externalNetwork: ${OPENSTACK_EXTERNAL_NETWORK} + computeFlavor: ${OPENSTACK_FLAVOR} + lbFloatingIP: ${LB_FIP} + pullSecret: '${PULL_SECRET}' + sshKey: | + ${SSH_PUB_KEY} + additionalTrustBundle: | + EOF + sed -e 's,^, ,' ${CA_CERT_PEM_PATH} >> /tmp/artifacts/installer/install-config.yaml + + # Create a ramdisk for the etcd storage. This helps with disk latency + # unpredictability in the OpenStack cloud used by the CI: + TF_LOG=trace openshift-install --dir=/tmp/artifacts/installer create ignition-configs --log-level=debug + python -c \ + 'import json, sys; j = json.load(sys.stdin); j[u"systemd"][u"units"] = [{u"contents": "[Unit]\nDescription=Mount etcd as a ramdisk\nBefore=local-fs.target\n[Mount]\n What=none\nWhere=/var/lib/etcd\nType=tmpfs\nOptions=size=2G\n[Install]\nWantedBy=local-fs.target", u"enabled": True, u"name":u"var-lib-etcd.mount"}]; json.dump(j, sys.stdout)' \ + /tmp/artifacts/installer/master.ign.out + mv /tmp/artifacts/installer/master.ign.out /tmp/artifacts/installer/master.ign + + # What we're doing here is we generate manifests first and force that OpenShift SDN is configured. Kuryr is only + # allowed when KURYR_ENABLED variable is set to "true". + TF_LOG=trace openshift-install --dir=/tmp/artifacts/installer create manifests --log-level=debug + + if [[ $KURYR_ENABLED != "true" ]]; then + echo "Forcing OpenShiftSDN by modifying manifests" + sed -i -e 's/networkType: .*$/networkType: OpenShiftSDN/g' /tmp/artifacts/installer/manifests/cluster-network-02-config.yml + fi + + OS_DEBUG=1 TF_LOG=trace openshift-install --dir=/tmp/artifacts/installer create cluster --log-level=debug & + PID_OPENSHIFT_INSTALL=$! 
+ echo "PID_OPENSHIFT_INSTALL=${PID_OPENSHIFT_INSTALL}" + + # Copy the ephemeral installer files in case there is a problem with them + mkdir /tmp/artifacts/openshift-installer-files + TRIES=0 + while (( ${TRIES} <= 30 )) + do + sleep 2s + (( TRIES += 1 )) + ls -ld /tmp/openshift-install-*/ || true + if [ -d /tmp/openshift-install-*/ ] + then + /bin/cp -r /tmp/openshift-install-*/ /tmp/artifacts/openshift-installer-files/ + break + fi + done + + # wait for installer create cluster and return its status + wait "${PID_OPENSHIFT_INSTALL}" + RC=$? + echo "RC=${RC}" + if [ ${RC} -gt 0 ] + then + OS_DEBUG=1 TF_LOG=trace openshift-install --dir=/tmp/artifacts/installer wait-for install-complete --log-level=debug + fi + + # @TODO - search for specific ingress port + echo "NAMESPACE=${NAMESPACE}" + echo "JOB_NAME_HASH=${JOB_NAME_HASH}" + echo "Searching for one ingress port (${CLUSTER_NAME})" + /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack port list --format value -c Name + INGRESS_PORT=$(cat /tmp/artifacts/openstack_cmd.out | tr ' ' '\n' | awk "/${CLUSTER_NAME}.*-ingress-port/ {print}") + echo "INGRESS_PORT=${INGRESS_PORT}" + + if [ $(echo "${INGRESS_PORT}" | wc -l) -gt 1 ]; then + echo "Found more than one port!" + exit 1 + fi + if [ -n "$INGRESS_PORT" ]; then + # Assign a FIP to ingress port to allow getting to apps + # create a new floating ip tagged with CLUSTER_NAME so it can be deleted later + /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack floating ip create --description $CLUSTER_NAME $OPENSTACK_EXTERNAL_NETWORK --format value -c 'floating_ip_address' --port $INGRESS_PORT + APPS_FIP=$(cat /tmp/artifacts/openstack_cmd.out) + + echo "Creating DNS record for *.apps.${CLUSTER_NAME}.${BASE_DOMAIN} -> $APPS_FIP" + # create A record for *.apps + curl -v https://sshd.bastion-ppc64le:13053/add -d '{"cluster_name": "'${CLUSTER_NAME}.${BASE_DOMAIN}'", "apps43_fip": "'$APPS_FIP'"}' + else + echo "Failed to discover one ingress port" + exit 1 + fi + + echo "********** FINISHED SETUP CONTAINER **********" + + # Performs cleanup of all created resources + - name: teardown + image: registry.svc.ci.openshift.org/ppc64le-openstack-installer-43/ppc64le-ci:latest + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ${CLUSTER_NAME} + - name: KUBECONFIG + value: /tmp/artifacts/installer/auth/kubeconfig + - name: OS_CLOUD + value: openstack + - name: OS_CLIENT_CONFIG_FILE + value: /etc/openshift-installer/clouds.yaml + - name: SSH_PRIV_KEY_PATH + value: /etc/openshift-installer/ssh-privatekey + - name: USER + value: test + - name: HOME + value: /tmp + command: + - /bin/bash + - -c + - | + #!/bin/bash + function queue() { + local TARGET="${1}" + shift + local LIVE="$(jobs | wc -l)" + while [[ "${LIVE}" -ge 45 ]]; do + sleep 1 + LIVE="$(jobs | wc -l)" + done + echo "${@}" + if [[ -n "${FILTER}" ]]; then + "${@}" | "${FILTER}" >"${TARGET}" & + else + "${@}" >"${TARGET}" & + fi + } + + function teardown() { + set +e + touch /tmp/shared/exit + export PATH=${PATH}:/tmp/shared + + echo "Gathering artifacts ..." 
+ mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap /tmp/artifacts/network + set +x + + if /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack server list + then + grep $CLUSTER_NAME /tmp/artifacts/openstack_cmd.out >/tmp/artifacts/openstack_nodes.log + fi + if /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack server list -c Name -f value + then + for server in $(grep $CLUSTER_NAME /tmp/artifacts/openstack_cmd.out | sort) + do + echo -e "\n$ openstack server show $server" >>/tmp/artifacts/openstack_nodes.log + if /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack server show $server + then + cat /tmp/artifacts/openstack_cmd.out >>/tmp/artifacts/openstack_nodes.log + fi + done + fi + + # Get bootstrap nova logs + # NOTE(shadower): The server names are in the form + # `$CLUSTER_NAME-fmszv-bootstrap`. I could not find how to + # get the middle part (`fmszv`) (it's not in the shell + # environment here) so we'll list all the servers and look + # the bootstrap name up that way. + if /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack server list --format value --column Name + then + BOOTSTRAP_NAME=$(grep "${CLUSTER_NAME}-.*-bootstrap" /tmp/artifacts/openstack_cmd.out) + if [ -n "$BOOTSTRAP_NAME" ] + then + if /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack console log show "$BOOTSTRAP_NAME" + then + cp /tmp/artifacts/openstack_cmd.out /tmp/artifacts/bootstrap/nova.log + fi + /tmp/openstack_cmd.py /tmp/artifacts/openstack_cmd.out openstack server list -f value --name ${BOOTSTRAP_NAME} -c Networks + NETWORKS=$(cat /tmp/artifacts/openstack_cmd.out) + echo "NETWORKS=${NETWORKS}" + IP=$(echo ${NETWORKS}| awk '{print $2}') + echo "IP=${IP}" + fi + fi + + oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes + oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers + oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api + + queue /tmp/artifacts/config-resources.json oc --insecure-skip-tls-verify --request-timeout=5s get apiserver.config.openshift.io authentication.config.openshift.io build.config.openshift.io console.config.openshift.io dns.config.openshift.io featuregate.config.openshift.io image.config.openshift.io infrastructure.config.openshift.io ingress.config.openshift.io network.config.openshift.io oauth.config.openshift.io project.config.openshift.io scheduler.config.openshift.io -o json + queue /tmp/artifacts/apiservices.json oc --insecure-skip-tls-verify --request-timeout=5s get apiservices -o json + queue /tmp/artifacts/clusteroperators.json oc --insecure-skip-tls-verify --request-timeout=5s get clusteroperators -o json + queue /tmp/artifacts/clusterversion.json oc --insecure-skip-tls-verify --request-timeout=5s get clusterversion -o json + queue /tmp/artifacts/configmaps.json oc --insecure-skip-tls-verify --request-timeout=5s get configmaps --all-namespaces -o json + queue 
/tmp/artifacts/credentialsrequests.json oc --insecure-skip-tls-verify --request-timeout=5s get credentialsrequests --all-namespaces -o json + queue /tmp/artifacts/csr.json oc --insecure-skip-tls-verify --request-timeout=5s get csr -o json + queue /tmp/artifacts/endpoints.json oc --insecure-skip-tls-verify --request-timeout=5s get endpoints --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/deployments.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get deployments --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/daemonsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get daemonsets --all-namespaces -o json + queue /tmp/artifacts/events.json oc --insecure-skip-tls-verify --request-timeout=5s get events --all-namespaces -o json + queue /tmp/artifacts/kubeapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get kubeapiserver -o json + queue /tmp/artifacts/kubecontrollermanager.json oc --insecure-skip-tls-verify --request-timeout=5s get kubecontrollermanager -o json + queue /tmp/artifacts/machineconfigpools.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigpools -o json + queue /tmp/artifacts/machineconfigs.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigs -o json + queue /tmp/artifacts/machinesets.json oc --insecure-skip-tls-verify --request-timeout=5s get machinesets -A -o json + queue /tmp/artifacts/machines.json oc --insecure-skip-tls-verify --request-timeout=5s get machines -A -o json + queue /tmp/artifacts/namespaces.json oc --insecure-skip-tls-verify --request-timeout=5s get namespaces -o json + queue /tmp/artifacts/nodes.json oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o json + queue /tmp/artifacts/openshiftapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get openshiftapiserver -o json + queue /tmp/artifacts/pods.json oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces -o json + queue /tmp/artifacts/persistentvolumes.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumes --all-namespaces -o json + queue /tmp/artifacts/persistentvolumeclaims.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumeclaims --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/replicasets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get replicasets --all-namespaces -o json + queue /tmp/artifacts/rolebindings.json oc --insecure-skip-tls-verify --request-timeout=5s get rolebindings --all-namespaces -o json + queue /tmp/artifacts/roles.json oc --insecure-skip-tls-verify --request-timeout=5s get roles --all-namespaces -o json + queue /tmp/artifacts/services.json oc --insecure-skip-tls-verify --request-timeout=5s get services --all-namespaces -o json + FILTER=gzip queue /tmp/artifacts/statefulsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get statefulsets --all-namespaces -o json + + FILTER=gzip queue /tmp/artifacts/openapi.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get --raw /openapi/v2 + + # gather nodes first in parallel since they may contain the most relevant debugging info + while IFS= read -r i; do + mkdir -p /tmp/artifacts/nodes/$i + queue /tmp/artifacts/nodes/$i/nova.log openstack console log show $i + queue /tmp/artifacts/nodes/$i/heap oc --insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap + done < /tmp/nodes + + FILTER=gzip queue /tmp/artifacts/nodes/masters-journal.gz oc --insecure-skip-tls-verify 
adm node-logs --role=master --unify=false + FILTER=gzip queue /tmp/artifacts/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false + + # Snapshot iptables-save on each node for debugging possible kube-proxy issues + oc --insecure-skip-tls-verify get --request-timeout=20s -n openshift-sdn -l app=sdn pods --template '{{ range .items }}{{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/sdn-pods + while IFS= read -r i; do + queue /tmp/artifacts/network/iptables-save-$i oc --insecure-skip-tls-verify rsh --timeout=20 -n openshift-sdn -c sdn $i iptables-save -c + done < /tmp/sdn-pods + + while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )" + queue /tmp/artifacts/metrics/${file}-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' + queue /tmp/artifacts/metrics/${file}-controllers-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' + done < /tmp/pods-api + + while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )" + FILTER=gzip queue /tmp/artifacts/pods/${file}.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s $i + FILTER=gzip queue /tmp/artifacts/pods/${file}_previous.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s -p $i + done < /tmp/containers + + echo "Snapshotting prometheus (may take 15s) ..." + queue /tmp/artifacts/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus . + + # move private key to ~/.ssh/ so that installer can use it to gather logs + mkdir -p ~/.ssh + cp "${SSH_PRIV_KEY_PATH}" ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + + echo "Running must-gather..." + mkdir -p /tmp/artifacts/must-gather + queue /tmp/artifacts/must-gather/must-gather.log oc --insecure-skip-tls-verify adm must-gather --dest-dir /tmp/artifacts/must-gather + + echo "Waiting for logs ..." + wait + + echo "Removing entries from DNS ..." + curl -v https://sshd.bastion-ppc64le:13053/remove -d '{"cluster_name": "'${CLUSTER_NAME}.${BASE_DOMAIN}'", "lb43_fip": ""}' + curl -v https://sshd.bastion-ppc64le:13053/remove -d '{"cluster_name": "'${CLUSTER_NAME}.${BASE_DOMAIN}'", "apps43_fip": ""}' + + echo "Deleting service VM FIP ..." + openstack floating ip list --long -f csv -c Description -c "Floating IP Address" | grep $CLUSTER_NAME | cut -f1 -d "," | xargs openstack floating ip delete + + echo "Deprovisioning cluster ..." + openshift-install --dir /tmp/artifacts/installer destroy cluster + } + + trap 'teardown' EXIT + trap 'jobs -p | xargs -r kill || true; exit 0' TERM + + # The bastion service can be incredibly flakey. + # Therefore, try calling it a number of times in a row in the hope + # that it will eventually succeed. + # The output of the openstack command is saved in a file because + # debugging information on stdout/stderr may be helpful. 
+ cat << '__EOF__' > /tmp/openstack_cmd.py; chmod u+x /tmp/openstack_cmd.py + #!/usr/bin/env python + import subprocess + import sys + import time + if __name__ == '__main__': + if len(sys.argv) <=3: + sys.exit(1) + filename = sys.argv[1] + args = sys.argv[2:] + print(args) + tries = 0 + while tries <= 15: + tries += 1 + try: + pop = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + (output_stdout, output_stderr) = pop.communicate() + print('output_stdout=%s' % (output_stdout, )) + print('output_stderr=%s' % (output_stderr, )) + print('RC = %d' % (pop.returncode, )) + if pop.returncode == 0: + fp = open(filename, "w") + fp.write(output_stdout) + fp.close() + sys.exit(0) + except Exception as e: + print('ERROR: Caught %s' % (e,)) + time.sleep(5) + sys.exit(1) + __EOF__ + + for i in $(seq 1 220); do + if [[ -f /tmp/shared/exit ]]; then + exit 0 + fi + sleep 60 & wait + done + + # both ports (lb_fip and apps_fip) go through a single reverse ssh tunnel bastion + hostAliases: + - ip: 172.30.128.152 # sshd.bastion-ppc64le ip address + hostnames: + - "api.${CLUSTER_NAME}.${BASE_DOMAIN}" + - "alertmanager-main-openshift-monitoring.apps.${CLUSTER_NAME}.${BASE_DOMAIN}" + - "console-openshift-console.apps.${CLUSTER_NAME}.${BASE_DOMAIN}" + - "downloads-openshift-console.apps.${CLUSTER_NAME}.${BASE_DOMAIN}" + - "grafana-openshift-monitoring.apps.${CLUSTER_NAME}.${BASE_DOMAIN}" + - "oauth-openshift.apps.${CLUSTER_NAME}.${BASE_DOMAIN}" + - "prometheus-k8s-openshift-monitoring.apps.${CLUSTER_NAME}.${BASE_DOMAIN}" diff --git a/core-services/prow/02_config/_plugins.yaml b/core-services/prow/02_config/_plugins.yaml index de0fde015ae37..34237a702bd1c 100644 --- a/core-services/prow/02_config/_plugins.yaml +++ b/core-services/prow/02_config/_plugins.yaml @@ -1080,6 +1080,15 @@ config_updater: build01: - ci name: prow-job-cluster-launch-installer-nested-virt-tests + ci-operator/templates/openshift/installer/cluster-launch-installer-openstack-e2e-ppc64le.yaml: + clusters: + api.ci: + - ci + app.ci: + - ci + build01: + - ci + name: prow-job-cluster-launch-installer-openstack-e2e-ppc64le ci-operator/templates/openshift/installer/cluster-launch-installer-openstack-e2e.yaml: clusters: api.ci:
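Note: the config_updater stanza above is what publishes the new template. Prow's config-updater plugin writes cluster-launch-installer-openstack-e2e-ppc64le.yaml into a ConfigMap named prow-job-cluster-launch-installer-openstack-e2e-ppc64le in the ci namespace of each listed cluster, and the periodic job's job-definition volume mounts the template out of that ConfigMap via the matching subPath. A quick post-merge sanity check (context name illustrative):

    oc --context api.ci -n ci get configmap prow-job-cluster-launch-installer-openstack-e2e-ppc64le -o yaml | head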