diff --git a/ci-operator/step-registry/ipi/deprovision/artifacts/artifacts/ipi-deprovision-artifacts-artifacts-commands.sh b/ci-operator/step-registry/ipi/deprovision/artifacts/artifacts/ipi-deprovision-artifacts-artifacts-commands.sh new file mode 100644 index 0000000000000..0ba4395f30774 --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/artifacts/artifacts/ipi-deprovision-artifacts-artifacts-commands.sh @@ -0,0 +1,86 @@ +#!/bin/bash +function queue() { + local TARGET="${1}" + shift + local LIVE="$(jobs | wc -l)" + while [[ "${LIVE}" -ge 45 ]]; do + sleep 1 + LIVE="$(jobs | wc -l)" + done + echo "${@}" + if [[ -n "${FILTER}" ]]; then + "${@}" | "${FILTER}" >"${TARGET}" & + else + "${@}" >"${TARGET}" & + fi +} + +export PATH=$PATH:/tmp/shared + +echo "Gathering artifacts ..." +mkdir -p ${ARTIFACT_DIR}/pods ${ARTIFACT_DIR}/nodes ${ARTIFACT_DIR}/metrics ${ARTIFACT_DIR}/bootstrap ${ARTIFACT_DIR}/network + +oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes +oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers +oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api + +queue ${ARTIFACT_DIR}/config-resources.json oc --insecure-skip-tls-verify --request-timeout=5s get apiserver.config.openshift.io authentication.config.openshift.io build.config.openshift.io console.config.openshift.io dns.config.openshift.io featuregate.config.openshift.io image.config.openshift.io 
infrastructure.config.openshift.io ingress.config.openshift.io network.config.openshift.io oauth.config.openshift.io project.config.openshift.io scheduler.config.openshift.io -o json +queue ${ARTIFACT_DIR}/apiservices.json oc --insecure-skip-tls-verify --request-timeout=5s get apiservices -o json +queue ${ARTIFACT_DIR}/clusteroperators.json oc --insecure-skip-tls-verify --request-timeout=5s get clusteroperators -o json +queue ${ARTIFACT_DIR}/clusterversion.json oc --insecure-skip-tls-verify --request-timeout=5s get clusterversion -o json +queue ${ARTIFACT_DIR}/configmaps.json oc --insecure-skip-tls-verify --request-timeout=5s get configmaps --all-namespaces -o json +queue ${ARTIFACT_DIR}/credentialsrequests.json oc --insecure-skip-tls-verify --request-timeout=5s get credentialsrequests --all-namespaces -o json +queue ${ARTIFACT_DIR}/csr.json oc --insecure-skip-tls-verify --request-timeout=5s get csr -o json +queue ${ARTIFACT_DIR}/endpoints.json oc --insecure-skip-tls-verify --request-timeout=5s get endpoints --all-namespaces -o json +FILTER=gzip queue ${ARTIFACT_DIR}/deployments.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get deployments --all-namespaces -o json +FILTER=gzip queue ${ARTIFACT_DIR}/daemonsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get daemonsets --all-namespaces -o json +queue ${ARTIFACT_DIR}/events.json oc --insecure-skip-tls-verify --request-timeout=5s get events --all-namespaces -o json +queue ${ARTIFACT_DIR}/kubeapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get kubeapiserver -o json +queue ${ARTIFACT_DIR}/kubecontrollermanager.json oc --insecure-skip-tls-verify --request-timeout=5s get kubecontrollermanager -o json +queue ${ARTIFACT_DIR}/machineconfigpools.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigpools -o json +queue ${ARTIFACT_DIR}/machineconfigs.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigs -o json +queue 
${ARTIFACT_DIR}/machinesets.json oc --insecure-skip-tls-verify --request-timeout=5s get machinesets -A -o json +queue ${ARTIFACT_DIR}/machines.json oc --insecure-skip-tls-verify --request-timeout=5s get machines -A -o json +queue ${ARTIFACT_DIR}/namespaces.json oc --insecure-skip-tls-verify --request-timeout=5s get namespaces -o json +queue ${ARTIFACT_DIR}/nodes.json oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o json +queue ${ARTIFACT_DIR}/openshiftapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get openshiftapiserver -o json +queue ${ARTIFACT_DIR}/pods.json oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces -o json +queue ${ARTIFACT_DIR}/persistentvolumes.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumes --all-namespaces -o json +queue ${ARTIFACT_DIR}/persistentvolumeclaims.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumeclaims --all-namespaces -o json +FILTER=gzip queue ${ARTIFACT_DIR}/replicasets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get replicasets --all-namespaces -o json +queue ${ARTIFACT_DIR}/rolebindings.json oc --insecure-skip-tls-verify --request-timeout=5s get rolebindings --all-namespaces -o json +queue ${ARTIFACT_DIR}/roles.json oc --insecure-skip-tls-verify --request-timeout=5s get roles --all-namespaces -o json +queue ${ARTIFACT_DIR}/services.json oc --insecure-skip-tls-verify --request-timeout=5s get services --all-namespaces -o json +FILTER=gzip queue ${ARTIFACT_DIR}/statefulsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get statefulsets --all-namespaces -o json + +FILTER=gzip queue ${ARTIFACT_DIR}/openapi.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get --raw /openapi/v2 + +# gather nodes first in parallel since they may contain the most relevant debugging info +while IFS= read -r i; do + mkdir -p ${ARTIFACT_DIR}/nodes/$i + queue ${ARTIFACT_DIR}/nodes/$i/heap oc 
--insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap +done < /tmp/nodes + +FILTER=gzip queue ${ARTIFACT_DIR}/nodes/masters-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=master --unify=false +FILTER=gzip queue ${ARTIFACT_DIR}/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false + +# Snapshot iptables-save on each node for debugging possible kube-proxy issues +oc --insecure-skip-tls-verify get --request-timeout=20s -n openshift-sdn -l app=sdn pods --template '{{ range .items }}{{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/sdn-pods +while IFS= read -r i; do + queue ${ARTIFACT_DIR}/network/iptables-save-$i oc --insecure-skip-tls-verify rsh --timeout=20 -n openshift-sdn -c sdn $i iptables-save -c +done < /tmp/sdn-pods + +while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )" + queue ${ARTIFACT_DIR}/metrics/${file}-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' + queue ${ARTIFACT_DIR}/metrics/${file}-controllers-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' +done < /tmp/pods-api + +while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )" + FILTER=gzip queue ${ARTIFACT_DIR}/pods/${file}.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s $i + FILTER=gzip queue ${ARTIFACT_DIR}/pods/${file}_previous.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s -p $i +done < /tmp/containers + +echo "Snapshotting prometheus (may take 15s) ..." 
+queue ${ARTIFACT_DIR}/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus . \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/artifacts/artifacts/ipi-deprovision-artifacts-artifacts-ref.yaml b/ci-operator/step-registry/ipi/deprovision/artifacts/artifacts/ipi-deprovision-artifacts-artifacts-ref.yaml new file mode 100644 index 0000000000000..22c964fc790d1 --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/artifacts/artifacts/ipi-deprovision-artifacts-artifacts-ref.yaml @@ -0,0 +1,10 @@ +ref: + as: ipi-deprovision-artifacts-artifacts + from: cli + commands: ipi-deprovision-artifacts-artifacts-commands.sh + resources: + requests: + cpu: 300m + mem: 300Mi + documentation: |- + The pre-deprovision artifacts step collects CI-specific artifacts. \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/artifacts/bootstrap/ipi-deprovision-artifacts-bootstrap-commands.sh b/ci-operator/step-registry/ipi/deprovision/artifacts/bootstrap/ipi-deprovision-artifacts-bootstrap-commands.sh new file mode 100644 index 0000000000000..31d8ff1c387f0 --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/artifacts/bootstrap/ipi-deprovision-artifacts-bootstrap-commands.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set -o nounset +set -o errexit +set -o pipefail + +export PATH=$PATH:/tmp/shared + +echo "Gathering installer artifacts ..." 
+# we don't have jq, so the python equivalent of +# jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)' +bootstrap_ip=$(python -c \ + 'import sys, json; from functools import reduce; d=reduce(lambda x,y: dict(list(x.items()) + list(y.items())), map(lambda x: x["resources"], json.load(sys.stdin)["modules"]), {}); k="aws_instance.bootstrap"; print(d[k]["primary"]["attributes"]["public_ip"] if k in d else "")' \ + < "${ARTIFACT_DIR}/installer/terraform.tfstate" +) + +if [ -n "${bootstrap_ip}" ] +then + for service in bootkube openshift kubelet crio + do + curl \ + --insecure \ + --silent \ + --connect-timeout 5 \ + --retry 3 \ + --cert "${ARTIFACT_DIR}/installer/tls/journal-gatewayd.crt" \ + --key "${ARTIFACT_DIR}/installer/tls/journal-gatewayd.key" \ + --url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service" > "${ARTIFACT_DIR}/bootstrap/${service}.service" + done + if ! whoami &> /dev/null; then + if [ -w /etc/passwd ]; then + echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd + fi + fi + eval "$(ssh-agent)" + ssh-add /etc/openshift-installer/ssh-privatekey + ssh -A -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null "core@${bootstrap_ip}" /bin/bash -x /usr/local/bin/installer-gather.sh + scp -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null "core@${bootstrap_ip}:log-bundle.tar.gz" "${ARTIFACT_DIR}/installer/bootstrap-logs.tar.gz" +fi \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/artifacts/bootstrap/ipi-deprovision-artifacts-bootstrap-ref.yaml b/ci-operator/step-registry/ipi/deprovision/artifacts/bootstrap/ipi-deprovision-artifacts-bootstrap-ref.yaml new file mode 100644 index 0000000000000..0aff7cbf22869 --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/artifacts/bootstrap/ipi-deprovision-artifacts-bootstrap-ref.yaml @@ -0,0 +1,10 @@ +ref: + as: 
ipi-deprovision-artifacts-bootstrap + from: "TODO(skuznets): need an image with ssh+jq+curl" + commands: ipi-deprovision-artifacts-bootstrap-commands.sh + resources: + requests: + cpu: 300m + mem: 300Mi + documentation: |- + The pre-deprovision bootstrap artifacts step collects artifacts from the installer. \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/artifacts/ipi-deprovision-artifacts-chain.yaml b/ci-operator/step-registry/ipi/deprovision/artifacts/ipi-deprovision-artifacts-chain.yaml new file mode 100644 index 0000000000000..8a99172a64c29 --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/artifacts/ipi-deprovision-artifacts-chain.yaml @@ -0,0 +1,8 @@ +chain: + as: ipi-deprovision-artifacts + steps: + - ref: ipi-deprovision-artifacts-artifacts + - ref: ipi-deprovision-artifacts-bootstrap + - ref: ipi-deprovision-artifacts-must-gather + documentation: |- + The IPI deprovision artifacts step chain contains all the individual steps necessary to collect artifacts from a cluster. \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/artifacts/must-gather/ipi-deprovision-artifacts-must-gather-commands.sh b/ci-operator/step-registry/ipi/deprovision/artifacts/must-gather/ipi-deprovision-artifacts-must-gather-commands.sh new file mode 100644 index 0000000000000..90c475270eeb6 --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/artifacts/must-gather/ipi-deprovision-artifacts-must-gather-commands.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "Running must-gather..." 
+mkdir -p "${ARTIFACT_DIR}/must-gather" +oc --insecure-skip-tls-verify adm must-gather --dest-dir "${ARTIFACT_DIR}/must-gather" > "${ARTIFACT_DIR}/must-gather/must-gather.log" \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/artifacts/must-gather/ipi-deprovision-artifacts-must-gather-ref.yaml b/ci-operator/step-registry/ipi/deprovision/artifacts/must-gather/ipi-deprovision-artifacts-must-gather-ref.yaml new file mode 100644 index 0000000000000..b00dd338ffb3f --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/artifacts/must-gather/ipi-deprovision-artifacts-must-gather-ref.yaml @@ -0,0 +1,10 @@ +ref: + as: ipi-deprovision-artifacts-must-gather + from: cli + commands: ipi-deprovision-artifacts-must-gather-commands.sh + resources: + requests: + cpu: 300m + mem: 300Mi + documentation: |- + The pre-deprovision must-gather step runs the must-gather tool to gather artifacts. \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-commands.sh b/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-commands.sh new file mode 100644 index 0000000000000..1e439ba907edf --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-commands.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -o nounset +set -o errexit +set -o pipefail + +echo "Deprovisioning cluster ..." 
+openshift-install --dir "${ARTIFACT_DIR}/installer" destroy cluster \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-ref.yaml b/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-ref.yaml new file mode 100644 index 0000000000000..376ce7e126675 --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-ref.yaml @@ -0,0 +1,10 @@ +ref: + as: ipi-deprovision-deprovision + from: installer + commands: ipi-deprovision-deprovision-commands.sh + resources: + requests: + cpu: 1000m + mem: 300Mi + documentation: |- + The IPI deprovision step tears down the cluster. \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/deprovision/ipi-deprovision-chain.yaml b/ci-operator/step-registry/ipi/deprovision/ipi-deprovision-chain.yaml new file mode 100644 index 0000000000000..ae18ad32bc1fe --- /dev/null +++ b/ci-operator/step-registry/ipi/deprovision/ipi-deprovision-chain.yaml @@ -0,0 +1,7 @@ +chain: + as: ipi-deprovision + steps: + - chain: ipi-deprovision-artifacts + - ref: ipi-deprovision-deprovision + documentation: |- + The IPI deprovision step chain contains all the individual steps necessary to deprovision an OpenShift cluster. 
\ No newline at end of file diff --git a/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh new file mode 100644 index 0000000000000..e8e02aa154bea --- /dev/null +++ b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -o nounset +set -o errexit +set -o pipefail + +# TODO(skuznets): oh boy \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/install/install/ipi-install-install-ref.yaml b/ci-operator/step-registry/ipi/install/install/ipi-install-install-ref.yaml new file mode 100644 index 0000000000000..84567e1516152 --- /dev/null +++ b/ci-operator/step-registry/ipi/install/install/ipi-install-install-ref.yaml @@ -0,0 +1,10 @@ +ref: + as: ipi-install-install + from: installer + commands: ipi-install-install-commands.sh + resources: + requests: + cpu: 1000m + mem: 2Gi + documentation: |- + The IPI install step runs the OpenShift Installer in order to bring up an OpenShift cluster, using the provided cluster profile to choose a target IaaS platform. \ No newline at end of file diff --git a/ci-operator/step-registry/ipi/install/ipi-install-chain.yaml b/ci-operator/step-registry/ipi/install/ipi-install-chain.yaml new file mode 100644 index 0000000000000..156e071e69fc1 --- /dev/null +++ b/ci-operator/step-registry/ipi/install/ipi-install-chain.yaml @@ -0,0 +1,7 @@ +chain: + as: ipi-install + steps: + - ref: ipi-install-rbac + - ref: ipi-install-install + documentation: |- + The IPI install step chain contains all the individual steps necessary to install an OpenShift cluster. 
\ No newline at end of file diff --git a/ci-operator/step-registry/ipi/install/rbac/ipi-install-rbac-commands.sh b/ci-operator/step-registry/ipi/install/rbac/ipi-install-rbac-commands.sh new file mode 100644 index 0000000000000..81d030d5eccb9 --- /dev/null +++ b/ci-operator/step-registry/ipi/install/rbac/ipi-install-rbac-commands.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -o nounset +set -o errext +set -o pipefail + +# We want the cluster to be able to access these images +oc adm policy add-role-to-group system:image-puller system:unauthenticated --namespace "${NAMESPACE}" +oc adm policy add-role-to-group system:image-puller system:authenticated --namespace "${NAMESPACE}" + +# Give admin access to a known bot +oc adm policy add-role-to-user admin system:serviceaccount:ci:ci-chat-bot --namespace "${NAMESPACE}" + +# Role for giving the e2e pod permissions to update imagestreams +cat <