@@ -0,0 +1,86 @@
#!/bin/bash
# queue TARGET CMD...: run CMD in the background with its output redirected to TARGET,
# optionally piped through ${FILTER}, keeping at most 45 gather jobs in flight.
function queue() {
  local TARGET="${1}"
  shift
  local LIVE="$(jobs | wc -l)"
  while [[ "${LIVE}" -ge 45 ]]; do
    sleep 1
    LIVE="$(jobs | wc -l)"
  done
  echo "${@}"
  if [[ -n "${FILTER}" ]]; then
    "${@}" | "${FILTER}" >"${TARGET}" &
  else
    "${@}" >"${TARGET}" &
  fi
}

export PATH=$PATH:/tmp/shared

echo "Gathering artifacts ..."
mkdir -p ${ARTIFACT_DIR}/pods ${ARTIFACT_DIR}/nodes ${ARTIFACT_DIR}/metrics ${ARTIFACT_DIR}/bootstrap ${ARTIFACT_DIR}/network

oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api
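# The lookup files written above drive the gather loops below:
#   /tmp/nodes      holds one node name per line
#   /tmp/containers holds "-n <namespace> <pod> -c <container>" for every container and init container
#   /tmp/pods-api   holds "-n <namespace> <pod>" for pods labeled openshift.io/component=api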

queue ${ARTIFACT_DIR}/config-resources.json oc --insecure-skip-tls-verify --request-timeout=5s get apiserver.config.openshift.io authentication.config.openshift.io build.config.openshift.io console.config.openshift.io dns.config.openshift.io featuregate.config.openshift.io image.config.openshift.io infrastructure.config.openshift.io ingress.config.openshift.io network.config.openshift.io oauth.config.openshift.io project.config.openshift.io scheduler.config.openshift.io -o json
queue ${ARTIFACT_DIR}/apiservices.json oc --insecure-skip-tls-verify --request-timeout=5s get apiservices -o json
queue ${ARTIFACT_DIR}/clusteroperators.json oc --insecure-skip-tls-verify --request-timeout=5s get clusteroperators -o json
queue ${ARTIFACT_DIR}/clusterversion.json oc --insecure-skip-tls-verify --request-timeout=5s get clusterversion -o json
queue ${ARTIFACT_DIR}/configmaps.json oc --insecure-skip-tls-verify --request-timeout=5s get configmaps --all-namespaces -o json
queue ${ARTIFACT_DIR}/credentialsrequests.json oc --insecure-skip-tls-verify --request-timeout=5s get credentialsrequests --all-namespaces -o json
queue ${ARTIFACT_DIR}/csr.json oc --insecure-skip-tls-verify --request-timeout=5s get csr -o json
queue ${ARTIFACT_DIR}/endpoints.json oc --insecure-skip-tls-verify --request-timeout=5s get endpoints --all-namespaces -o json
FILTER=gzip queue ${ARTIFACT_DIR}/deployments.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get deployments --all-namespaces -o json
FILTER=gzip queue ${ARTIFACT_DIR}/daemonsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get daemonsets --all-namespaces -o json
queue ${ARTIFACT_DIR}/events.json oc --insecure-skip-tls-verify --request-timeout=5s get events --all-namespaces -o json
queue ${ARTIFACT_DIR}/kubeapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get kubeapiserver -o json
queue ${ARTIFACT_DIR}/kubecontrollermanager.json oc --insecure-skip-tls-verify --request-timeout=5s get kubecontrollermanager -o json
queue ${ARTIFACT_DIR}/machineconfigpools.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigpools -o json
queue ${ARTIFACT_DIR}/machineconfigs.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigs -o json
queue ${ARTIFACT_DIR}/machinesets.json oc --insecure-skip-tls-verify --request-timeout=5s get machinesets -A -o json
queue ${ARTIFACT_DIR}/machines.json oc --insecure-skip-tls-verify --request-timeout=5s get machines -A -o json
queue ${ARTIFACT_DIR}/namespaces.json oc --insecure-skip-tls-verify --request-timeout=5s get namespaces -o json
queue ${ARTIFACT_DIR}/nodes.json oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o json
queue ${ARTIFACT_DIR}/openshiftapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get openshiftapiserver -o json
queue ${ARTIFACT_DIR}/pods.json oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces -o json
queue ${ARTIFACT_DIR}/persistentvolumes.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumes --all-namespaces -o json
queue ${ARTIFACT_DIR}/persistentvolumeclaims.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumeclaims --all-namespaces -o json
FILTER=gzip queue ${ARTIFACT_DIR}/replicasets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get replicasets --all-namespaces -o json
queue ${ARTIFACT_DIR}/rolebindings.json oc --insecure-skip-tls-verify --request-timeout=5s get rolebindings --all-namespaces -o json
queue ${ARTIFACT_DIR}/roles.json oc --insecure-skip-tls-verify --request-timeout=5s get roles --all-namespaces -o json
queue ${ARTIFACT_DIR}/services.json oc --insecure-skip-tls-verify --request-timeout=5s get services --all-namespaces -o json
FILTER=gzip queue ${ARTIFACT_DIR}/statefulsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get statefulsets --all-namespaces -o json

FILTER=gzip queue ${ARTIFACT_DIR}/openapi.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get --raw /openapi/v2

# gather nodes first in parallel since they may contain the most relevant debugging info
while IFS= read -r i; do
  mkdir -p ${ARTIFACT_DIR}/nodes/$i
  queue ${ARTIFACT_DIR}/nodes/$i/heap oc --insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap
done < /tmp/nodes

FILTER=gzip queue ${ARTIFACT_DIR}/nodes/masters-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=master --unify=false
FILTER=gzip queue ${ARTIFACT_DIR}/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false

# Snapshot iptables-save on each node for debugging possible kube-proxy issues
oc --insecure-skip-tls-verify get --request-timeout=20s -n openshift-sdn -l app=sdn pods --template '{{ range .items }}{{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/sdn-pods
while IFS= read -r i; do
  queue ${ARTIFACT_DIR}/network/iptables-save-$i oc --insecure-skip-tls-verify rsh --timeout=20 -n openshift-sdn -c sdn $i iptables-save -c
done < /tmp/sdn-pods

while IFS= read -r i; do
  file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )"
  queue ${ARTIFACT_DIR}/metrics/${file}-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig'
  queue ${ARTIFACT_DIR}/metrics/${file}-controllers-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig'
done < /tmp/pods-api

while IFS= read -r i; do
  file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )"
  FILTER=gzip queue ${ARTIFACT_DIR}/pods/${file}.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s $i
  FILTER=gzip queue ${ARTIFACT_DIR}/pods/${file}_previous.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s -p $i
done < /tmp/containers

echo "Snapshotting prometheus (may take 15s) ..."
queue ${ARTIFACT_DIR}/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus .
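# A final wait is assumed to be needed here so the step does not exit while the
# backgrounded gathers above are still writing to ${ARTIFACT_DIR}.
echo "Waiting for gathers to complete ..."
wait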
@@ -0,0 +1,10 @@
ref:
  as: ipi-deprovision-artifacts-artifacts
  from: cli
  commands: ipi-deprovision-artifacts-artifacts-commands.sh
  resources:
    requests:
      cpu: 300m
      memory: 300Mi
  documentation: |-
    The pre-deprovision artifacts step collects CI-specific artifacts.
@@ -0,0 +1,39 @@
#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail

export PATH=$PATH:/tmp/shared

echo "Gathering installer artifacts ..."
# we don't have jq, so the python equivalent of
# jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)'
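# NOTE: the one-liner below relies on Python 2 semantics (the reduce() builtin, the
# print statement, and concatenating .items() lists), so `python` must resolve to python2.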
bootstrap_ip=$(python -c \
'import sys, json; d=reduce(lambda x,y: dict(x.items() + y.items()), map(lambda x: x["resources"], json.load(sys.stdin)["modules"])); k="aws_instance.bootstrap"; print d[k]["primary"]["attributes"]["public_ip"] if k in d else ""' \
< ${ARTIFACT_DIR}/installer/terraform.tfstate
)

if [ -n "${bootstrap_ip}" ]
then
for service in bootkube openshift kubelet crio
do
curl \
--insecure \
--silent \
--connect-timeout 5 \
--retry 3 \
--cert ${ARTIFACT_DIR}/installer/tls/journal-gatewayd.crt \
--key ${ARTIFACT_DIR}/installer/tls/journal-gatewayd.key \
--url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service" > "${ARTIFACT_DIR}/bootstrap/${service}.service"
done
if ! whoami &> /dev/null; then
if [ -w /etc/passwd ]; then
echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd
fi
fi
eval $(ssh-agent)
ssh-add /etc/openshift-installer/ssh-privatekey
ssh -A -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null core@${bootstrap_ip} /bin/bash -x /usr/local/bin/installer-gather.sh
scp -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null core@${bootstrap_ip}:log-bundle.tar.gz ${ARTIFACT_DIR}/installer/bootstrap-logs.tar.gz
fi
@@ -0,0 +1,10 @@
ref:
  as: ipi-deprovision-artifacts-bootstrap
  from: "TODO(skuznets): need an image with ssh+jq+curl"
  commands: ipi-deprovision-artifacts-bootstrap-commands.sh
  resources:
    requests:
      cpu: 300m
      memory: 300Mi
  documentation: |-
    The pre-deprovision bootstrap artifacts step collects artifacts from the installer.
@@ -0,0 +1,8 @@
chain:
  as: ipi-deprovision-artifacts
  steps:
  - ref: ipi-deprovision-artifacts-artifacts
  - ref: ipi-deprovision-artifacts-bootstrap
  - ref: ipi-deprovision-artifacts-must-gather
  documentation: |-
    The IPI deprovision artifacts step chain contains all the individual steps necessary to collect artifacts from a cluster.
@@ -0,0 +1,9 @@
#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail

echo "Running must-gather..."
mkdir -p ${ARTIFACT_DIR}/must-gather
oc --insecure-skip-tls-verify adm must-gather --dest-dir ${ARTIFACT_DIR}/must-gather > ${ARTIFACT_DIR}/must-gather/must-gather.log
@@ -0,0 +1,10 @@
ref:
  as: ipi-deprovision-artifacts-must-gather
  from: cli
  commands: ipi-deprovision-artifacts-must-gather-commands.sh
  resources:
    requests:
      cpu: 300m
      memory: 300Mi
  documentation: |-
    The pre-deprovision must-gather step runs the must-gather tool to gather artifacts.
@@ -0,0 +1,8 @@
#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail

echo "Deprovisioning cluster ..."
openshift-install --dir ${ARTIFACT_DIR}/installer destroy cluster
@@ -0,0 +1,10 @@
ref:
  as: ipi-deprovision-deprovision
  from: installer
  commands: ipi-deprovision-deprovision-commands.sh
  resources:
    requests:
      cpu: 1000m
      memory: 300Mi
  documentation: |-
    The IPI deprovision step tears down the cluster.
@@ -0,0 +1,7 @@
chain:
  as: ipi-deprovision
  steps:
  - chain: ipi-deprovision-artifacts
  - ref: ipi-deprovision-deprovision
  documentation: |-
    The IPI deprovision step chain contains all the individual steps necessary to deprovision an OpenShift cluster.
@@ -0,0 +1,7 @@
#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail

# TODO(skuznets): oh boy
@@ -0,0 +1,10 @@
ref:
  as: ipi-install-install
  from: installer
  commands: ipi-install-install-commands.sh
  resources:
    requests:
      cpu: 1000m
      memory: 2Gi
  documentation: |-
    The IPI install step runs the OpenShift Installer in order to bring up an OpenShift cluster, using the provided cluster profile to choose a target IaaS platform.
7 changes: 7 additions & 0 deletions ci-operator/step-registry/ipi/install/ipi-install-chain.yaml
@@ -0,0 +1,7 @@
chain:
  as: ipi-install
  steps:
  - ref: ipi-install-rbac
  - ref: ipi-install-install
  documentation: |-
    The IPI install step chain contains all the individual steps necessary to install an OpenShift cluster.
@@ -0,0 +1,31 @@
#!/bin/bash

set -o nounset
set -o errexit
set -o pipefail

# We want the cluster to be able to access these images
oc adm policy add-role-to-group system:image-puller system:unauthenticated --namespace "${NAMESPACE}"
oc adm policy add-role-to-group system:image-puller system:authenticated --namespace "${NAMESPACE}"

# Give admin access to a known bot
oc adm policy add-role-to-user admin system:serviceaccount:ci:ci-chat-bot --namespace "${NAMESPACE}"

# Role for giving the e2e pod permissions to update imagestreams
cat <<EOF | oc apply -f -
kind: Role
apiVersion: authorization.openshift.io/v1
metadata:
  name: ${JOB_NAME_SAFE}-imagestream-updater
  namespace: ${NAMESPACE}
rules:
- apiGroups: ["image.openshift.io"]
  resources: ["imagestreams/layers"]
  verbs: ["get", "update"]
- apiGroups: ["image.openshift.io"]
  resources: ["imagestreams", "imagestreamtags"]
  verbs: ["get", "create", "update", "delete", "list"]
EOF

# Give the e2e pod access to the imagestream-updater role
oc adm policy add-role-to-user ${JOB_NAME_SAFE}-imagestream-updater --serviceaccount default --namespace "${NAMESPACE}"
@@ -0,0 +1,10 @@
ref:
  as: ipi-install-rbac
  from: cli
  commands: ipi-install-rbac-commands.sh
  resources:
    requests:
      cpu: 100m
      memory: 100Mi
  documentation: |-
    The pre-installation RBAC step adds necessary privileges for the cluster under test against the build farm.
9 changes: 9 additions & 0 deletions ci-operator/step-registry/ipi/ipi-workflow.yaml
@@ -0,0 +1,9 @@
workflow:
  as: ipi
  steps:
    pre:
    - chain: ipi-install
    post:
    - chain: ipi-deprovision
  documentation: |-
    The IPI workflow provides pre- and post- steps that provision and deprovision an OpenShift cluster on a target IaaS platform, allowing job authors to inject their own end-to-end test logic.
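As a rough sketch of how a job author might consume this workflow (assuming ci-operator's multi-stage test syntax; the test name, cluster profile, and inline command below are illustrative placeholders, not part of this change):

tests:
- as: e2e
  steps:
    cluster_profile: aws
    workflow: ipi
    test:
    - as: test
      from: cli
      commands: oc get clusterversion
      resources:
        requests:
          cpu: 100m
          memory: 200Mi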