diff --git a/ci-operator/config/openshift/installer/openshift-installer-master.yaml b/ci-operator/config/openshift/installer/openshift-installer-master.yaml index 008542677f063..d52751a11c896 100644 --- a/ci-operator/config/openshift/installer/openshift-installer-master.yaml +++ b/ci-operator/config/openshift/installer/openshift-installer-master.yaml @@ -57,6 +57,14 @@ images: - builder paths: null to: baremetal-installer +- dockerfile_path: images/installer/Dockerfile.ci + from: base + inputs: + root: + as: + - build + paths: null + to: ovirt-installer promotion: name: "4.4" namespace: ocp @@ -115,6 +123,12 @@ resources: requests: cpu: "3" memory: 5Gi + ovirt-installer: + limits: + memory: 9Gi + requests: + cpu: "3" + memory: 5Gi tag_specification: name: "4.4" namespace: ocp @@ -182,4 +196,4 @@ tests: - as: e2e-vsphere commands: TEST_SUITE=openshift/conformance/parallel run-tests openshift_installer_upi: - cluster_profile: vsphere + cluster_profile: vsphere \ No newline at end of file diff --git a/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml b/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml index 09a3677b92853..19375f2ec0782 100644 --- a/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml +++ b/ci-operator/jobs/openshift/installer/openshift-installer-master-presubmits.yaml @@ -1453,6 +1453,71 @@ presubmits: name: prow-job-cluster-launch-installer-openstack-e2e name: job-definition trigger: (?m)^/test( | .* )e2e-openstack-parallel,?($|\s.*) + - agent: kubernetes + always_run: false + branches: + - master + context: ci/prow/e2e-ovirt + decorate: true + decoration_config: + skip_cloning: true + labels: + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: pull-ci-openshift-installer-master-e2e-ovirt + optional: true + rerun_command: /test e2e-ovirt + run_if_changed: ^([^d]|d(d|o(d|cd))*([^do]|o([^cd]|c[^ds])))*(d(d|o(d|cd))*(oc?)?)?$ + spec: + containers: + - args: + - --artifact-dir=$(ARTIFACTS) + - --give-pr-author-access-to-namespace=true + - --secret-dir=/usr/local/e2e-ovirt-cluster-profile + - --target=e2e-ovirt + - --template=/usr/local/e2e-ovirt + command: + - ci-operator + env: + - name: CLUSTER_TYPE + value: ovirt + - name: CONFIG_SPEC + valueFrom: + configMapKeyRef: + key: openshift-installer-master.yaml + name: ci-operator-master-configs + - name: JOB_NAME_SAFE + value: e2e-ovirt + - name: LEASE_TYPE + value: minimal + - name: TEST_COMMAND + value: run-minimal-tests + image: ci-operator:latest + imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /usr/local/e2e-ovirt-cluster-profile + name: cluster-profile + - mountPath: /usr/local/e2e-ovirt + name: job-definition + subPath: cluster-launch-installer-ovirt-e2e.yaml + serviceAccountName: ci-operator + volumes: + - name: cluster-profile + projected: + sources: + - secret: + name: cluster-secrets-ovirt + - secret: + name: ovirt-infra-secrets + - configMap: + name: cluster-profile-ovirt + - configMap: + name: prow-job-cluster-launch-installer-ovirt-e2e + name: job-definition + trigger: (?m)^/test( | .* )e2e-ovirt,?($|\s.*) - agent: kubernetes always_run: false branches: diff --git a/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml b/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml index f472dd27c7652..f5bfb83c4c91d 100644 --- a/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml +++ b/ci-operator/jobs/openshift/release/openshift-release-release-4.4-periodics.yaml @@ -4992,3 +4992,86 @@ periodics: - name: pull-secret secret: secretName: ci-pull-credentials +- agent: kubernetes + decorate: true + decoration_config: + skip_cloning: true + interval: 24h + labels: + ci.openshift.io/release-type: informing + job-env: ovirt + job-release: "4.4" + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: release-openshift-ocp-installer-e2e-ovirt-4.4 + spec: + containers: + - args: + - --artifact-dir=$(ARTIFACTS) + - --give-pr-author-access-to-namespace=true + - --secret-dir=/usr/local/pull-secret + - --secret-dir=/usr/local/e2e-ovirt-cluster-profile + - --target=e2e-ovirt + - --template=/usr/local/e2e-ovirt + - --input-hash=$(BUILD_ID) + - --input-hash=$(JOB_NAME) + command: + - ci-operator + env: + - name: RELEASE_IMAGE_LATEST + value: registry.svc.ci.openshift.org/ocp/release:4.4 + - name: BRANCH + value: "4.4" + - name: CLUSTER_TYPE + value: ovirt + - name: LEASE_TYPE + value: conformance + - name: CONFIG_SPEC + value: | + tag_specification: + name: "$(BRANCH)" + namespace: ocp + resources: + '*': + limits: + memory: 4Gi + requests: + cpu: 100m + memory: 200Mi + tests: + - as: e2e-$(CLUSTER_TYPE)-parallel + commands: TEST_SUITE=openshift/conformance/parallel run-tests + openshift_installer: + cluster_profile: "$(CLUSTER_TYPE)" + - name: JOB_NAME_SAFE + value: e2e-ovirt + - name: TEST_COMMAND + value: TEST_SUITE=openshift/conformance/parallel run-tests + image: ci-operator:latest + imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /usr/local/e2e-ovirt-cluster-profile + name: cluster-profile + - mountPath: /usr/local/e2e-ovirt + name: job-definition + subPath: cluster-launch-installer-ovirt-e2e.yaml + - mountPath: /usr/local/pull-secret + name: pull-secret + serviceAccountName: ci-operator + volumes: + - name: cluster-profile + projected: + sources: + - secret: + name: cluster-secrets-ovirt + - secret: + name: ovirt-infra-secrets + - configMap: + name: prow-job-cluster-launch-installer-ovirt-e2e + name: job-definition + - name: pull-secret + secret: + secretName: ci-pull-credentials diff --git a/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml b/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml new file mode 100644 index 0000000000000..cfcc3c8589647 --- /dev/null +++ b/ci-operator/templates/openshift/installer/cluster-launch-installer-ovirt-e2e.yaml @@ -0,0 +1,595 @@ +kind: Template +apiVersion: template.openshift.io/v1 + +parameters: +- name: JOB_NAME_SAFE + required: true +- name: JOB_NAME_HASH + required: true +- name: NAMESPACE + required: true +- name: IMAGE_TESTS + required: true +- name: IMAGE_INSTALLER + required: true +- name: CLUSTER_TYPE + value: ovirt +- name: TEST_COMMAND + required: true +- name: LEASE_TYPE + required: true +- name: RELEASE_IMAGE_LATEST + required: true +- name: BASE_DOMAIN + required: true + value: gcp.devcluster.openshift.com + +objects: + +# We want the cluster to be able to access these images +- kind: RoleBinding + apiVersion: authorization.openshift.io/v1 + metadata: + name: ${JOB_NAME_SAFE}-image-puller + namespace: ${NAMESPACE} + roleRef: + name: system:image-puller + subjects: + - kind: SystemGroup + name: system:unauthenticated + - kind: SystemGroup + name: system:authenticated + +# Give admin access to a known bot +- kind: RoleBinding + apiVersion: authorization.openshift.io/v1 + metadata: + name: ${JOB_NAME_SAFE}-namespace-admins + namespace: ${NAMESPACE} + roleRef: + name: admin + subjects: + - kind: ServiceAccount + namespace: ci + name: ci-chat-bot + +# The e2e pod spins up a cluster, runs e2e tests, and then cleans up the cluster. +- kind: Pod + apiVersion: v1 + metadata: + name: ${JOB_NAME_SAFE} + namespace: ${NAMESPACE} + annotations: + # we want to gather the teardown logs no matter what + ci-operator.openshift.io/wait-for-container-artifacts: teardown + ci-operator.openshift.io/save-container-logs: "true" + ci-operator.openshift.io/container-sub-tests: "lease,setup,test,teardown" + spec: + restartPolicy: Never + activeDeadlineSeconds: 14400 + terminationGracePeriodSeconds: 900 + volumes: + - name: artifacts + emptyDir: {} + - name: shared-tmp + emptyDir: {} + - name: cluster-profile + secret: + secretName: ${JOB_NAME_SAFE}-cluster-profile + + containers: + + - name: lease + image: registry.svc.ci.openshift.org/ci/boskoscli:latest + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 10m + memory: 10Mi + limits: + memory: 200Mi + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: CLUSTER_TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ${NAMESPACE}-${JOB_NAME_HASH} + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + + trap 'rc=$?; CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi; if test "${rc}" -ne 0; then touch /tmp/shared/exit; fi; exit "${rc}"' EXIT + + # hack for bazel + function boskosctl() { + /app/boskos/cmd/cli/app.binary "${@}" + } + + function extract_leases_info() { + echo "$( jq ."${1}" --raw-output "${2}" )" + } + + function acquire_lease() { + resource="$( boskosctl --server-url http://boskos.ci --owner-name "${CLUSTER_NAME}" acquire --type "${CLUSTER_TYPE}-quota-slice" --state "free" --target-state "leased" --timeout 150m )" + resource_name="$(echo "${resource}"|jq .name --raw-output)" + lease_path="/etc/openshift-installer/${resource_name}.json" + ovirt_engine_template_name="$(extract_leases_info ovirt_engine_template_name ${lease_path})" + if [ "${LEASE_TYPE}" == "conformance" ]; then + bm_name="$(extract_leases_info ovirt_engine_cluster_bm ${lease_path})" + conformance_resource="$( boskosctl --server-url http://boskos.ci --owner-name "${CLUSTER_NAME}" acquire --type "${CLUSTER_TYPE}-${bm_name}" --state "free" --target-state "leased" --timeout 150m )" + conformance_resource_name="$(echo "${conformance_resource}"|jq .name --raw-output)" + worker_cpu=8 + worker_mem=16384 + master_cpu=8 + master_mem=16384 + fi + if [ "${LEASE_TYPE}" == "minimal" ]; then + ovirt_engine_template_name="${ovirt_engine_template_name}-8G" + worker_cpu=4 + worker_mem=8192 + master_cpu=4 + master_mem=8192 + fi + } + + echo "[INFO] Acquiring a lease ..." + acquire_lease + + #Saving parameters for the env + cat > /tmp/shared/ovirt-lease.conf <&1 + exit 0 + fi + + sleep 15 & wait $! + done + + + # Once the cluster is up, executes shared tests + - name: test + image: ${IMAGE_TESTS} + terminationMessagePolicy: FallbackToLogsOnError + resources: + requests: + cpu: 1 + memory: 1Gi + limits: + memory: 7Gi + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + - name: cluster-profile + mountPath: /tmp/cluster + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: ARTIFACT_DIR + value: /tmp/artifacts + - name: HOME + value: /tmp/home + - name: KUBECONFIG + value: /tmp/artifacts/installer/auth/kubeconfig + command: + - /bin/bash + - -c + - | + #!/bin/bash + set -euo pipefail + + export PATH=/usr/libexec/origin:$PATH + + trap 'touch /tmp/shared/exit' EXIT + trap 'kill $(jobs -p); exit 0' TERM + + mkdir -p "${HOME}" + + # Share oc with other containers + cp "$(command -v oc)" /tmp/shared + + # wait for the API to come up + while true; do + if [[ -f /tmp/shared/exit ]]; then + echo "Another process exited" 2>&1 + exit 1 + fi + if [[ ! -f /tmp/shared/setup-success ]]; then + sleep 15 & wait + continue + fi + # don't let clients impact the global kubeconfig + cp "${KUBECONFIG}" /tmp/admin.kubeconfig + export KUBECONFIG=/tmp/admin.kubeconfig + break + done + + # if the cluster profile included an insights secret, install it to the cluster to + # report support data from the support-operator + if [[ -f /tmp/cluster/insights-live.yaml ]]; then + oc create -f /tmp/cluster/insights-live.yaml || true + fi + + # set up env vars + export KUBE_SSH_BASTION="$( oc --insecure-skip-tls-verify get node -l node-role.kubernetes.io/master -o 'jsonpath={.items[0].status.addresses[?(@.type=="ExternalIP")].address}' ):22" + export KUBE_SSH_KEY_PATH=/tmp/cluster/ssh-privatekey + mkdir -p ~/.ssh + cp /tmp/cluster/ssh-privatekey ~/.ssh/kube_ovirt_rsa || true + + mkdir -p /tmp/output + cd /tmp/output + + function run-upgrade-tests() { + openshift-tests run-upgrade "${TEST_SUITE}" --to-image "${RELEASE_IMAGE_LATEST}" \ + --provider "${TEST_PROVIDER:-}" -o /tmp/artifacts/e2e.log --junit-dir /tmp/artifacts/junit + exit 0 + } + + function run-tests() { + openshift-tests run "${TEST_SUITE}" \ + --provider "${TEST_PROVIDER:-}" -o /tmp/artifacts/e2e.log --junit-dir /tmp/artifacts/junit + exit 0 + } + + function run-minimal-tests() { + # Grab all of the tests marked Feature:Builds and conformance/parallel/minimal + openshift-tests run openshift/conformance/parallel --dry-run | + grep 'Smoke' | + openshift-tests run -o /tmp/artifacts/e2e.log \ + --junit-dir /tmp/artifacts/junit -f - + exit 0 + } + + function run-no-tests() { + # This can be used if we just want to check the installer exits 0 + echo "WARNING: No tests were run against the installed cluster" + exit 0 + } + + ${TEST_COMMAND} + + # Runs an install + - name: setup + # A midstep till we have the installer work merged, then we + # can use the CI artifact + image: quay.io/rgolangh/openshift-installer:latest + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: shared-tmp + mountPath: /tmp + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ovirt + - name: BASE_DOMAIN + value: ${BASE_DOMAIN} + - name: SSH_PUB_KEY_PATH + value: /etc/openshift-installer/ssh-publickey + - name: PULL_SECRET_PATH + value: /etc/openshift-installer/pull-secret + - name: OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE + value: registry.svc.ci.openshift.org/ovirt/ovirt-release:latest + - name: USER + value: test + - name: HOME + value: /tmp + - name: INSTALL_INITIAL_RELEASE + - name: RELEASE_IMAGE_INITIAL + command: + - /bin/sh + - -c + - | + #!/bin/sh + trap 'rc=$?; if test "${rc}" -eq 0; then touch /tmp/setup-success; else touch /tmp/exit; fi; exit "${rc}"' EXIT + trap 'CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi' TERM + # Wait untill lease is acquired + while true; do + if [[ -f /tmp/exit ]]; then + echo "Another process exited" 2>&1 + exit 1 + fi + if [[ -f /tmp/leased ]]; then + echo "Lease acquired, installing..." + break + fi + sleep 15 & wait + done + + if [[ -n "${INSTALL_INITIAL_RELEASE}" && -n "${RELEASE_IMAGE_INITIAL}" ]]; then + echo "Installing from initial release ${RELEASE_IMAGE_INITIAL}" + OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE="${RELEASE_IMAGE_INITIAL}" + else + echo "Installing from release ${RELEASE_IMAGE_LATEST}" + fi + # poll to make sure that the test container has dropped oc into the shared volume + while [ ! command -V oc ]; do sleep 1; done + cp "$(command -v openshift-install)" /tmp + mkdir /tmp/artifacts/installer + source /tmp/ovirt-lease.conf + source /etc/openshift-installer/ovirt.conf + export PATH=$PATH:/tmp + export EXPIRATION_DATE=$(date -d '4 hours' --iso=minutes --utc) + export SSH_PUB_KEY=$(cat "${SSH_PUB_KEY_PATH}") + export PULL_SECRET=$(cat "${PULL_SECRET_PATH}") + export TF_VAR_ovirt_template_mem=${WORKER_MEM} + export TF_VAR_ovirt_template_cpu=${WORKER_CPU} + export TF_VAR_ovirt_master_mem=${MASTER_MEM} + export TF_VAR_ovirt_master_cpu=${MASTER_CPU} + + ## Image handling - for now the CI uses a fixed rhcos template + ## TODO - the fixed template is saving time and space when creating the + ## cluster in the cost of having to maitain the supported version. This + ## maintnance procedure does not exist yet. + export OPENSHIFT_INSTALL_OS_IMAGE_OVERRIDE=${OVIRT_ENGINE_TEMPLATE_NAME} + + # We want the setup to download the latest CA from the engine + # Therefor living it empty + export OVIRT_CONFIG=/tmp/artifacts/installer/ovirt-config.yaml + cat > /tmp/artifacts/installer/ovirt-config.yaml < /tmp/artifacts/installer/install-config.yaml << EOF + apiVersion: v1 + baseDomain: ${BASE_DOMAIN} + metadata: + name: ${OCP_CLUSTER} + compute: + - hyperthreading: Enabled + name: worker + platform: {} + replicas: 2 + controlPlane: + hyperthreading: Enabled + name: master + platform: {} + replicas: 3 + platform: + ovirt: + ovirt_cluster_id: ${OVIRT_ENGINE_CLUSTER_ID} + ovirt_storage_domain_id: ${OVIRT_ENGINE_STORAGE_DOMAIN_ID} + api_vip: ${OVIRT_APIVIP} + dns_vip: ${OVIRT_DNSVIP} + ingress_vip: ${OVIRT_INGRESSVIP} + pullSecret: > + ${PULL_SECRET} + sshKey: | + ${SSH_PUB_KEY} + EOF + + #change the masters igntion , to use tempfs for etcd IOPS optimization + TF_LOG=debug openshift-install --dir=/tmp/artifacts/installer create ignition-configs --log-level=debug + python -c \ + 'import json, sys; j = json.load(sys.stdin); j[u"systemd"][u"units"] = [{u"contents": "[Unit]\nDescription=Mount etcd as a ramdisk\nBefore=local-fs.target\n[Mount]\n What=none\nWhere=/var/lib/etcd\nType=tmpfs\nOptions=size=2G\n[Install]\nWantedBy=local-fs.target", u"enabled": True, u"name":u"var-lib-etcd.mount"}]; json.dump(j, sys.stdout)' \ + /tmp/artifacts/installer/master.ign.out + mv /tmp/artifacts/installer/master.ign.out /tmp/artifacts/installer/master.ign + + # What we're doing here is we generate manifests first and force that OpenShift SDN is configured. + TF_LOG=debug openshift-install --dir=/tmp/artifacts/installer create manifests --log-level=debug + TF_LOG=debug openshift-install --dir=/tmp/artifacts/installer create cluster --log-level=debug & + wait "$!" + install_exit_status=$? + export KUBECONFIG=/tmp/artifacts/installer/auth/kubeconfig + oc patch configs.imageregistry.operator.openshift.io cluster --type merge --patch '{"spec":{"managementState":"Managed","storage":{"emptyDir":{}}}}' + sleep 10m + oc get co/image-registry + exit $install_exit_status + + # Performs cleanup of all created resources + - name: teardown + # A midstep till we have the installer work merged, then we + # can use the CI artifact + image: quay.io/rgolangh/openshift-installer:latest + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - name: shared-tmp + mountPath: /tmp/shared + - name: cluster-profile + mountPath: /etc/openshift-installer + - name: artifacts + mountPath: /tmp/artifacts + env: + - name: TYPE + value: ${CLUSTER_TYPE} + - name: CLUSTER_NAME + value: ovirt + - name: KUBECONFIG + value: /tmp/artifacts/installer/auth/kubeconfig + command: + - /bin/bash + - -c + - | + #!/bin/bash + function queue() { + local TARGET="${1}" + shift + local LIVE="$(jobs | wc -l)" + while [[ "${LIVE}" -ge 45 ]]; do + sleep 1 + LIVE="$(jobs | wc -l)" + done + echo "${@}" + if [[ -n "${FILTER}" ]]; then + "${@}" | "${FILTER}" >"${TARGET}" & + else + "${@}" >"${TARGET}" & + fi + } + + function teardown() { + set +e + touch /tmp/shared/exit + export PATH=$PATH:/tmp/shared + source /etc/openshift-installer/ovirt.conf + + echo "Gathering artifacts ..." + mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap /tmp/artifacts/network + + + if [ -f /tmp/artifacts/installer/terraform.tfstate ] + then + if [ -n "${bootstrap_ip}" ] + then + for service in bootkube openshift kubelet crio + do + queue "/tmp/artifacts/bootstrap/${service}.service" curl \ + --insecure \ + --silent \ + --connect-timeout 5 \ + --retry 3 \ + --cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \ + --key /tmp/artifacts/installer/tls/journal-gatewayd.key \ + --url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service" + done + if ! whoami &> /dev/null; then + if [ -w /etc/passwd ]; then + echo "${USER_NAME:-default}:x:$(id -u):0:${USER_NAME:-default} user:${HOME}:/sbin/nologin" >> /etc/passwd + fi + fi + eval $(ssh-agent) + ssh-add /etc/openshift-installer/ssh-privatekey + ssh -A -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null core@${bootstrap_ip} /bin/bash -x /usr/local/bin/installer-gather.sh + scp -o PreferredAuthentications=publickey -o StrictHostKeyChecking=false -o UserKnownHostsFile=/dev/null core@${bootstrap_ip}:log-bundle.tar.gz /tmp/artifacts/installer/bootstrap-logs.tar.gz + fi + else + echo "No terraform statefile found. Skipping collection of bootstrap logs." + fi + # WORKAROUND https://github.com/openshift/installer/issues/1467 + # We need this to be able to collect logs + oc --insecure-skip-tls-verify --request-timeout=5s get csr -o name | xargs oc --insecure-skip-tls-verify --request-timeout=5s adm certificate approve + + oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes + oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers + oc --insecure-skip-tls-verify --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api + + queue /tmp/artifacts/apiservices.json oc --insecure-skip-tls-verify --request-timeout=5s get apiservices -o json + queue /tmp/artifacts/clusteroperators.json oc --insecure-skip-tls-verify --request-timeout=5s get clusteroperators -o json + queue /tmp/artifacts/clusterversion.json oc --insecure-skip-tls-verify --request-timeout=5s get clusterversion -o json + queue /tmp/artifacts/configmaps.json oc --insecure-skip-tls-verify --request-timeout=5s get configmaps --all-namespaces -o json + queue /tmp/artifacts/csr.json oc --insecure-skip-tls-verify --request-timeout=5s get csr -o json + queue /tmp/artifacts/endpoints.json oc --insecure-skip-tls-verify --request-timeout=5s get endpoints --all-namespaces -o json + queue /tmp/artifacts/deployments.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get deployments --all-namespaces -o json + queue /tmp/artifacts/daemonsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get daemonsets --all-namespaces -o json + queue /tmp/artifacts/events.json oc --insecure-skip-tls-verify --request-timeout=5s get events --all-namespaces -o json + queue /tmp/artifacts/kubeapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get kubeapiserver -o json + queue /tmp/artifacts/kubecontrollermanager.json oc --insecure-skip-tls-verify --request-timeout=5s get kubecontrollermanager -o json + queue /tmp/artifacts/machineconfigpools.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigpools -o json + queue /tmp/artifacts/machineconfigs.json oc --insecure-skip-tls-verify --request-timeout=5s get machineconfigs -o json + queue /tmp/artifacts/namespaces.json oc --insecure-skip-tls-verify --request-timeout=5s get namespaces -o json + queue /tmp/artifacts/nodes.json oc --insecure-skip-tls-verify --request-timeout=5s get nodes -o json + queue /tmp/artifacts/openshiftapiserver.json oc --insecure-skip-tls-verify --request-timeout=5s get openshiftapiserver -o json + queue /tmp/artifacts/pods.json oc --insecure-skip-tls-verify --request-timeout=5s get pods --all-namespaces -o json + queue /tmp/artifacts/persistentvolumes.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumes --all-namespaces -o json + queue /tmp/artifacts/persistentvolumeclaims.json oc --insecure-skip-tls-verify --request-timeout=5s get persistentvolumeclaims --all-namespaces -o json + queue /tmp/artifacts/replicasets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get replicasets --all-namespaces -o json + queue /tmp/artifacts/rolebindings.json oc --insecure-skip-tls-verify --request-timeout=5s get rolebindings --all-namespaces -o json + queue /tmp/artifacts/roles.json oc --insecure-skip-tls-verify --request-timeout=5s get roles --all-namespaces -o json + queue /tmp/artifacts/services.json oc --insecure-skip-tls-verify --request-timeout=5s get services --all-namespaces -o json + queue /tmp/artifacts/statefulsets.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get statefulsets --all-namespaces -o json + + FILTER=gzip queue /tmp/artifacts/openapi.json.gz oc --insecure-skip-tls-verify --request-timeout=5s get --raw /openapi/v2 + + # gather nodes first in parallel since they may contain the most relevant debugging info + while IFS= read -r i; do + mkdir -p /tmp/artifacts/nodes/$i + queue /tmp/artifacts/nodes/$i/heap oc --insecure-skip-tls-verify get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap + done < /tmp/nodes + + FILTER=gzip queue /tmp/artifacts/nodes/masters-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=master --unify=false + FILTER=gzip queue /tmp/artifacts/nodes/workers-journal.gz oc --insecure-skip-tls-verify adm node-logs --role=worker --unify=false + + # Snapshot iptables-save on each node for debugging possible kube-proxy issues + oc --insecure-skip-tls-verify get --request-timeout=20s -n openshift-sdn -l app=sdn pods --template '{{ range .items }}{{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/sdn-pods + while IFS= read -r i; do + queue /tmp/artifacts/network/iptables-save-$i oc --insecure-skip-tls-verify rsh --timeout=20 -n openshift-sdn -c sdn $i iptables-save -c + done < /tmp/sdn-pods + + while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )" + queue /tmp/artifacts/metrics/${file}-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' + queue /tmp/artifacts/metrics/${file}-controllers-heap oc --insecure-skip-tls-verify exec $i -- /bin/bash -c 'oc --insecure-skip-tls-verify get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' + done < /tmp/pods-api + + while IFS= read -r i; do + file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )" + FILTER=gzip queue /tmp/artifacts/pods/${file}.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s $i + FILTER=gzip queue /tmp/artifacts/pods/${file}_previous.log.gz oc --insecure-skip-tls-verify logs --request-timeout=20s -p $i + done < /tmp/containers + + echo "Snapshotting prometheus (may take 15s) ..." + queue /tmp/artifacts/metrics/prometheus.tar.gz oc --insecure-skip-tls-verify exec -n openshift-monitoring prometheus-k8s-0 -- tar cvzf - -C /prometheus . + + echo "Waiting for logs ..." + wait + + #We set OVIRT_CONFIG and insert he path to the engine ca to the config file + export OVIRT_CONFIG=/tmp/artifacts/installer/ovirt-config.yaml + curl -k -o "/tmp/artifacts/installer/ovirt-engine.ca" ${OVIRT_ENGINE_URL::-4}/services/pki-resource?resource=ca-certificate + sed 's|ovirt_cafile: ""|ovirt_cafile: /tmp/artifacts/installer/ovirt-engine.ca|' -i /tmp/artifacts/installer/ovirt-config.yaml + + echo "Destroy cluster ..." + openshift-install --dir /tmp/artifacts/installer destroy cluster + } + + trap 'teardown; exit $DELETE_FAIL' EXIT + trap 'kill $(jobs -p); teardown; exit $DELETE_FAIL' TERM + + for i in $(seq 1 180); do + if [[ -f /tmp/shared/exit ]]; then + exit 0 + fi + sleep 60 & wait + done diff --git a/core-services/release-controller/_releases/release-ocp-4.4.json b/core-services/release-controller/_releases/release-ocp-4.4.json index f1f61b098bc18..4eeaa304b4872 100644 --- a/core-services/release-controller/_releases/release-ocp-4.4.json +++ b/core-services/release-controller/_releases/release-ocp-4.4.json @@ -97,6 +97,10 @@ "azure-ovn":{ "optional":true, "prowJob":{"name":"release-openshift-ocp-installer-e2e-azure-ovn-4.4"} + }, + "ovirt":{ + "optional":true, + "prowJob":{"name":"release-openshift-ocp-installer-e2e-ovirt-4.4"} } } }