diff --git a/ci-operator/config/openshift/installer/master.yaml b/ci-operator/config/openshift/installer/master.yaml
index f23b3d18e837b..a597e7caabb52 100644
--- a/ci-operator/config/openshift/installer/master.yaml
+++ b/ci-operator/config/openshift/installer/master.yaml
@@ -4,6 +4,11 @@ base_images:
     name: origin-v4.0
     namespace: openshift
     tag: base
+  base-smoke:
+    cluster: https://api.ci.openshift.org
+    namespace: openshift
+    name: origin-release
+    tag: bazel
 binary_build_commands: go build ./installer/cmd/tectonic
 canonical_go_repository: github.com/openshift/installer
 images:
@@ -18,6 +23,20 @@ images:
       as:
       - build
   to: installer
+- dockerfile_path: images/tectonic-installer/Dockerfile.ci
+  from: base-smoke
+  inputs:
+    bin:
+      paths:
+      - destination_dir: .
+        source_path: /go/src/github.com/openshift/installer/tectonic
+    root:
+      as:
+      - build
+  to: installer-bazel
+- dockerfile_path: images/installer-origin-release/Dockerfile.ci
+  from: installer-bazel
+  to: installer-smoke
 resources:
   '*':
     limits:
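
The config change above only grows the ci-operator build graph: the new `base-smoke` base image (`origin-release:bazel`) feeds an `installer-bazel` build, which in turn feeds the `installer-smoke` image that the new job consumes. A minimal sketch of exercising that graph locally, feeding the updated config to ci-operator the same way the presubmit below does (via the `CONFIG_SPEC` environment variable); the `--dry-run` flag is assumed to be available in the ci-operator build you run:

```bash
#!/bin/bash
# Sketch only: asks ci-operator to print the objects it would create for the
# installer-smoke target instead of building anything (assumes --dry-run is supported).
export CONFIG_SPEC="$(cat ci-operator/config/openshift/installer/master.yaml)"
ci-operator --target=installer-smoke --dry-run
```
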
diff --git a/ci-operator/jobs/openshift/installer/openshift-installer-presubmits.yaml b/ci-operator/jobs/openshift/installer/openshift-installer-presubmits.yaml
index 127f3b459d440..4ce881026726b 100644
--- a/ci-operator/jobs/openshift/installer/openshift-installer-presubmits.yaml
+++ b/ci-operator/jobs/openshift/installer/openshift-installer-presubmits.yaml
@@ -90,6 +90,62 @@ presubmits:
           - configMap:
               name: cluster-profile-aws
     trigger: ((?m)^/test( all| e2e-aws),?(\s+|$))
+  - agent: kubernetes
+    always_run: false
+    branches:
+    - master
+    context: ci/prow/e2e-aws-smoke
+    decorate: true
+    name: pull-ci-origin-installer-e2e-aws-smoke
+    rerun_command: /test e2e-aws-smoke
+    # The abomination below is equivalent to `^((?!Documentation).)*$`. Since
+    # Go doesn't support negative lookaheads, we are stuck with the following.
+    run_if_changed: ^([^D]|D(D|oD|ocD|ocuD|ocum(D|e(D|n(D|t(D|aD|atD|atiD|atioD)))))*([^Do]|o[^Dc]|oc[^Du]|ocu[^Dm]|ocum([^De]|e([^Dn]|n([^Dt]|t([^Da]|a[^Dt]|at[^Di]|ati[^Do]|atio[^Dn]))))))*(D(D|oD|ocD|ocuD|ocum(D|e(D|n(D|t(D|aD|atD|atiD|atioD)))))*(o|oc|ocu|ocum(e(n(t(a|at|ati|atio)?)?)?)?)?)?$
+    skip_cloning: true
+    spec:
+      containers:
+      - command:
+        - ci-operator
+        - --artifact-dir=$(ARTIFACTS)
+        - --secret-dir=/usr/local/e2e-aws-smoke-cluster-profile
+        - --template=/usr/local/e2e-aws-smoke
+        - --target=e2e-aws-smoke
+        env:
+        - name: JOB_NAME_SAFE
+          value: e2e-aws-smoke
+        - name: CLUSTER_TYPE
+          value: aws
+        - name: CONFIG_SPEC
+          valueFrom:
+            configMapKeyRef:
+              key: master.json
+              name: ci-operator-openshift-installer
+        image: ci-operator:latest
+        name: test
+        resources:
+          limits:
+            cpu: 500m
+          requests:
+            cpu: 10m
+        volumeMounts:
+        - mountPath: /usr/local/e2e-aws-smoke
+          name: job-definition
+          subPath: cluster-launch-installer-e2e-smoke.yaml
+        - mountPath: /usr/local/e2e-aws-smoke-cluster-profile
+          name: cluster-profile
+      serviceAccountName: ci-operator
+      volumes:
+      - configMap:
+          name: prow-job-cluster-launch-installer-e2e
+        name: job-definition
+      - name: cluster-profile
+        projected:
+          sources:
+          - secret:
+              name: cluster-secrets-aws
+          - configMap:
+              name: cluster-profile-aws
+    trigger: ((?m)^/test( all| e2e-aws-smoke),?(\s+|$))
   - agent: kubernetes
     always_run: false
     branches:
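
The `run_if_changed` expression in the job above is the mechanically expanded, RE2-compatible form of `^((?!Documentation).)*$`: the job is skipped only when every changed path contains `Documentation`. A quick sanity check of the expansion against sample paths (the paths are hypothetical; Prow evaluates the pattern with Go's regexp package, and `grep -E` is only a stand-in that works here because the expansion uses no Go-specific syntax):

```bash
#!/bin/bash
# The expanded pattern, copied verbatim from the run_if_changed field above.
pattern='^([^D]|D(D|oD|ocD|ocuD|ocum(D|e(D|n(D|t(D|aD|atD|atiD|atioD)))))*([^Do]|o[^Dc]|oc[^Du]|ocu[^Dm]|ocum([^De]|e([^Dn]|n([^Dt]|t([^Da]|a[^Dt]|at[^Di]|ati[^Do]|atio[^Dn]))))))*(D(D|oD|ocD|ocuD|ocum(D|e(D|n(D|t(D|aD|atD|atiD|atioD)))))*(o|oc|ocu|ocum(e(n(t(a|at|ati|atio)?)?)?)?)?)?$'

# Hypothetical changed paths: the first should match (job runs), the second should not.
for path in 'installer/cmd/tectonic/main.go' 'Documentation/dev/libvirt.md'; do
  if printf '%s\n' "$path" | grep -Eq "$pattern"; then
    echo "match    (triggers e2e-aws-smoke): $path"
  else
    echo "no match (docs-only change):       $path"
  fi
done
```
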
diff --git a/ci-operator/templates/cluster-launch-installer-e2e-smoke.yaml b/ci-operator/templates/cluster-launch-installer-e2e-smoke.yaml
new file mode 100644
index 0000000000000..527bd7bffa874
--- /dev/null
+++ b/ci-operator/templates/cluster-launch-installer-e2e-smoke.yaml
@@ -0,0 +1,342 @@
+kind: Template
+apiVersion: template.openshift.io/v1
+
+parameters:
+- name: JOB_NAME_SAFE
+  required: true
+- name: JOB_NAME_HASH
+  required: true
+- name: NAMESPACE
+  required: true
+- name: IMAGE_CLI
+  required: true
+- name: IMAGE_FORMAT
+  required: true
+- name: IMAGE_INSTALLER
+  required: true
+- name: IMAGE_TESTS
+  required: true
+- name: IMAGE_INSTALLER_SMOKE
+  required: true
+- name: CLUSTER_TYPE
+  required: true
+
+objects:
+
+# We want the cluster to be able to access these images
+- kind: RoleBinding
+  apiVersion: authorization.openshift.io/v1
+  metadata:
+    name: ${JOB_NAME_SAFE}-image-puller
+    namespace: ${NAMESPACE}
+  roleRef:
+    name: system:image-puller
+  subjects:
+  - kind: SystemGroup
+    name: system:unauthenticated
+
+# The e2e pod spins up a cluster, runs e2e tests, and then cleans up the cluster.
+- kind: Pod
+  apiVersion: v1
+  metadata:
+    name: ${JOB_NAME_SAFE}
+    namespace: ${NAMESPACE}
+    annotations:
+      # we want to gather the teardown logs no matter what
+      ci-operator.openshift.io/wait-for-container-artifacts: teardown
+  spec:
+    restartPolicy: Never
+    activeDeadlineSeconds: 10800
+    terminationGracePeriodSeconds: 900
+    volumes:
+    - name: artifacts
+      emptyDir: {}
+    - name: shared-tmp
+      emptyDir: {}
+    - name: cluster-profile
+      secret:
+        secretName: ${JOB_NAME_SAFE}-cluster-profile
+
+    initContainers:
+    - name: cli
+      image: ${IMAGE_CLI}
+      volumeMounts:
+      - name: shared-tmp
+        mountPath: /tmp/shared
+      command:
+      - cp
+      - /usr/bin/oc
+      - /tmp/shared/oc
+
+    - name: smoke-test
+      image: ${IMAGE_INSTALLER_SMOKE}
+      volumeMounts:
+      - name: shared-tmp
+        mountPath: /tmp/shared
+      command:
+      - cp
+      - /usr/bin/smoke
+      - /tmp/shared/smoke
+
+    containers:
+
+    # Once admin.kubeconfig exists, executes shared tests
+    - name: test
+      image: ${IMAGE_INSTALLER_SMOKE}
+      resources:
+        requests:
+          cpu: 1
+          memory: 300Mi
+        limits:
+          cpu: 3
+          memory: 2Gi
+      volumeMounts:
+      - name: shared-tmp
+        mountPath: /tmp/shared
+      - name: cluster-profile
+        mountPath: /tmp/cluster
+      - name: artifacts
+        mountPath: /tmp/artifacts
+      env:
+      - name: HOME
+        value: /tmp/home
+      - name: USER
+        value: bazel
+      - name: KUBECONFIG
+        value: /tmp/admin.kubeconfig
+      command:
+      - /bin/bash
+      - -c
+      - |
+        #!/bin/bash
+        set -euo pipefail
+
+        trap 'touch /tmp/shared/exit' EXIT
+        trap 'kill $(jobs -p); exit 0' TERM
+
+        mkdir -p "${HOME}"
+
+        # wait until the setup job creates admin.kubeconfig
+        while true; do
+          if [[ -f /tmp/shared/exit ]]; then
+            echo "Another process exited" 2>&1
+            exit 1
+          fi
+          if [[ ! -f /tmp/shared/admin.kubeconfig ]]; then
+            sleep 15 & wait
+            continue
+          fi
+          break
+        done
+        echo "Found shared kubeconfig"
+
+        # don't let clients impact the global kubeconfig
+        cp /tmp/shared/admin.kubeconfig /tmp/admin.kubeconfig
+
+        PATH=/usr/libexec/origin:$PATH
+
+        # set up cloud provider specific env vars
+        if [[ "${CLUSTER_TYPE}" == "gcp" ]]; then
+          export GOOGLE_APPLICATION_CREDENTIALS="/tmp/cluster/gce.json"
+          export KUBE_SSH_USER=cloud-user
+          mkdir -p ~/.ssh
+          cp /tmp/cluster/ssh-privatekey ~/.ssh/google_compute_engine || true
+          export PROVIDER_ARGS='-provider=gce -gce-zone=us-east1-c -gce-project=openshift-gce-devel-ci'
+        elif [[ "${CLUSTER_TYPE}" == "aws" ]]; then
+          region="$( python -c 'import sys, json; print json.load(sys.stdin)["tectonic_aws_region"]' < /tmp/cluster/inputs.yaml
+          )"
+        fi
+
+        mkdir /tmp/artifacts/installer
+        cp /tmp/cluster/inputs.yaml /tmp/artifacts/installer/
+
+        echo "Invoking installer ..."
+
+        cd /tmp/cluster
+        tectonic init --config=inputs.yaml
+        mv -f ${NAME}/* /tmp/cluster/
+        cp config.yaml internal.yaml terraform.tfvars /tmp/artifacts/installer/
+
+        tectonic install --dir=. --log-level=debug
+
+        # wait until oc shows up
+        while true; do
+          if [[ -f /tmp/exit ]]; then
+            echo "Interrupted"
+            cp $KUBECONFIG /tmp/admin.kubeconfig
+            exit 1
+          fi
+          if [[ ! -f /tmp/oc ]]; then
+            echo "Waiting for oc binary to show up ..."
+            sleep 15 & wait
+            continue
+          fi
+          if ! /tmp/oc get nodes 2>/dev/null; then
+            echo "Waiting for API at $(/tmp/oc whoami --show-server) to respond ..."
+            sleep 15 & wait
+            continue
+          fi
+          # check multiple namespaces while we are transitioning to the new locations
+          if /tmp/oc get deploy/router -n tectonic-ingress 2>/dev/null; then
+            router_namespace=tectonic-ingress
+          elif /tmp/oc get deploy/router -n openshift-ingress 2>/dev/null; then
+            router_namespace=openshift-ingress
+          elif /tmp/oc get deploy/router -n default 2>/dev/null; then
+            router_namespace=default
+          else
+            echo "Waiting for router to be created ..."
+            sleep 15 & wait
+            continue
+          fi
+          break
+        done
+        if ! /tmp/oc wait deploy/router -n "${router_namespace}" --for condition=available --timeout=20m; then
+          echo "Installation failed"
+          cp $KUBECONFIG /tmp/admin.kubeconfig
+          exit 1
+        fi
+        cp $KUBECONFIG /tmp/admin.kubeconfig
+        echo "Installation successful"
+        echo "Starting installer smoke tests..."
+        export SMOKE_KUBECONFIG=${KUBECONFIG}
+        export SMOKE_MANIFEST_PATHS=/tmp/cluster/generated/manifests
+        # 3 masters/3 workers/1 bootstrap
+        export SMOKE_NODE_COUNT=7
+        if ! /tmp/smoke -cluster -test.v; then
+          echo "Smoke tests failed"
+          exit 1
+        fi
+        echo "Smoke tests passed"
+
+    # Performs cleanup of all created resources
+    - name: teardown
+      image: ${IMAGE_INSTALLER_SMOKE}
+      volumeMounts:
+      - name: shared-tmp
+        mountPath: /tmp/shared
+      - name: cluster-profile
+        mountPath: /etc/openshift-installer
+      - name: artifacts
+        mountPath: /tmp/artifacts
+      env:
+      - name: INSTANCE_PREFIX
+        value: ${NAMESPACE}-${JOB_NAME_HASH}
+      - name: TYPE
+        value: ${CLUSTER_TYPE}
+      - name: KUBECONFIG
+        value: /tmp/shared/admin.kubeconfig
+      command:
+      - /bin/bash
+      - -c
+      - |
+        #!/bin/bash
+        function teardown() {
+          set +e
+          touch /tmp/shared/exit
+          export PATH=$PATH:/tmp/shared
+
+          echo "Gathering artifacts ..."
+          mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics
+
+          oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
+          oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
+          oc --request-timeout=5s get nodes -o json > /tmp/artifacts/nodes.json
+          oc --request-timeout=5s get events --all-namespaces -o json > /tmp/artifacts/events.json
+          oc --request-timeout=5s get pods -l openshift.io/component=api --all-namespaces --template '{{ range .items }}-n {{ .metadata.namespace }} {{ .metadata.name }}{{ "\n" }}{{ end }}' > /tmp/pods-api
+
+          # gather nodes first in parallel since they may contain the most relevant debugging info
+          while IFS= read -r i; do
+            mkdir -p /tmp/artifacts/nodes/$i
+            (
+              oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/messages | gzip -c > /tmp/artifacts/nodes/$i/messages.gz
+              oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/journal | sed -e 's|.*href="\(.*\)".*|\1|;t;d' > /tmp/journals
+              while IFS= read -r j; do
+                oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/journal/${j}system.journal | gzip -c > /tmp/artifacts/nodes/$i/journal.gz
+              done < /tmp/journals
+              oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/metrics | gzip -c > /tmp/artifacts/metrics/node-$i.gz
+              oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/debug/pprof/heap > /tmp/artifacts/nodes/$i/heap
+              oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/secure | gzip -c > /tmp/artifacts/nodes/$i/secure.gz
+              oc get --request-timeout=20s --raw /api/v1/nodes/$i/proxy/logs/audit | gzip -c > /tmp/artifacts/nodes/$i/audit.gz
+            ) &
+          done < /tmp/nodes
+
+          while IFS= read -r i; do
+            file="$( echo "$i" | cut -d ' ' -f 3 | tr -s ' ' '_' )"
+            oc exec $i -- /bin/bash -c 'oc get --raw /debug/pprof/heap --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' > /tmp/artifacts/metrics/${file}-heap
+            oc exec $i -- /bin/bash -c 'oc get --raw /metrics --server "https://$( hostname ):8443" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-api.gz
+            oc exec $i -- /bin/bash -c 'oc get --raw /debug/pprof/heap --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' > /tmp/artifacts/metrics/${file}-controllers-heap
+            oc exec $i -- /bin/bash -c 'oc get --raw /metrics --server "https://$( hostname ):8444" --config /etc/origin/master/admin.kubeconfig' | gzip -c > /tmp/artifacts/metrics/${file}-controllers.gz
+          done < /tmp/pods-api
+
+          while IFS= read -r i; do
+            file="$( echo "$i" | cut -d ' ' -f 2,3,5 | tr -s ' ' '_' )"
+            oc logs --request-timeout=20s $i | gzip -c > /tmp/artifacts/pods/${file}.log.gz
+            oc logs --request-timeout=20s -p $i | gzip -c > /tmp/artifacts/pods/${file}_previous.log.gz
+          done < /tmp/containers
+
+          echo "Waiting for node logs to finish ..."
+          wait
+
+          echo "Deprovisioning cluster ..."
+          export AWS_SHARED_CREDENTIALS_FILE=/etc/openshift-installer/.awscred
+          cd /tmp/shared/cluster
+          for i in `seq 1 2`; do
+            tectonic destroy --dir=. --log-level=debug
+          done
+        }
+
+        trap 'teardown' EXIT
+        trap 'kill $(jobs -p); exit 0' TERM
+
+        for i in `seq 1 120`; do
+          if [[ -f /tmp/shared/exit ]]; then
+            exit 0
+          fi
+          sleep 60 & wait
+        done
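
The `test` container in the template above configures the smoke binary entirely through `SMOKE_*` environment variables before invoking `/tmp/smoke -cluster -test.v`. A minimal sketch of driving the same binary by hand against an already-installed cluster (the paths below are hypothetical; the variable names and flags are the ones the template uses):

```bash
#!/bin/bash
# Point the smoke tests at an existing tectonic cluster; adjust paths to your assets.
export SMOKE_KUBECONFIG="$HOME/tectonic-cluster/generated/auth/kubeconfig"   # hypothetical path
export SMOKE_MANIFEST_PATHS="$HOME/tectonic-cluster/generated/manifests"     # hypothetical path
export SMOKE_NODE_COUNT=7   # 3 masters + 3 workers + 1 bootstrap, as in the template
./smoke -cluster -test.v
```
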
diff --git a/projects/origin-release/bazel/Dockerfile b/projects/origin-release/bazel/Dockerfile
new file mode 100644
index 0000000000000..03ea11bcb2ef3
--- /dev/null
+++ b/projects/origin-release/bazel/Dockerfile
@@ -0,0 +1,11 @@
+# This Dockerfile is used to create origin-release:bazel, which is needed to build
+# the openshift/installer smoke tests.
+
+FROM docker.io/openshift/origin-release:golang-1.10
+
+RUN wget -P /etc/yum.repos.d https://copr.fedorainfracloud.org/coprs/vbatts/bazel/repo/epel-7/vbatts-bazel-epel-7.repo
+
+RUN yum install -y bazel gcc-c++ libtool && \
+    yum clean all
+
+WORKDIR /go/src/github.com/openshift/origin
diff --git a/projects/origin-release/pipeline.yaml b/projects/origin-release/pipeline.yaml
index b94e875851f0f..2585e67d397b9 100644
--- a/projects/origin-release/pipeline.yaml
+++ b/projects/origin-release/pipeline.yaml
@@ -161,6 +161,36 @@ objects:
     triggers:
     - imageChange: {}
       type: ImageChange
+- apiVersion: v1
+  kind: BuildConfig
+  metadata:
+    labels:
+      app: origin-release
+    name: "origin-release-bazel"
+  spec:
+    successfulBuildsHistoryLimit: 1
+    failedBuildsHistoryLimit: 2
+    output:
+      to:
+        kind: DockerImage
+        name: "${PUSH_REPOSITORY}:bazel"
+      pushSecret:
+        name: "dockerhub"
+    source:
+      git:
+        ref: "${SOURCE_REF}"
+        uri: "${SOURCE_URL}"
+      contextDir: "projects/origin-release/bazel/"
+      type: Git
+    strategy:
+      dockerStrategy:
+        from:
+          kind: ImageStreamTag
+          name: origin-release:base
+        imageOptimizationPolicy: SkipLayers
+    triggers:
+    - imageChange: {}
+      type: ImageChange
 - apiVersion: v1
   kind: BuildConfig
   metadata:
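
The `origin-release-bazel` BuildConfig above builds the context added in `projects/origin-release/bazel/`, overriding the Dockerfile's `FROM` with the `origin-release:base` ImageStreamTag and pushing the result to `${PUSH_REPOSITORY}:bazel`, which the `base-smoke` entry in the installer's ci-operator config then consumes as `origin-release:bazel`. A rough local approximation that simply uses the Dockerfile as written:

```bash
# Build the bazel-enabled builder image locally; the tag is arbitrary here.
docker build -t origin-release:bazel projects/origin-release/bazel/
```
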