From 4667797f009be631081e34c25ef4de6cb9498d04 Mon Sep 17 00:00:00 2001
From: Eric Fried
Date: Mon, 2 Aug 2021 18:39:09 -0500
Subject: [PATCH] Introduce e2e-pool test

This test
- Stands up a clusterpool of size=1
- Waits for its ClusterDeployment to be Installed and Hibernating
- Creates a claim and waits for the CD to be Running
- Hibernates the CD and waits for it to be Hibernating
- Resumes the CD and waits for it to be Running
- Deletes the claim and the pool
- Waits for all CDs to disappear

HIVE-1605
---
 Makefile              |   4 +
 hack/e2e-common.sh    | 182 +++++++++++++++++++++++++++++++++++++++++
 hack/e2e-pool-test.sh | 184 ++++++++++++++++++++++++++++++++++++++++++
 hack/e2e-test.sh      | 176 ++--------------------------------------
 4 files changed, 375 insertions(+), 171 deletions(-)
 create mode 100755 hack/e2e-common.sh
 create mode 100755 hack/e2e-pool-test.sh

diff --git a/Makefile b/Makefile
index d25d6821904..e8690551e8d 100644
--- a/Makefile
+++ b/Makefile
@@ -154,6 +154,10 @@ test-integration: generate
 test-e2e:
 	hack/e2e-test.sh
 
+.PHONY: test-e2e-pool
+test-e2e-pool:
+	hack/e2e-pool-test.sh
+
 .PHONY: test-e2e-postdeploy
 test-e2e-postdeploy:
 	go test $(GO_MOD_FLAGS) -v -timeout 0 -count=1 ./test/e2e/postdeploy/...
diff --git a/hack/e2e-common.sh b/hack/e2e-common.sh
new file mode 100755
index 00000000000..3452c524887
--- /dev/null
+++ b/hack/e2e-common.sh
@@ -0,0 +1,182 @@
+max_tries=60
+sleep_between_tries=10
+# Set timeout for the cluster deployment to install
+# timeout = sleep_between_cluster_deployment_status_checks * max_cluster_deployment_status_checks
+max_cluster_deployment_status_checks=90
+sleep_between_cluster_deployment_status_checks="1m"
+
+export CLUSTER_NAMESPACE="${CLUSTER_NAMESPACE:-cluster-test}"
+
+# In CI, HIVE_IMAGE and RELEASE_IMAGE are set via the job's `dependencies`.
+if [[ -z "$HIVE_IMAGE" ]]; then
+    echo "The HIVE_IMAGE environment variable was not found." >&2
+    echo "It must be set to the fully-qualified pull spec of a hive container image." >&2
+    echo "E.g. quay.io/my-user/hive:latest" >&2
+    exit 1
+fi
+if [[ -z "$RELEASE_IMAGE" ]]; then
+    echo "The RELEASE_IMAGE environment variable was not found." >&2
+    echo "It must be set to the fully-qualified pull spec of an OCP release container image." >&2
+    echo "E.g. quay.io/openshift-release-dev/ocp-release:4.7.0-x86_64" >&2
+    exit 1
+fi
+
+echo "Running ${TEST_NAME} with HIVE_IMAGE ${HIVE_IMAGE}"
+echo "Running ${TEST_NAME} with RELEASE_IMAGE ${RELEASE_IMAGE}"
+
+i=1
+while [ $i -le ${max_tries} ]; do
+    if [ $i -gt 1 ]; then
+        # Don't sleep on first loop
+        echo "sleeping ${sleep_between_tries} seconds"
+        sleep ${sleep_between_tries}
+    fi
+
+    echo -n "Creating namespace ${CLUSTER_NAMESPACE}. Try #${i}/${max_tries}... "
+    if oc create namespace "${CLUSTER_NAMESPACE}"; then
+        echo "Success"
+        break
+    else
+        echo -n "Failed, "
+    fi
+
+    i=$((i + 1))
+done
+
+ORIGINAL_NAMESPACE=$(oc config view -o json | jq -er 'select(.contexts[].name == ."current-context") | .contexts[]?.context.namespace // ""')
+echo Original default namespace is ${ORIGINAL_NAMESPACE}
+echo Setting default namespace to ${CLUSTER_NAMESPACE}
+if ! oc config set-context --current --namespace=${CLUSTER_NAMESPACE}; then
+    echo "Failed to set the default namespace"
+    exit 1
+fi
+
+function restore_default_namespace() {
+    echo Restoring default namespace to ${ORIGINAL_NAMESPACE}
+    oc config set-context --current --namespace=${ORIGINAL_NAMESPACE}
+}
+trap 'restore_default_namespace' EXIT
+
+if [ $i -ge ${max_tries} ] ; then
+    # Failed the maximum amount of times.
+    echo "exiting"
+    exit 10
+fi
+
+CLUSTER_PROFILE_DIR="${CLUSTER_PROFILE_DIR:-/tmp/cluster}"
+
+CLOUD="${CLOUD:-aws}"
+export ARTIFACT_DIR="${ARTIFACT_DIR:-/tmp}"
+
+SSH_PUBLIC_KEY_FILE="${SSH_PUBLIC_KEY_FILE:-${CLUSTER_PROFILE_DIR}/ssh-publickey}"
+# If not specified or nonexistent, generate a keypair to use
+if ! [[ -s "${SSH_PUBLIC_KEY_FILE}" ]]; then
+    echo "Specified SSH public key file '${SSH_PUBLIC_KEY_FILE}' is invalid or nonexistent. Generating a single-use keypair."
+    WHERE=${SSH_PUBLIC_KEY_FILE%/*}
+    mkdir -p ${WHERE}
+    # Tell the installmanager where to find the private key
+    export SSH_PRIV_KEY_PATH=$(mktemp -p ${WHERE})
+    # ssh-keygen will put the public key here
+    TMP_PUB=${SSH_PRIV_KEY_PATH}.pub
+    # Answer 'y' to the overwrite prompt, since we touched the file
+    yes y | ssh-keygen -q -t rsa -N '' -f ${SSH_PRIV_KEY_PATH}
+    # Now put the pubkey where we expected it
+    mv ${TMP_PUB} ${SSH_PUBLIC_KEY_FILE}
+fi
+
+PULL_SECRET_FILE="${PULL_SECRET_FILE:-${CLUSTER_PROFILE_DIR}/pull-secret}"
+export HIVE_NS="hive-e2e"
+export HIVE_OPERATOR_NS="hive-operator"
+
+# Install Hive
+IMG="${HIVE_IMAGE}" make deploy
+
+function save_hive_logs() {
+    oc logs -n "${HIVE_NS}" deployment/hive-controllers > "${ARTIFACT_DIR}/hive-controllers.log"
+    oc logs -n "${HIVE_NS}" deployment/hiveadmission > "${ARTIFACT_DIR}/hiveadmission.log"
+}
+
+SRC_ROOT=$(git rev-parse --show-toplevel)
+
+USE_MANAGED_DNS=${USE_MANAGED_DNS:-true}
+
+case "${CLOUD}" in
+"aws")
+    CREDS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
+    # Accept creds from the env if the file doesn't exist.
+    if ! [[ -f $CREDS_FILE ]] && [[ -n "${AWS_ACCESS_KEY_ID}" ]] && [[ -n "${AWS_SECRET_ACCESS_KEY}" ]]; then
+        # TODO: Refactor contrib/pkg/adm/managedns/enable::generateAWSCredentialsSecret to
+        # use contrib/pkg/utils/aws/aws::GetAWSCreds, which knows how to look for the env
+        # vars if the file isn't specified; and use this condition to generate (or not)
+        # the whole CREDS_FILE_ARG="--creds-file=${CREDS_FILE}".
+        printf '[default]\naws_access_key_id=%s\naws_secret_access_key=%s\n' "$AWS_ACCESS_KEY_ID" "$AWS_SECRET_ACCESS_KEY" > $CREDS_FILE
+    fi
+    BASE_DOMAIN="${BASE_DOMAIN:-hive-ci.openshift.com}"
+    EXTRA_CREATE_CLUSTER_ARGS="--aws-user-tags expirationDate=$(date -d '4 hours' --iso=minutes --utc)"
+    ;;
+"azure")
+    CREDS_FILE="${CLUSTER_PROFILE_DIR}/osServicePrincipal.json"
+    BASE_DOMAIN="${BASE_DOMAIN:-ci.azure.devcluster.openshift.com}"
+    ;;
+"gcp")
+    CREDS_FILE="${CLUSTER_PROFILE_DIR}/gce.json"
+    BASE_DOMAIN="${BASE_DOMAIN:-origin-ci-int-gce.dev.openshift.com}"
+    ;;
+*)
+    echo "unknown cloud: ${CLOUD}"
+    exit 1
+    ;;
+esac
+
+if $USE_MANAGED_DNS; then
+    # Generate a short random shard string for this cluster similar to OSD prod.
+    # This is to prevent name conflicts across customer clusters.
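+    # Illustrative example (assumption, not part of the original change): a generated
+    # CLUSTER_SHARD of "qlzxcvbn" with the default AWS BASE_DOMAIN would yield
+    # CLUSTER_DOMAIN="qlzxcvbn.hive-ci.openshift.com".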
+    CLUSTER_SHARD=$(cat /dev/urandom | tr -dc 'a-z' | fold -w 8 | head -n 1)
+    CLUSTER_DOMAIN="${CLUSTER_SHARD}.${BASE_DOMAIN}"
+    go run "${SRC_ROOT}/contrib/cmd/hiveutil/main.go" adm manage-dns enable ${BASE_DOMAIN} \
+        --creds-file="${CREDS_FILE}" --cloud="${CLOUD}"
+    MANAGED_DNS_ARG=" --manage-dns"
+else
+    CLUSTER_DOMAIN="${BASE_DOMAIN}"
+fi
+
+
+echo "Using cluster base domain: ${CLUSTER_DOMAIN}"
+
+function capture_manifests() {
+    oc get clusterdeployment -A -o yaml &> "${ARTIFACT_DIR}/hive_clusterdeployment.yaml" || true
+    oc get clusterimageset -o yaml &> "${ARTIFACT_DIR}/hive_clusterimagesets.yaml" || true
+    oc get clusterprovision -A -o yaml &> "${ARTIFACT_DIR}/hive_clusterprovision.yaml" || true
+    oc get clusterstate -A -o yaml &> "${ARTIFACT_DIR}/hive_clusterstate.yaml" || true
+}
+
+function capture_cluster_logs() {
+    local CLUSTER_NAME=$1
+    local CLUSTER_NAMESPACE=$2
+    local INSTALL_RESULT=$3
+
+    # Capture install logs
+    if IMAGESET_JOB_NAME=$(oc get job -l "hive.openshift.io/cluster-deployment-name=${CLUSTER_NAME},hive.openshift.io/imageset=true" -o name -n ${CLUSTER_NAMESPACE}) && [ "${IMAGESET_JOB_NAME}" ]
+    then
+        oc logs -c hive -n ${CLUSTER_NAMESPACE} ${IMAGESET_JOB_NAME} &> "${ARTIFACT_DIR}/hive_imageset_job.log" || true
+        oc get ${IMAGESET_JOB_NAME} -n ${CLUSTER_NAMESPACE} -o yaml &> "${ARTIFACT_DIR}/hive_imageset_job.yaml" || true
+    fi
+    if INSTALL_JOB_NAME=$(oc get job -l "hive.openshift.io/cluster-deployment-name=${CLUSTER_NAME},hive.openshift.io/install=true" -o name -n ${CLUSTER_NAMESPACE}) && [ "${INSTALL_JOB_NAME}" ]
+    then
+        oc logs -c hive -n ${CLUSTER_NAMESPACE} ${INSTALL_JOB_NAME} &> "${ARTIFACT_DIR}/hive_install_job.log" || true
+        oc get ${INSTALL_JOB_NAME} -n ${CLUSTER_NAMESPACE} -o yaml &> "${ARTIFACT_DIR}/hive_install_job.yaml" || true
+    fi
+    echo "************* INSTALL JOB LOG *************"
+    if oc get clusterprovision -l "hive.openshift.io/cluster-deployment-name=${CLUSTER_NAME}" -o jsonpath='{.items[0].spec.installLog}' &> "${ARTIFACT_DIR}/hive_install_console.log"; then
+        cat "${ARTIFACT_DIR}/hive_install_console.log"
+    else
+        cat "${ARTIFACT_DIR}/hive_install_job.log"
+    fi
+
+    if [[ "${INSTALL_RESULT}" != "success" ]]
+    then
+        mkdir "${ARTIFACT_DIR}/hive"
+        ${SRC_ROOT}/hack/logextractor.sh ${CLUSTER_NAME} "${ARTIFACT_DIR}/hive"
+        exit 1
+    fi
+}
\ No newline at end of file
diff --git a/hack/e2e-pool-test.sh b/hack/e2e-pool-test.sh
new file mode 100755
index 00000000000..4374b0dbbd7
--- /dev/null
+++ b/hack/e2e-pool-test.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+set -ex
+
+TEST_NAME=e2e-pool
+source ${0%/*}/e2e-common.sh
+
+# TODO: Use something better here.
+# `make test-e2e-postdeploy` could work, but does more than we need.
+echo "Waiting for the deployment to settle"
+sleep 120
+
+echo "Creating imageset"
+IMAGESET_NAME=cis
+oc apply -f -<<EOF
+apiVersion: hive.openshift.io/v1
+kind: ClusterImageSet
+metadata:
+  name: ${IMAGESET_NAME}
+spec:
+  releaseImage: ${RELEASE_IMAGE}
+EOF
+
+function count_cds() {
+    oc get cd -A -o json | jq -r '.items | length'
+}
+
+# Verify no CDs exist yet
+NUM_CDS=$(count_cds)
+if [[ $NUM_CDS != "0" ]]; then
+    echo "Got an unexpected number of pre-existing ClusterDeployments." >&2
+    echo "Expected 0." >&2
+    echo "Got: $NUM_CDS" >&2
+    exit 5
+fi
+
+# Use the CLUSTER_NAME configured by the test as the pool name. This will result in CD names
+# being seeded with that as a prefix, which will make them visible to our leak detector.
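+# (Assumption: CLUSTER_NAME is supplied by the environment, e.g. by the CI job; unlike
+# e2e-test.sh, this script does not generate a default value for it.)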
+POOL_NAME=$CLUSTER_NAME
+
+function cleanup() {
+    capture_manifests
+    # Let's save the logs now in case any of the following never finish
+    echo "Saving hive logs before cleanup"
+    save_hive_logs
+    oc delete clusterclaim --all
+    oc delete clusterpool $POOL_NAME
+    # Wait indefinitely for all CDs to disappear. If we exceed the test timeout,
+    # we'll get killed, and resources will leak.
+    while true; do
+        sleep ${sleep_between_tries}
+        NUM_CDS=$(count_cds)
+        if [[ $NUM_CDS == "0" ]]; then
+            break
+        fi
+        echo "Waiting for $NUM_CDS ClusterDeployment(s) to be cleaned up"
+    done
+    # And if we get this far, overwrite the logs with the latest
+    echo "Saving hive logs after cleanup"
+    save_hive_logs
+}
+trap cleanup EXIT
+
+echo "Creating cluster pool"
+# TODO: This can't be changed yet -- see other TODOs (search for 'variable POOL_SIZE')
+POOL_SIZE=1
+# TODO: This is aws-specific at the moment.
+go run "${SRC_ROOT}/contrib/cmd/hiveutil/main.go" clusterpool create-pool \
+    -n "${CLUSTER_NAMESPACE}" \
+    --cloud="${CLOUD}" \
+    --creds-file="${CREDS_FILE}" \
+    --pull-secret-file="${PULL_SECRET_FILE}" \
+    --image-set "${IMAGESET_NAME}" \
+    --region us-east-1 \
+    --size "${POOL_SIZE}" \
+    ${POOL_NAME}
+
+echo "Waiting for pool to create $POOL_SIZE ClusterDeployment(s)"
+i=1
+while [[ $i -le ${max_tries} ]]; do
+    if [[ $i -gt 1 ]]; then
+        # Don't sleep on first loop
+        echo "sleeping ${sleep_between_tries} seconds"
+        sleep ${sleep_between_tries}
+    fi
+
+    NUM_CDS=$(count_cds)
+    if [[ $NUM_CDS == "${POOL_SIZE}" ]]; then
+        echo "Success"
+        break
+    else
+        echo -n "Failed (${NUM_CDS}), "
+    fi
+
+    i=$((i + 1))
+done
+
+if [[ $i -ge ${max_tries} ]] ; then
+    # Failed the maximum amount of times.
+    echo "exiting"
+    exit 10
+fi
+
+# Get the CD name & namespace (which should be the same)
+# TODO: Set this up for variable POOL_SIZE
+CLUSTER_NAME=$(oc get cd -A -o json | jq -r .items[0].metadata.name)
+
+echo "Waiting for ClusterDeployment $CLUSTER_NAME to finish installing"
+# TODO: Set this up for variable POOL_SIZE
+i=1
+while [[ $i -le ${max_cluster_deployment_status_checks} ]]; do
+    CD_JSON=$(oc get cd -n $CLUSTER_NAME $CLUSTER_NAME -o json)
+    if [[ $(jq .spec.installed <<<"${CD_JSON}") == "true" ]]; then
+        echo "ClusterDeployment is Installed"
+        break
+    fi
+    PF_COND=$(jq -r '.status.conditions[] | select(.type == "ProvisionFailed")' <<<"${CD_JSON}")
+    if [[ $(jq -r .status <<<"${PF_COND}") == 'True' ]]; then
+        FAILURE_REASON=$(jq -r .reason <<<"${PF_COND}")
+        FAILURE_MESSAGE=$(jq -r .message <<<"${PF_COND}")
+        echo "ClusterDeployment install failed with reason '$FAILURE_REASON' and message: $FAILURE_MESSAGE" >&2
+        capture_cluster_logs $CLUSTER_NAME $CLUSTER_NAME failure
+        exit 7
+    fi
+    sleep ${sleep_between_cluster_deployment_status_checks}
+    echo "Still waiting for the ClusterDeployment ${CLUSTER_NAME} to install. Status check #${i}/${max_cluster_deployment_status_checks}..."
+    i=$((i + 1))
+
+done
+
+function wait_for_hibernation_state() {
+    local CLUSTER_NAME=$1
+    local EXPECTED_STATE=$2
+    echo "Waiting for ClusterDeployment $CLUSTER_NAME to be $EXPECTED_STATE"
+    local i=1
+    while [[ $i -le ${max_tries} ]]; do
+        if [[ $i -gt 1 ]]; then
+            # Don't sleep on first loop
+            echo "sleeping ${sleep_between_tries} seconds"
+            sleep ${sleep_between_tries}
+        fi
+
+        HIB_COND=$(oc get cd -n $CLUSTER_NAME $CLUSTER_NAME -o json | jq -r '.status.conditions[] | select(.type == "Hibernating")')
+        if [[ $(jq -r .reason <<<"${HIB_COND}") == $EXPECTED_STATE ]]; then
+            echo "Success"
+            break
+        else
+            echo -n "Failed, "
+        fi
+
+        i=$((i + 1))
+    done
+
+    if [[ $i -ge ${max_tries} ]] ; then
+        # Failed the maximum amount of times.
+        echo "ClusterDeployment $CLUSTER_NAME still not $EXPECTED_STATE" >&2
+        echo "Reason: $(jq -r .reason <<<"${HIB_COND}")" >&2
+        echo "Message: $(jq -r .message <<<"${HIB_COND}")" >&2
+        exit 9
+    fi
+}
+
+wait_for_hibernation_state $CLUSTER_NAME Hibernating
+
+echo "Claiming"
+CLAIM_NAME=the-claim
+go run "${SRC_ROOT}/contrib/cmd/hiveutil/main.go" clusterpool claim -n $CLUSTER_NAMESPACE $POOL_NAME $CLAIM_NAME
+
+wait_for_hibernation_state $CLUSTER_NAME Running
+
+echo "Re-hibernating"
+oc patch cd -n $CLUSTER_NAME $CLUSTER_NAME --type=merge -p '{"spec": {"powerState": "Hibernating"}}'
+
+wait_for_hibernation_state $CLUSTER_NAME Hibernating
+
+echo "Re-resuming"
+oc patch cd -n $CLUSTER_NAME $CLUSTER_NAME --type=merge -p '{"spec": {"powerState": "Running"}}'
+
+wait_for_hibernation_state $CLUSTER_NAME Running
+
+# Let the cleanup trap do the cleanup.
diff --git a/hack/e2e-test.sh b/hack/e2e-test.sh
index 56605263c04..6faba5d2152 100755
--- a/hack/e2e-test.sh
+++ b/hack/e2e-test.sh
@@ -2,100 +2,8 @@
 
 set -e
 
-max_tries=60
-sleep_between_tries=10
-# Set timeout for the cluster deployment to install
-# timeout = sleep_between_cluster_deployment_status_checks * max_cluster_deployment_status_checks
-max_cluster_deployment_status_checks=90
-sleep_between_cluster_deployment_status_checks="1m"
-
-export CLUSTER_NAMESPACE="${CLUSTER_NAMESPACE:-cluster-test}"
-
-# In CI, HIVE_IMAGE and RELEASE_IMAGE are set via the job's `dependencies`.
-if [[ -z "$HIVE_IMAGE" ]]; then
-    echo "The HIVE_IMAGE environment variable was not found." >&2
-    echo "It must be set to the fully-qualified pull spec of a hive container image." >&2
-    echo "E.g. quay.io/my-user/hive:latest" >&2
-    exit 1
-fi
-if [[ -z "$RELEASE_IMAGE" ]]; then
-    echo "The RELEASE_IMAGE environment variable was not found." >&2
-    echo "It must be set to the fully-qualified pull spec of an OCP release container image." >&2
-    echo "E.g. quay.io/openshift-release-dev/ocp-release:4.7.0-x86_64" >&2
-    exit 1
-fi
-
-echo "Running e2e with HIVE_IMAGE ${HIVE_IMAGE}"
-echo "Running e2e with RELEASE_IMAGE ${RELEASE_IMAGE}"
-
-i=1
-while [ $i -le ${max_tries} ]; do
-    if [ $i -gt 1 ]; then
-        # Don't sleep on first loop
-        echo "sleeping ${sleep_between_tries} seconds"
-        sleep ${sleep_between_tries}
-    fi
-
-    echo -n "Creating namespace ${CLUSTER_NAMESPACE}. Try #${i}/${max_tries}... "
-    if oc create namespace "${CLUSTER_NAMESPACE}"; then
-        echo "Success"
-        break
-    else
-        echo -n "Failed, "
-    fi
-
-    i=$((i + 1))
-done
-
-ORIGINAL_NAMESPACE=$(oc config view -o json | jq -er 'select(.contexts[].name == ."current-context") | .contexts[]?.context.namespace // ""')
-echo Original default namespace is ${ORIGINAL_NAMESPACE}
-echo Setting default namespace to ${CLUSTER_NAMESPACE}
-if ! oc config set-context --current --namespace=${CLUSTER_NAMESPACE}; then
-    echo "Failed to set the default namespace"
-    exit 1
-fi
-
-function restore_default_namespace() {
-    echo Restoring default namespace to ${ORIGINAL_NAMESPACE}
-    oc config set-context --current --namespace=${ORIGINAL_NAMESPACE}
-}
-trap 'restore_default_namespace' EXIT
-
-if [ $i -ge ${max_tries} ] ; then
-    # Failed the maximum amount of times.
-    echo "exiting"
-    exit 10
-fi
-
-CLUSTER_PROFILE_DIR="${CLUSTER_PROFILE_DIR:-/tmp/cluster}"
-
-# Create a new cluster deployment
-CLOUD="${CLOUD:-aws}"
-export CLUSTER_NAME="${CLUSTER_NAME:-hive-$(uuidgen | tr '[:upper:]' '[:lower:]')}"
-export ARTIFACT_DIR="${ARTIFACT_DIR:-/tmp}"
-
-SSH_PUBLIC_KEY_FILE="${SSH_PUBLIC_KEY_FILE:-${CLUSTER_PROFILE_DIR}/ssh-publickey}"
-# If not specified or nonexistent, generate a keypair to use
-if ! [[ -s "${SSH_PUBLIC_KEY_FILE}" ]]; then
-    echo "Specified SSH public key file '${SSH_PUBLIC_KEY_FILE}' is invalid or nonexistent. Generating a single-use keypair."
-    WHERE=${SSH_PUBLIC_KEY_FILE%/*}
-    mkdir -p ${WHERE}
-    # Tell the installmanager where to find the private key
-    export SSH_PRIV_KEY_PATH=$(mktemp -p ${WHERE})
-    # ssh-keygen will put the public key here
-    TMP_PUB=${SSH_PRIV_KEY_PATH}.pub
-    # Answer 'y' to the overwrite prompt, since we touched the file
-    yes y | ssh-keygen -q -t rsa -N '' -f ${SSH_PRIV_KEY_PATH}
-    # Now put the pubkey where we expected it
-    mv ${TMP_PUB} ${SSH_PUBLIC_KEY_FILE}
-fi
-
-PULL_SECRET_FILE="${PULL_SECRET_FILE:-${CLUSTER_PROFILE_DIR}/pull-secret}"
-export HIVE_NS="hive-e2e"
-export HIVE_OPERATOR_NS="hive-operator"
-
-# Install Hive
-IMG="${HIVE_IMAGE}" make deploy
+TEST_NAME=e2e
+source ${0%/*}/e2e-common.sh
 
 
 function teardown() {
@@ -128,60 +36,11 @@ function teardown() {
 }
 trap 'teardown' EXIT
 
-function save_hive_logs() {
-    oc logs -n "${HIVE_NS}" deployment/hive-controllers > "${ARTIFACT_DIR}/hive-controllers.log"
-    oc logs -n "${HIVE_NS}" deployment/hiveadmission > "${ARTIFACT_DIR}/hiveadmission.log"
-}
-
 echo "Running post-deploy tests"
 make test-e2e-postdeploy
 
-SRC_ROOT=$(git rev-parse --show-toplevel)
-
-USE_MANAGED_DNS=${USE_MANAGED_DNS:-true}
-
-case "${CLOUD}" in
-"aws")
-    CREDS_FILE="${CLUSTER_PROFILE_DIR}/.awscred"
-    # Accept creds from the env if the file doesn't exist.
-    if ! [[ -f $CREDS_FILE ]] && [[ -n "${AWS_ACCESS_KEY_ID}" ]] && [[ -n "${AWS_SECRET_ACCESS_KEY}" ]]; then
-        # TODO: Refactor contrib/pkg/adm/managedns/enable::generateAWSCredentialsSecret to
-        # use contrib/pkg/utils/aws/aws::GetAWSCreds, which knows how to look for the env
-        # vars if the file isn't specified; and use this condition to generate (or not)
-        # the whole CREDS_FILE_ARG="--creds-file=${CREDS_FILE}".
-        printf '[default]\naws_access_key_id=%s\naws_secret_access_key=%s\n' "$AWS_ACCESS_KEY_ID" "$AWS_SECRET_ACCESS_KEY" > $CREDS_FILE
-    fi
-    BASE_DOMAIN="${BASE_DOMAIN:-hive-ci.openshift.com}"
-    EXTRA_CREATE_CLUSTER_ARGS="--aws-user-tags expirationDate=$(date -d '4 hours' --iso=minutes --utc)"
-    ;;
-"azure")
-    CREDS_FILE="${CLUSTER_PROFILE_DIR}/osServicePrincipal.json"
-    BASE_DOMAIN="${BASE_DOMAIN:-ci.azure.devcluster.openshift.com}"
-    ;;
-"gcp")
-    CREDS_FILE="${CLUSTER_PROFILE_DIR}/gce.json"
-    BASE_DOMAIN="${BASE_DOMAIN:-origin-ci-int-gce.dev.openshift.com}"
-    ;;
-*)
-    echo "unknown cloud: ${CLOUD}"
-    exit 1
-    ;;
-esac
-
-if $USE_MANAGED_DNS; then
-    # Generate a short random shard string for this cluster similar to OSD prod.
-    # This is to prevent name conflicts across customer clusters.
-    CLUSTER_SHARD=$(cat /dev/urandom | tr -dc 'a-z' | fold -w 8 | head -n 1)
-    CLUSTER_DOMAIN="${CLUSTER_SHARD}.${BASE_DOMAIN}"
-    go run "${SRC_ROOT}/contrib/cmd/hiveutil/main.go" adm manage-dns enable ${BASE_DOMAIN} \
-        --creds-file="${CREDS_FILE}" --cloud="${CLOUD}"
-    MANAGED_DNS_ARG=" --manage-dns"
-else
-    CLUSTER_DOMAIN="${BASE_DOMAIN}"
-fi
-
+export CLUSTER_NAME="${CLUSTER_NAME:-hive-$(uuidgen | tr '[:upper:]' '[:lower:]')}"
-echo "Using cluster base domain: ${CLUSTER_DOMAIN}"
 
 echo "Creating cluster deployment"
 go run "${SRC_ROOT}/contrib/cmd/hiveutil/main.go" create-cluster "${CLUSTER_NAME}" \
     --cloud="${CLOUD}" \
@@ -280,33 +139,8 @@ case "${INSTALL_RESULT}" in
     ;;
 esac
 
-# Capture install logs
-if IMAGESET_JOB_NAME=$(oc get job -l "hive.openshift.io/cluster-deployment-name=${CLUSTER_NAME},hive.openshift.io/imageset=true" -o name -n ${CLUSTER_NAMESPACE}) && [ "${IMAGESET_JOB_NAME}" ]
-then
-    oc logs -c hive -n ${CLUSTER_NAMESPACE} ${IMAGESET_JOB_NAME} &> "${ARTIFACT_DIR}/hive_imageset_job.log" || true
-    oc get ${IMAGESET_JOB_NAME} -n ${CLUSTER_NAMESPACE} -o yaml &> "${ARTIFACT_DIR}/hive_imageset_job.yaml" || true
-fi
-if INSTALL_JOB_NAME=$(oc get job -l "hive.openshift.io/cluster-deployment-name=${CLUSTER_NAME},hive.openshift.io/install=true" -o name -n ${CLUSTER_NAMESPACE}) && [ "${INSTALL_JOB_NAME}" ]
-then
-    oc logs -c hive -n ${CLUSTER_NAMESPACE} ${INSTALL_JOB_NAME} &> "${ARTIFACT_DIR}/hive_install_job.log" || true
-    oc get ${INSTALL_JOB_NAME} -n ${CLUSTER_NAMESPACE} -o yaml &> "${ARTIFACT_DIR}/hive_install_job.yaml" || true
-fi
-oc get clusterdeployment -A -o yaml &> "${ARTIFACT_DIR}/hive_clusterdeployment.yaml" || true
-oc get clusterimageset -o yaml &> "${ARTIFACT_DIR}/hive_clusterimagesets.yaml" || true
-oc get clusterprovision -A -o yaml &> "${ARTIFACT_DIR}/hive_clusterprovision.yaml" || true
-echo "************* INSTALL JOB LOG *************"
-if oc get clusterprovision -l "hive.openshift.io/cluster-deployment-name=${CLUSTER_NAME}" -o jsonpath='{.items[0].spec.installLog}' &> "${ARTIFACT_DIR}/hive_install_console.log"; then
-    cat "${ARTIFACT_DIR}/hive_install_console.log"
-else
-    cat "${ARTIFACT_DIR}/hive_install_job.log"
-fi
-
-if [[ "${INSTALL_RESULT}" != "success" ]]
-then
-    mkdir "${ARTIFACT_DIR}/hive"
-    ${SRC_ROOT}/hack/logextractor.sh ${CLUSTER_NAME} "${ARTIFACT_DIR}/hive"
-    exit 1
-fi
+capture_manifests
+capture_cluster_logs $CLUSTER_NAME $CLUSTER_NAMESPACE $INSTALL_RESULT
 
 echo "Running post-install tests"
 make test-e2e-postinstall
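
# Usage sketch (illustrative; not part of the patch). Assuming the inputs the
# scripts check for are available, the new target can be run locally much as
# CI runs it. The image pull spec, profile directory, and pool name below are
# placeholders.
#
#   export HIVE_IMAGE=quay.io/<your-user>/hive:latest
#   export RELEASE_IMAGE=quay.io/openshift-release-dev/ocp-release:4.7.0-x86_64
#   export CLUSTER_PROFILE_DIR=/tmp/cluster   # pull-secret, ssh-publickey, cloud creds
#   export CLUSTER_NAME=pool-test-example     # also used as the ClusterPool name
#   make test-e2e-pool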