Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion tests/e2e/common-operator-integ-suite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,43 @@ initialize_variables() {
if [ "${OCP}" == "true" ]; then COMMAND="oc"; fi
}

#######################################
# Wait until every OpenShift ClusterOperator reports a stable state, i.e. no
# operator has Available=False, Progressing=True or Degraded=True.
# Globals:
#   OCP (read) - the check is skipped unless this is "true"
# Arguments:
#   $1 - optional timeout in seconds (default: 600)
#   $2 - optional delay between polls in seconds (default: 15)
# Outputs:
#   Progress and result messages on stdout, error diagnostics on stderr.
# Returns:
#   0 when the check is skipped or all operators are stable.
#   Exits the shell with status 1 when jq is missing or the timeout expires.
#######################################
check_cluster_operators() {
  # This function is only relevant for OCP clusters
  if [ "${OCP:-}" != "true" ]; then
    echo "Skipping ClusterOperator check on non-OCP cluster."
    return 0
  fi

  # jq is needed to evaluate the operator conditions
  if ! command -v jq > /dev/null 2>&1; then
    echo "ERROR: jq is required for the cluster operator health check. Please install jq." >&2
    exit 1
  fi

  local timeout_seconds="${1:-600}"
  local poll_interval_seconds="${2:-15}"
  # Counts each operator at most once (any) and tolerates operators that have
  # not published any conditions yet ([]?), so jq does not abort on them.
  local unstable_filter='[.items[] | select(any(.status.conditions[]?; (.type == "Available" and .status == "False") or (.type == "Progressing" and .status == "True") or (.type == "Degraded" and .status == "True")))] | length'
  local end_time
  local operators_json
  local unstable_operators
  local progress_printed="false"

  echo "Validating OpenShift cluster operators are stable..."
  end_time=$(( $(date +%s) + timeout_seconds ))

  while [ "$(date +%s)" -lt "${end_time}" ]; do
    # Start every poll from "unknown". A failing oc or jq call must never be
    # mistaken for a healthy cluster, so only a literal "0" counts as stable.
    unstable_operators=""
    if operators_json=$(oc get clusteroperator -o json); then
      unstable_operators=$(printf '%s' "${operators_json}" | jq "${unstable_filter}") || unstable_operators=""
    fi

    if [[ "${unstable_operators}" == "0" ]]; then
      # Terminate the line of progress dots before the result message
      if [ "${progress_printed}" == "true" ]; then echo; fi
      echo "All cluster operators are stable."
      return 0
    fi

    printf '.'
    progress_printed="true"
    sleep "${poll_interval_seconds}"
  done

  if [ "${progress_printed}" == "true" ]; then echo; fi
  echo "ERROR: Timeout reached. Not all cluster operators are stable." >&2
  # Print the final status for debugging. Best effort only: a failure here
  # must not replace the exit status below.
  oc get clusteroperator >&2 || true
  exit 1
}

install_operator() {
echo "Installing sail-operator (KUBECONFIG=${KUBECONFIG})"
"${COMMAND}" create namespace "${NAMESPACE}"
Expand Down Expand Up @@ -250,6 +287,10 @@ if [ "${OLM}" != "true" ] && [ "${SKIP_DEPLOY}" != "true" ]; then
fi
fi

# Check that all cluster operators are stable before running the tests. This only applies to OCP clusters.
# This is to avoid test failures due to cluster instability.
check_cluster_operators

set +e
# Disable errexit so that a failing test run does not abort the script before report.xml is generated.
# The test exit code is captured below, which also lets the cleanup trap run.
Expand All @@ -259,4 +300,4 @@ go run github.com/onsi/ginkgo/v2/ginkgo -tags e2e \
--timeout 60m --junit-report="${ARTIFACTS}/report.xml" ${GINKGO_FLAGS:-} "${WD}"/...
TEST_EXIT_CODE=$?

exit "${TEST_EXIT_CODE}"
exit "${TEST_EXIT_CODE}"