From 44d75cb92cc0bbda644822ea62ba5ddadb395567 Mon Sep 17 00:00:00 2001 From: Zane Staggs Date: Thu, 22 Jan 2026 08:50:38 -0800 Subject: [PATCH] pass flaky providers --- scripts/test_providers.sh | 48 +++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index a20b709ed222..43355a5cdf3a 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -13,6 +13,14 @@ for arg in "$@"; do esac done +# Flaky models that are allowed to fail without failing the entire test run. +# These are typically preview/experimental models with inconsistent tool-calling behavior. +# Failures are still reported but don't block PRs. +ALLOWED_FAILURES=( + "google:gemini-3-pro-preview" + "openrouter:nvidia/nemotron-3-nano-30b-a3b" +) + if [ -f .env ]; then export $(grep -v '^#' .env | xargs) fi @@ -71,7 +79,20 @@ else fi echo "" +is_allowed_failure() { + local provider="$1" + local model="$2" + local key="${provider}:${model}" + for allowed in "${ALLOWED_FAILURES[@]}"; do + if [ "$allowed" = "$key" ]; then + return 0 + fi + done + return 1 +} + RESULTS=() +HARD_FAILURES=() for provider_config in "${PROVIDERS[@]}"; do # Split on " -> " to get provider and models @@ -94,8 +115,14 @@ for provider_config in "${PROVIDERS[@]}"; do echo "✓ SUCCESS: Test passed - $SUCCESS_MSG" RESULTS+=("✓ ${PROVIDER}: ${MODEL}") else - echo "✗ FAILED: Test failed - $FAILURE_MSG" - RESULTS+=("✗ ${PROVIDER}: ${MODEL}") + if is_allowed_failure "$PROVIDER" "$MODEL"; then + echo "⚠ FLAKY: Test failed but model is in allowed failures list - $FAILURE_MSG" + RESULTS+=("⚠ ${PROVIDER}: ${MODEL} (flaky)") + else + echo "✗ FAILED: Test failed - $FAILURE_MSG" + RESULTS+=("✗ ${PROVIDER}: ${MODEL}") + HARD_FAILURES+=("${PROVIDER}: ${MODEL}") + fi fi rm "$TMPFILE" rm -rf "$TESTDIR" @@ -107,11 +134,22 @@ echo "=== Test Summary ===" for result in "${RESULTS[@]}"; do echo "$result" done -if echo "${RESULTS[@]}" | grep -q "✗"; then + +if [ ${#HARD_FAILURES[@]} -gt 0 ]; then + echo "" + echo "Hard failures (${#HARD_FAILURES[@]}):" + for failure in "${HARD_FAILURES[@]}"; do + echo " - $failure" + done echo "" echo "Some tests failed!" exit 1 else - echo "" - echo "All tests passed!" + if echo "${RESULTS[@]}" | grep -q "⚠"; then + echo "" + echo "All required tests passed! (some flaky tests failed but are allowed)" + else + echo "" + echo "All tests passed!" + fi fi