Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 43 additions & 5 deletions scripts/test_providers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ for arg in "$@"; do
esac
done

# Flaky models that are allowed to fail without failing the entire test run.
# These are typically preview/experimental models with inconsistent tool-calling behavior.
# Failures are still reported but don't block PRs.
ALLOWED_FAILURES=(
"google:gemini-3-pro-preview"
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
)

if [ -f .env ]; then
export $(grep -v '^#' .env | xargs)
fi
Expand Down Expand Up @@ -71,7 +79,20 @@ else
fi
echo ""

is_allowed_failure() {
local provider="$1"
local model="$2"
local key="${provider}:${model}"
for allowed in "${ALLOWED_FAILURES[@]}"; do
if [ "$allowed" = "$key" ]; then
return 0
fi
done
return 1
}

RESULTS=()
HARD_FAILURES=()

for provider_config in "${PROVIDERS[@]}"; do
# Split on " -> " to get provider and models
Expand All @@ -94,8 +115,14 @@ for provider_config in "${PROVIDERS[@]}"; do
echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
RESULTS+=("✓ ${PROVIDER}: ${MODEL}")
else
echo "✗ FAILED: Test failed - $FAILURE_MSG"
RESULTS+=("✗ ${PROVIDER}: ${MODEL}")
if is_allowed_failure "$PROVIDER" "$MODEL"; then
echo "⚠ FLAKY: Test failed but model is in allowed failures list - $FAILURE_MSG"
RESULTS+=("⚠ ${PROVIDER}: ${MODEL} (flaky)")
else
echo "✗ FAILED: Test failed - $FAILURE_MSG"
RESULTS+=("✗ ${PROVIDER}: ${MODEL}")
HARD_FAILURES+=("${PROVIDER}: ${MODEL}")
fi
fi
rm "$TMPFILE"
rm -rf "$TESTDIR"
Expand All @@ -107,11 +134,22 @@ echo "=== Test Summary ==="
for result in "${RESULTS[@]}"; do
echo "$result"
done
if echo "${RESULTS[@]}" | grep -q "✗"; then

if [ ${#HARD_FAILURES[@]} -gt 0 ]; then
echo ""
echo "Hard failures (${#HARD_FAILURES[@]}):"
for failure in "${HARD_FAILURES[@]}"; do
echo " - $failure"
done
echo ""
echo "Some tests failed!"
exit 1
else
echo ""
echo "All tests passed!"
if echo "${RESULTS[@]}" | grep -q "⚠"; then
echo ""
echo "All required tests passed! (some flaky tests failed but are allowed)"
else
echo ""
echo "All tests passed!"
fi
fi
Loading