From c8f118bc876fef63ff558eee95fdfb3724c5dd2d Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 30 Dec 2025 12:21:25 +1100 Subject: [PATCH 1/6] adding more open models Looking for reliably passing runs, shifting to "exacto" as random providers for qwen3 aren't always the same. --- scripts/test_providers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index e8fdc28fb005..a8ae9cccc9d6 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -29,7 +29,7 @@ fi SCRIPT_DIR=$(pwd) PROVIDERS=( - "openrouter:google/gemini-2.5-pro:google/gemini-2.5-flash:anthropic/claude-sonnet-4.5:qwen/qwen3-coder:z-ai/glm-4.6" + "openrouter:google/gemini-2.5-pro:google/gemini-2.5-flash:anthropic/claude-sonnet-4.5:qwen/qwen3-coder:exacto:z-ai/glm-4.6:qwen/qwen3-next-80b-a3b-instruct:nvidia/nemotron-3-nano-30b-a3b" "xai:grok-3" "openai:gpt-4o:gpt-4o-mini:gpt-3.5-turbo:gpt-5" "anthropic:claude-sonnet-4-5-20250929:claude-opus-4-1-20250805" From 6fa89f8a800595146c219cc76ee0353fd92950e8 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 30 Dec 2025 12:34:51 +1100 Subject: [PATCH 2/6] trying some other formatting --- scripts/test_providers.sh | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index a8ae9cccc9d6..07d71b7bc183 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -28,13 +28,14 @@ fi SCRIPT_DIR=$(pwd) +# Format: "provider -> model1|model2|model3" PROVIDERS=( - "openrouter:google/gemini-2.5-pro:google/gemini-2.5-flash:anthropic/claude-sonnet-4.5:qwen/qwen3-coder:exacto:z-ai/glm-4.6:qwen/qwen3-next-80b-a3b-instruct:nvidia/nemotron-3-nano-30b-a3b" - "xai:grok-3" - "openai:gpt-4o:gpt-4o-mini:gpt-3.5-turbo:gpt-5" - "anthropic:claude-sonnet-4-5-20250929:claude-opus-4-1-20250805" - "google:gemini-2.5-pro:gemini-2.5-flash:gemini-3-pro-preview:gemini-3-flash-preview" - "tetrate:claude-sonnet-4-20250514" + "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder|exacto|z-ai/glm-4.6|qwen/qwen3-next-80b-a3b-instruct|nvidia/nemotron-3-nano-30b-a3b" + "xai -> grok-3" + "openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5" + "anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-1-20250805" + "google -> gemini-2.5-pro|gemini-2.5-flash|gemini-3-pro-preview|gemini-3-flash-preview" + "tetrate -> claude-sonnet-4-20250514" ) # In CI, only run Databricks tests if DATABRICKS_HOST and DATABRICKS_TOKEN are set @@ -42,13 +43,13 @@ PROVIDERS=( if [ -n "$CI" ]; then if [ -n "$DATABRICKS_HOST" ] && [ -n "$DATABRICKS_TOKEN" ]; then echo "✓ Including Databricks tests" - PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o") + PROVIDERS+=("databricks -> databricks-claude-sonnet-4|gemini-2-5-flash|gpt-4o") else echo "⚠️ Skipping Databricks tests (DATABRICKS_HOST and DATABRICKS_TOKEN required in CI)" fi else echo "✓ Including Databricks tests" - PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o") + PROVIDERS+=("databricks -> databricks-claude-sonnet-4|gemini-2-5-flash|gpt-4o") fi # Configure mode-specific settings @@ -73,10 +74,12 @@ echo "" RESULTS=() for provider_config in "${PROVIDERS[@]}"; do - IFS=':' read -ra PARTS <<< "$provider_config" - PROVIDER="${PARTS[0]}" - for i in $(seq 1 $((${#PARTS[@]} - 1))); do - MODEL="${PARTS[$i]}" + # Split on " -> " to get provider and models + PROVIDER="${provider_config%% -> *}" + MODELS_STR="${provider_config#* -> }" + # Split models on "|" + IFS='|' read -ra MODELS <<< "$MODELS_STR" + for MODEL in "${MODELS[@]}"; do export GOOSE_PROVIDER="$PROVIDER" export GOOSE_MODEL="$MODEL" TESTDIR=$(mktemp -d) From 0cd58de233539597599728831b2d4266a7f99d80 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 30 Dec 2025 12:36:40 +1100 Subject: [PATCH 3/6] really want exacto variant of qwen3 --- scripts/test_providers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index 07d71b7bc183..caf2d3857f50 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -30,7 +30,7 @@ SCRIPT_DIR=$(pwd) # Format: "provider -> model1|model2|model3" PROVIDERS=( - "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder|exacto|z-ai/glm-4.6|qwen/qwen3-next-80b-a3b-instruct|nvidia/nemotron-3-nano-30b-a3b" + "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6|qwen/qwen3-next-80b-a3b-instruct|nvidia/nemotron-3-nano-30b-a3b" "xai -> grok-3" "openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5" "anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-1-20250805" From c92cee24761cb2429372a427d7a67558e8b193fc Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 30 Dec 2025 13:28:59 +1100 Subject: [PATCH 4/6] removing this one --- scripts/test_providers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index caf2d3857f50..974fb2d381ef 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -30,7 +30,7 @@ SCRIPT_DIR=$(pwd) # Format: "provider -> model1|model2|model3" PROVIDERS=( - "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6|qwen/qwen3-next-80b-a3b-instruct|nvidia/nemotron-3-nano-30b-a3b" + "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6|nvidia/nemotron-3-nano-30b-a3b" "xai -> grok-3" "openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5" "anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-1-20250805" From 6538de2e02fec15ab77bf6f8cc7f3e631acc9bdf Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 30 Dec 2025 17:39:33 +1100 Subject: [PATCH 5/6] use exacto for glm 4.6 as well --- scripts/test_providers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index 974fb2d381ef..0902b4071f4e 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -30,7 +30,7 @@ SCRIPT_DIR=$(pwd) # Format: "provider -> model1|model2|model3" PROVIDERS=( - "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6|nvidia/nemotron-3-nano-30b-a3b" + "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b" "xai -> grok-3" "openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5" "anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-1-20250805" From d022ce00ac4468537dfa1061167c88280ae49627 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 30 Dec 2025 17:58:41 +1100 Subject: [PATCH 6/6] gemini flash already covered by google provider --- scripts/test_providers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index 0902b4071f4e..93d25149e8d8 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -30,7 +30,7 @@ SCRIPT_DIR=$(pwd) # Format: "provider -> model1|model2|model3" PROVIDERS=( - "openrouter -> google/gemini-2.5-pro|google/gemini-2.5-flash|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b" + "openrouter -> google/gemini-2.5-pro|anthropic/claude-sonnet-4.5|qwen/qwen3-coder:exacto|z-ai/glm-4.6:exacto|nvidia/nemotron-3-nano-30b-a3b" "xai -> grok-3" "openai -> gpt-4o|gpt-4o-mini|gpt-3.5-turbo|gpt-5" "anthropic -> claude-sonnet-4-5-20250929|claude-opus-4-1-20250805"