Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/workflows/pr-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,21 @@ jobs:
- name: Make Binary Executable
run: chmod +x target/debug/goose

- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '22'

- name: Install agentic providers
run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli

- name: Run Smoke Tests with Provider Script
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
Expand Down Expand Up @@ -171,11 +181,21 @@ jobs:
- name: Make Binary Executable
run: chmod +x target/debug/goose

- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: '22'

- name: Install agentic providers
run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli

- name: Run Provider Tests (Code Execution Mode)
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
Comment thread
codefromthecrypt marked this conversation as resolved.
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
Expand Down
52 changes: 48 additions & 4 deletions scripts/test_providers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ ALLOWED_FAILURES=(
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
)

# Providers listed here run their tools internally and hand back plain text,
# so their output never contains the usual tool-call log patterns
# (e.g. "shell | developer").
AGENTIC_PROVIDERS=(
  "claude-code"
  "codex"
  "gemini-cli"
  "cursor-agent"
)

#######################################
# Export KEY=VALUE pairs from a dotenv-style file into the environment.
# Lines are parsed one at a time instead of being word-split by the shell,
# so values containing spaces or glob characters survive intact.
# Supported syntax per line:
#   - blank lines and lines starting with '#' are skipped
#   - an optional leading "export" keyword is dropped
#   - one pair of matching surrounding quotes is removed from the value
#   - on unquoted values, a trailing " # comment" is removed
# Lines without '=' are ignored; lines whose key is not a valid shell
# identifier are skipped with a warning on stderr.
# Arguments: $1 - path to the file to load
#######################################
load_env_file() {
  local env_file="$1"
  local line key value
  # '|| [ -n "$line" ]' keeps a final line that has no trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    line="${line%$'\r'}"                       # tolerate CRLF line endings
    line="${line#"${line%%[![:space:]]*}"}"    # trim leading whitespace
    case "$line" in
      '' | '#'*) continue ;;
    esac
    case "$line" in
      export[[:space:]]*)
        line="${line#export}"
        line="${line#"${line%%[![:space:]]*}"}"
        ;;
    esac
    case "$line" in
      *=*) ;;
      *) continue ;;
    esac
    key="${line%%=*}"
    value="${line#*=}"
    if ! [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      echo "warning: ignoring invalid key in ${env_file}: ${key}" >&2
      continue
    fi
    # Values not fully wrapped in quotes may carry an inline comment.
    case "$value" in
      \"*\" | \'*\') ;;
      *)
        value="${value%%[[:space:]]\#*}"
        value="${value%"${value##*[![:space:]]}"}"   # trim trailing whitespace
        ;;
    esac
    case "$value" in
      \"*\")
        value="${value#\"}"
        value="${value%\"}"
        ;;
      \'*\')
        value="${value#\'}"
        value="${value%\'}"
        ;;
    esac
    export "$key=$value"
  done < "$env_file"
}

if [ -f .env ]; then
  load_env_file .env
fi
Expand All @@ -40,6 +44,13 @@ fi

SCRIPT_DIR=$(pwd)

# Fixture for the agentic-provider check: a file holding a known marker
# string. It is written below the current directory rather than /tmp because
# some agents cannot work outside the PWD.
TEST_CONTENT="test-content-abc123"
TEST_FILE="./target/test-content.txt"
mkdir -p "${TEST_FILE%/*}"
printf '%s\n' "$TEST_CONTENT" > "$TEST_FILE"

Comment thread
codefromthecrypt marked this conversation as resolved.
# Format: "provider -> model1|model2|model3"
# Base providers that are always tested (with appropriate env vars)
PROVIDERS=(
Expand Down Expand Up @@ -224,6 +235,16 @@ should_skip_provider() {
return 1
}

#######################################
# Report whether a provider name is listed as an agentic provider.
# Globals:   AGENTIC_PROVIDERS (read)
# Arguments: $1 - provider name to look up
# Returns:   0 if $1 exactly equals an entry of AGENTIC_PROVIDERS, else 1
#######################################
is_agentic_provider() {
  local provider="$1"
  # The loop variable is local so a lookup never overwrites a variable of
  # the same name in the calling scope.
  local agentic
  for agentic in "${AGENTIC_PROVIDERS[@]}"; do
    if [ "$agentic" = "$provider" ]; then
      return 0
    fi
  done
  return 1
}

# Create temp directory for results. Abort if it cannot be created so that
# later writes never go to paths built from an empty RESULTS_DIR.
RESULTS_DIR=$(mktemp -d) || {
  echo "failed to create temporary results directory" >&2
  exit 1
}
# Single quotes defer expansion until the trap fires, and the quoted
# expansion keeps a path containing whitespace as one argument. ':?' makes
# the cleanup refuse to run rm with an empty path.
trap 'rm -rf -- "${RESULTS_DIR:?}"' EXIT
Expand All @@ -241,17 +262,34 @@ run_test() {
local output_file="$4"

local testdir=$(mktemp -d)
echo "hello" > "$testdir/hello.txt"

# Agentic providers use a file-read prompt with known content marker;
# regular providers use the shell prompt that produces tool-call logs.
local prompt
if is_agentic_provider "$provider"; then
cp "$TEST_FILE" "$testdir/test-content.txt"
prompt="read ./test-content.txt and output its contents exactly"
else
echo "hello" > "$testdir/hello.txt"
prompt="Immediately use the shell tool to run 'ls'. Do not ask for confirmation."
fi

# Run the test and capture output
(
export GOOSE_PROVIDER="$provider"
export GOOSE_MODEL="$model"
cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "Immediately use the shell tool to run 'ls'. Do not ask for confirmation." --with-builtin "$BUILTINS" 2>&1
cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1
) > "$output_file" 2>&1

# Check result
if grep -qE "$SUCCESS_PATTERN" "$output_file"; then
# Check result: agentic providers return text containing the test content
# instead of producing tool-call log patterns
if is_agentic_provider "$provider"; then
if grep -qi "$TEST_CONTENT" "$output_file"; then
echo "success" > "$result_file"
else
echo "failure" > "$result_file"
fi
elif grep -qE "$SUCCESS_PATTERN" "$output_file"; then
echo "success" > "$result_file"
else
echo "failure" > "$result_file"
Expand All @@ -273,6 +311,12 @@ for provider_config in "${PROVIDERS[@]}"; do
continue
fi

# Agentic providers don't use goose's code_execution system
if [ "$CODE_EXEC_MODE" = true ] && is_agentic_provider "$PROVIDER"; then
echo "⊘ Skipping agentic provider in code_exec mode: ${PROVIDER}"
continue
fi

IFS='|' read -ra MODELS <<< "$MODELS_STR"
for MODEL in "${MODELS[@]}"; do
JOBS+=("$PROVIDER|$MODEL|$job_index")
Expand Down
Loading