diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml
index 2e3d380e7eb2..399278f78ebb 100644
--- a/.github/workflows/pr-smoke-test.yml
+++ b/.github/workflows/pr-smoke-test.yml
@@ -95,11 +95,22 @@ jobs:
       - name: Make Binary Executable
         run: chmod +x target/debug/goose
 
+      # Node.js is required to npm-install the agentic provider CLIs below.
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      - name: Install agentic providers
+        run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli
+
       - name: Run Smoke Tests with Provider Script
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
           DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
           DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
           OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
@@ -171,11 +182,22 @@ jobs:
       - name: Make Binary Executable
         run: chmod +x target/debug/goose
 
+      # Node.js is required to npm-install the agentic provider CLIs below.
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
+      - name: Install agentic providers
+        run: npm install -g @anthropic-ai/claude-code @openai/codex @google/gemini-cli
+
       - name: Run Provider Tests (Code Execution Mode)
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          CODEX_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          GEMINI_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
           DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
           DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
           OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh
index 638d74eb8f27..971c06cc8da2 100755
--- a/scripts/test_providers.sh
+++ b/scripts/test_providers.sh
@@ -25,6 +25,10 @@ ALLOWED_FAILURES=(
   "openrouter:nvidia/nemotron-3-nano-30b-a3b"
 )
 
+# Agentic providers handle tools internally and return text results.
+# They can't produce the normal tool-call log patterns (e.g. "shell | developer").
+AGENTIC_PROVIDERS=("claude-code" "codex" "gemini-cli" "cursor-agent")
+
 if [ -f .env ]; then
   export $(grep -v '^#' .env | xargs)
 fi
@@ -40,6 +44,13 @@ fi
 
 SCRIPT_DIR=$(pwd)
 
+# Create a fixture file with known content under $SCRIPT_DIR/target
+# This cannot be /tmp as some agents cannot work outside the PWD
+mkdir -p "$SCRIPT_DIR/target"
+TEST_CONTENT="test-content-abc123"
+TEST_FILE="$SCRIPT_DIR/target/test-content.txt"
+echo "$TEST_CONTENT" > "$TEST_FILE"
+
 # Format: "provider -> model1|model2|model3"
 # Base providers that are always tested (with appropriate env vars)
 PROVIDERS=(
@@ -224,6 +235,19 @@ should_skip_provider() {
   return 1
 }
 
+# Return 0 if the provider named in $1 is listed in AGENTIC_PROVIDERS,
+# 1 otherwise.
+#   $1 - provider name, e.g. "claude-code"
+is_agentic_provider() {
+  local provider="$1" agentic
+  for agentic in "${AGENTIC_PROVIDERS[@]}"; do
+    if [ "$agentic" = "$provider" ]; then
+      return 0
+    fi
+  done
+  return 1
+}
+
 # Create temp directory for results
 RESULTS_DIR=$(mktemp -d)
 trap "rm -rf $RESULTS_DIR" EXIT
@@ -241,17 +265,34 @@ run_test() {
   local output_file="$4"
 
   local testdir=$(mktemp -d)
-  echo "hello" > "$testdir/hello.txt"
+
+  # Agentic providers use a file-read prompt with known content marker;
+  # regular providers use the shell prompt that produces tool-call logs.
+  local prompt
+  if is_agentic_provider "$provider"; then
+    cp "$TEST_FILE" "$testdir/test-content.txt"
+    prompt="read ./test-content.txt and output its contents exactly"
+  else
+    echo "hello" > "$testdir/hello.txt"
+    prompt="Immediately use the shell tool to run 'ls'. Do not ask for confirmation."
+  fi
 
   # Run the test and capture output
   (
     export GOOSE_PROVIDER="$provider"
     export GOOSE_MODEL="$model"
-    cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "Immediately use the shell tool to run 'ls'. Do not ask for confirmation." --with-builtin "$BUILTINS" 2>&1
+    cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "$prompt" --with-builtin "$BUILTINS" 2>&1
   ) > "$output_file" 2>&1
 
-  # Check result
-  if grep -qE "$SUCCESS_PATTERN" "$output_file"; then
+  # Check result: agentic providers return text containing the test content
+  # (matched as a fixed string, case-insensitively) instead of tool-call logs
+  if is_agentic_provider "$provider"; then
+    if grep -qiF -- "$TEST_CONTENT" "$output_file"; then
+      echo "success" > "$result_file"
+    else
+      echo "failure" > "$result_file"
+    fi
+  elif grep -qE "$SUCCESS_PATTERN" "$output_file"; then
     echo "success" > "$result_file"
   else
     echo "failure" > "$result_file"
@@ -273,6 +314,12 @@ for provider_config in "${PROVIDERS[@]}"; do
     continue
   fi
 
+  # Agentic providers don't use goose's code_execution system
+  if [ "$CODE_EXEC_MODE" = true ] && is_agentic_provider "$PROVIDER"; then
+    echo "⊘ Skipping agentic provider in code_exec mode: ${PROVIDER}"
+    continue
+  fi
+
   IFS='|' read -ra MODELS <<< "$MODELS_STR"
   for MODEL in "${MODELS[@]}"; do
     JOBS+=("$PROVIDER|$MODEL|$job_index")