Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .github/workflows/pr-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,40 @@ jobs:
SKIP_BUILD: 1
run: |
bash scripts/test_compaction.sh

# Smoke-test job: runs scripts/test_providers.sh with --code-exec against the
# binary artifact downloaded below. Starts only after the build-binary job.
smoke-tests-code-exec:
name: Smoke Tests (Code Execution)
runs-on: ubuntu-latest
needs: build-binary
steps:
# Check out the branch given as a workflow input when present, otherwise the
# ref that triggered the run.
- name: Checkout Code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # pin@v4
with:
ref: ${{ github.event.inputs.branch || github.ref }}

# Fetch the "goose-binary" artifact into target/release, the location the
# chmod step below expects.
# NOTE(review): this action is referenced by tag (@v4) while checkout above is
# pinned to a commit SHA; consider pinning for consistency.
- name: Download Binary
uses: actions/download-artifact@v4
with:
name: goose-binary
path: target/release

# Mark the downloaded binary as executable before it is run.
- name: Make Binary Executable
run: chmod +x target/release/goose

# Provider credentials are taken from repository secrets. HOME is redirected
# to a scratch directory under /tmp, and the run script creates the goose
# session and config directories beneath it before invoking the tests.
# NOTE(review): SKIP_BUILD and GOOSE_DISABLE_KEYRING are consumed outside this
# hunk; presumably they skip rebuilding the binary and disable keyring-backed
# secret storage respectively. Confirm against the script and goose itself.
- name: Run Provider Tests (Code Execution Mode)
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
XAI_API_KEY: ${{ secrets.XAI_API_KEY }}
TETRATE_API_KEY: ${{ secrets.TETRATE_API_KEY }}
HOME: /tmp/goose-home
GOOSE_DISABLE_KEYRING: 1
SKIP_BUILD: 1
run: |
mkdir -p $HOME/.local/share/goose/sessions
mkdir -p $HOME/.config/goose
bash scripts/test_providers.sh --code-exec
39 changes: 35 additions & 4 deletions scripts/test_providers.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
#!/bin/bash
# Test providers with optional code_execution mode
# Usage:
# ./test_providers.sh # Normal mode (direct tool calls)
# ./test_providers.sh --code-exec # Code execution mode (JS batching)

CODE_EXEC_MODE=false
for arg in "$@"; do
case $arg in
--code-exec)
CODE_EXEC_MODE=true
;;
Copy link

Copilot AI Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The shift command inside the loop has no effect on the iteration: `for arg in "$@"` expands the argument list once, before the loop starts, so shifting the positional parameters afterwards neither skips nor repeats any argument. Remove the shift command, as it serves no purpose here and could cause unexpected behavior if additional arguments are added in the future.

Suggested change
;;

Copilot uses AI. Check for mistakes.
esac
done

#######################################
# Export the KEY=VALUE assignments found in a dotenv-style file.
#
# Lines starting with '#' are ignored. The remaining text is tokenized by
# xargs (so surrounding quotes are removed and several assignments may share
# a line), and each token is exported on its own.
#
# Exporting one quoted token at a time avoids three problems of an unquoted
# `export $(... | xargs)`:
#   - an empty or comment-only file turns the command into a bare `export`,
#     which prints every exported variable (including secrets) to stdout;
#   - values containing glob characters are expanded against the cwd;
#   - quoted values containing spaces are split into separate words.
#
# Arguments: $1 - path to the env file (default: .env)
# Returns:   0 when the file is missing or has been processed
#######################################
load_dotenv() {
  local _dotenv_file=${1:-.env}
  local _dotenv_token

  [ -f "$_dotenv_file" ] || return 0

  # xargs -n 1 emits one token per line; printf keeps the token verbatim.
  while IFS= read -r _dotenv_token; do
    # GNU xargs runs the command once even on empty input, which yields an
    # empty line; skip it instead of calling `export` with nothing useful.
    if [ -n "$_dotenv_token" ]; then
      export "$_dotenv_token"
    fi
  done < <(grep -v '^#' "$_dotenv_file" | xargs -n 1 printf '%s\n')

  return 0
}

load_dotenv .env
Expand Down Expand Up @@ -37,6 +51,23 @@ else
PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
fi

# Select per-mode settings from CODE_EXEC_MODE: the builtin extensions handed
# to goose, the extended-regex pattern that marks a successful run in the
# captured output, and the messages printed for pass and fail.
case "$CODE_EXEC_MODE" in
  true)
    echo "Mode: code_execution (JS batching)"
    # Either "execute_code | code_execution" or "read_module | code_execution"
    # in the output counts as success.
    SUCCESS_PATTERN="(execute_code \| code_execution)|(read_module \| code_execution)"
    BUILTINS="developer,code_execution"
    FAILURE_MSG="no code_execution tools called"
    SUCCESS_MSG="code_execution tool called"
    ;;
  *)
    echo "Mode: normal (direct tool calls)"
    SUCCESS_PATTERN="shell \| developer"
    BUILTINS="developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager"
    FAILURE_MSG="no developer tools called"
    SUCCESS_MSG="developer tool called"
    ;;
esac
echo ""

RESULTS=()

for provider_config in "${PROVIDERS[@]}"; do
Expand All @@ -52,13 +83,13 @@ for provider_config in "${PROVIDERS[@]}"; do
echo "Model: ${MODEL}"
echo ""
TMPFILE=$(mktemp)
(cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin developer,autovisualiser,computercontroller,tutorial,todo,extensionmanager 2>&1) | tee "$TMPFILE"
(cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "please list files in the current directory" --with-builtin "$BUILTINS" 2>&1) | tee "$TMPFILE"
echo ""
if grep -q "shell | developer" "$TMPFILE"; then
echo "✓ SUCCESS: Test passed - developer tool called"
if grep -qE "$SUCCESS_PATTERN" "$TMPFILE"; then
echo "✓ SUCCESS: Test passed - $SUCCESS_MSG"
RESULTS+=("✓ ${PROVIDER}: ${MODEL}")
else
echo "✗ FAILED: Test failed - no developer tools called"
echo "✗ FAILED: Test failed - $FAILURE_MSG"
RESULTS+=("✗ ${PROVIDER}: ${MODEL}")
fi
rm "$TMPFILE"
Expand Down
Loading