Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .github/workflows/pr-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ jobs:
# Run the provider test script (binary already built and downloaded)
bash scripts/test_providers.sh

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: Install uv
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0

- name: Run MCP Tests
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
Expand Down Expand Up @@ -143,14 +151,6 @@ jobs:
run: |
bash scripts/test_subrecipes.sh

- name: Set up Python (for error proxy)
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'

- name: Install uv (for error proxy)
uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0

- name: Run Compaction Tests
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
Expand Down
141 changes: 78 additions & 63 deletions scripts/test_mcp.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/bin/bash
if [ -f .env ]; then
export $(grep -v '^#' .env | xargs)
fi
set -e

if [ -z "$SKIP_BUILD" ]; then
echo "Building goose..."
Expand All @@ -13,51 +11,73 @@ else
fi

SCRIPT_DIR=$(pwd)
GOOSE_BIN="$SCRIPT_DIR/target/release/goose"

JUDGE_PROVIDER=${GOOSE_JUDGE_PROVIDER:-openrouter}
JUDGE_MODEL=${GOOSE_JUDGE_MODEL:-google/gemini-2.5-flash}
TEST_PROVIDER=${GOOSE_PROVIDER:-anthropic}
TEST_MODEL=${GOOSE_MODEL:-claude-haiku-4-5-20251001}
MCP_SAMPLING_TOOL="trigger-sampling-request"

PROVIDERS=(
#"google:gemini-2.5-pro"
"anthropic:claude-haiku-4-5-20251001"
#"openrouter:google/gemini-2.5-pro"
#"openai:gpt-5-mini"
)
RESULTS=()

TESTDIR=$(mktemp -d)

cat > "$TESTDIR/test_mcp.py" << 'EOF'
from typing import Annotated
from fastmcp import FastMCP

mcp = FastMCP("test_server")

@mcp.tool
def add(
a: Annotated[float, "First number"],
b: Annotated[float, "Second number"],
) -> Annotated[float, "Sum of the two numbers"]:
"""Add two numbers."""
return a + b
EOF

cat > "$TESTDIR/recipe.yaml" << 'EOF'
title: FastMCP Test
description: Test that FastMCP servers with stderr banners work
prompt: Use the add tool to calculate 42 + 58
extensions:
- name: test_mcp
cmd: uv
args:
- run
- --with
- fastmcp
- fastmcp
- run
- test_mcp.py
type: stdio
EOF

# In CI, only run Databricks tests if DATABRICKS_HOST and DATABRICKS_TOKEN are set
# Locally, always run Databricks tests
if [ -n "$CI" ]; then
if [ -n "$DATABRICKS_HOST" ] && [ -n "$DATABRICKS_TOKEN" ]; then
echo "✓ Including Databricks tests"
PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
else
echo "⚠️ Skipping Databricks tests (DATABRICKS_HOST and DATABRICKS_TOKEN required in CI)"
fi
TMPFILE=$(mktemp)
(cd "$TESTDIR" && GOOSE_PROVIDER="$TEST_PROVIDER" GOOSE_MODEL="$TEST_MODEL" \
"$GOOSE_BIN" run --recipe recipe.yaml 2>&1) | tee "$TMPFILE"

if grep -q "add | test_mcp" "$TMPFILE" && grep -q "100" "$TMPFILE"; then
echo "✓ FastMCP stderr test passed"
RESULTS+=("✓ FastMCP stderr")
else
echo "✓ Including Databricks tests"
PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
echo "✗ FastMCP stderr test failed"
RESULTS+=("✗ FastMCP stderr")
fi

RESULTS=()
rm "$TMPFILE"
rm -rf "$TESTDIR"
echo ""

TESTDIR=$(mktemp -d)
TMPFILE=$(mktemp)

(cd "$TESTDIR" && GOOSE_PROVIDER="$TEST_PROVIDER" GOOSE_MODEL="$TEST_MODEL" \
"$GOOSE_BIN" run --text "Use the sampleLLM tool to ask for a quote from The Great Gatsby" \
--with-extension "npx -y @modelcontextprotocol/server-everything@2026.1.14" 2>&1) | tee "$TMPFILE"

for provider_config in "${PROVIDERS[@]}"; do
IFS=':' read -ra PARTS <<< "$provider_config"
PROVIDER="${PARTS[0]}"
for i in $(seq 1 $((${#PARTS[@]} - 1))); do
MODEL="${PARTS[$i]}"
export GOOSE_PROVIDER="$PROVIDER"
export GOOSE_MODEL="$MODEL"
TESTDIR=$(mktemp -d)
echo "Provider: ${PROVIDER}"
echo "Model: ${MODEL}"
echo ""
TMPFILE=$(mktemp)
(cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "Use the sampleLLM tool to ask for a quote from The Great Gatsby" --with-extension "npx -y @modelcontextprotocol/server-everything@2026.1.14" 2>&1) | tee "$TMPFILE"
echo ""
if grep -q "$MCP_SAMPLING_TOOL | " "$TMPFILE"; then

JUDGE_PROMPT=$(cat <<EOF
if grep -q "$MCP_SAMPLING_TOOL | " "$TMPFILE"; then
JUDGE_PROMPT=$(cat <<EOF
You are a validator. You will be given a transcript of a CLI run that used an MCP tool to initiate MCP sampling.
The MCP server requests a quote from The Great Gatsby from the model via sampling.

Expand All @@ -80,40 +100,35 @@ $(cat "$TMPFILE")
----- END TRANSCRIPT -----
EOF
)
JUDGE_OUT=$(GOOSE_PROVIDER="$JUDGE_PROVIDER" GOOSE_MODEL="$JUDGE_MODEL" \
"$SCRIPT_DIR/target/release/goose" run --text "$JUDGE_PROMPT" 2>&1)

if echo "$JUDGE_OUT" | tr -d '\r' | grep -Eq '^[[:space:]]*PASS[[:space:]]*$'; then
echo "✓ SUCCESS: MCP sampling test passed - confirmed Gatsby related response"
RESULTS+=("✓ MCP Sampling ${PROVIDER}: ${MODEL}")
else
echo "✗ FAILED: MCP sampling test failed - did not confirm Gatsby related response"
echo " Judge provider/model: ${JUDGE_PROVIDER}:${JUDGE_MODEL}"
echo " Judge output (snippet):"
echo "$JUDGE_OUT" | tail -n 20
RESULTS+=("✗ MCP Sampling ${PROVIDER}: ${MODEL}")
fi
JUDGE_OUT=$(GOOSE_PROVIDER="$TEST_PROVIDER" GOOSE_MODEL="$TEST_MODEL" \
"$GOOSE_BIN" run --text "$JUDGE_PROMPT" 2>&1)

if echo "$JUDGE_OUT" | tr -d '\r' | grep -Eq '^[[:space:]]*PASS[[:space:]]*$'; then
echo "✓ MCP sampling test passed"
RESULTS+=("✓ MCP sampling")
else
echo "✗ FAILED: MCP sampling test failed - $MCP_SAMPLING_TOOL tool not called"
RESULTS+=("✗ MCP Sampling ${PROVIDER}: ${MODEL}")
echo "✗ MCP sampling test failed"
RESULTS+=("✗ MCP sampling")
fi
rm "$TMPFILE"
rm -rf "$TESTDIR"
echo "---"
done
done
else
echo "✗ MCP sampling test failed - $MCP_SAMPLING_TOOL tool not called"
RESULTS+=("✗ MCP sampling")
fi

rm "$TMPFILE"
rm -rf "$TESTDIR"
echo ""
echo "=== MCP Sampling Test Summary ==="

echo "=== Test Summary ==="
for result in "${RESULTS[@]}"; do
echo "$result"
done

if echo "${RESULTS[@]}" | grep -q "✗"; then
echo ""
echo "Some MCP sampling tests failed!"
echo "Some tests failed!"
exit 1
else
echo ""
echo "All MCP sampling tests passed!"
echo "All tests passed!"
fi
6 changes: 1 addition & 5 deletions ui/desktop/src/hooks/useChatStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,7 @@ async function streamFromResponse(
}
};

const maybeUpdateUI = (
tokenState: TokenState,
chatState: ChatState,
forceImmediate = false
) => {
const maybeUpdateUI = (tokenState: TokenState, chatState: ChatState, forceImmediate = false) => {
if (!reduceMotion) {
dispatch({ type: 'SET_TOKEN_STATE', payload: tokenState });
dispatch({ type: 'SET_MESSAGES', payload: currentMessages });
Expand Down
Loading