block · alexhancock · Nov 3, 2025 · Oct 29, 2025
diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml
@@ -125,6 +125,21 @@ jobs:
           # Run the provider test script (binary already built and downloaded)
           bash scripts/test_providers.sh
 
+      - name: Run MCP Tests  # MCP sampling smoke test driven by scripts/test_mcp.sh
+        env:  # values exported to the script; it runs anthropic, google, openrouter and openai models, plus databricks when configured
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}  # in CI the script only adds Databricks when this and DATABRICKS_TOKEN are both non-empty
+          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}  # the script's default judge provider is openrouter
+          TETRATE_API_KEY: ${{ secrets.TETRATE_API_KEY }}
+          HOME: /tmp/goose-home  # NOTE(review): presumably keeps goose state out of the runner's real home directory; confirm
+          GOOSE_DISABLE_KEYRING: 1  # NOTE(review): presumably stops goose from using the system keyring; confirm
+          SKIP_BUILD: 1  # non-empty value makes test_mcp.sh skip `cargo build` and use the existing target/release/goose
+        run: |
+          bash scripts/test_mcp.sh
+
       - name: Run Subrecipe Tests
         env:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

diff --git a/crates/goose/tests/mcp_integration_test.rs b/crates/goose/tests/mcp_integration_test.rs
@@ -1,18 +1,25 @@
 use serde::Deserialize;
+
 use std::collections::HashMap;
 use std::fs::File;
 use std::path::PathBuf;
+use std::sync::Arc;
 use std::{env, fs};
 
-use rmcp::model::{CallToolRequestParam, Content};
+use rmcp::model::{CallToolRequestParam, Content, Tool};
 use rmcp::object;
 use tokio_util::sync::CancellationToken;
 
 use goose::agents::extension::{Envs, ExtensionConfig};
 use goose::agents::extension_manager::ExtensionManager;
+use goose::model::ModelConfig;
 
 use test_case::test_case;
 
+use async_trait::async_trait;
+use goose::conversation::message::Message;
+use goose::providers::base::{Provider, ProviderMetadata, ProviderUsage, Usage};
+use goose::providers::errors::ProviderError;
 use once_cell::sync::Lazy;
 use std::process::Command;
 
@@ -29,6 +36,45 @@ struct Target {
     kind: Vec<String>,
 }
 
+/// Stand-in [`Provider`] for the MCP replay tests.
+///
+/// The only state it holds is the [`ModelConfig`] handed to it at
+/// construction, which it returns from `get_model_config`.
+#[derive(Clone)]
+pub struct MockProvider {
+    /// Model configuration reported back to callers.
+    pub model_config: ModelConfig,
+}
+
+impl MockProvider {
+    /// Creates a mock provider that owns `model_config`.
+    pub fn new(model_config: ModelConfig) -> Self {
+        MockProvider { model_config }
+    }
+}
+
+/// [`Provider`] implementation that answers every completion request with the
+/// same fixed Great Gatsby quote, so the test never contacts a real model.
+#[async_trait]
+impl Provider for MockProvider {
+    /// Returns empty provider metadata.
+    fn metadata() -> ProviderMetadata {
+        ProviderMetadata::empty()
+    }
+
+    /// Fixed provider name.
+    fn get_name(&self) -> &str {
+        "mock"
+    }
+
+    /// Ignores the model config, system prompt, messages and tools, and
+    /// returns a canned assistant message paired with default usage.
+    ///
+    /// # Errors
+    ///
+    /// Never fails; the `Result` exists only to satisfy the trait signature.
+    async fn complete_with_model(
+        &self,
+        _model_config: &ModelConfig,
+        _system: &str,
+        _messages: &[Message],
+        _tools: &[Tool],
+    ) -> Result<(Message, ProviderUsage), ProviderError> {
+        // The recorded server-everything replay and its results file contain
+        // this text verbatim (in the sampling response and the sampleLLM tool
+        // result), so any edit here must be mirrored in those fixtures.
+        let canned_reply = "\"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)";
+
+        Ok((
+            Message::assistant().with_text(canned_reply),
+            ProviderUsage::new("mock".to_string(), Usage::default()),
+        ))
+    }
+
+    /// Returns a clone of the configuration supplied at construction.
+    fn get_model_config(&self) -> ModelConfig {
+        self.model_config.clone()
+    }
+}
+
 fn build_and_get_binary_path() -> PathBuf {
     let output = Command::new("cargo")
         .args([
@@ -79,6 +125,7 @@ enum TestMode {
         CallToolRequestParam { name: "add".into(), arguments: Some(object!({"a": 1, "b": 2 })) },
         CallToolRequestParam { name: "longRunningOperation".into(), arguments: Some(object!({"duration": 1, "steps": 5 })) },
         CallToolRequestParam { name: "structuredContent".into(), arguments: Some(object!({"location": "11238"})) },
+        CallToolRequestParam { name: "sampleLLM".into(), arguments: Some(object!({"prompt": "Please provide a quote from The Great Gatsby", "maxTokens": 100 })) }
     ],
     vec![]
 )]
@@ -205,7 +252,11 @@ async fn test_replayed_session(
         bundled: Some(false),
         available_tools: vec![],
     };
-    let extension_manager = ExtensionManager::new_without_provider();
+
+    let provider = Arc::new(tokio::sync::Mutex::new(Some(Arc::new(MockProvider {
+        model_config: ModelConfig::new("test-model").unwrap(),
+    }) as Arc<dyn Provider>)));
+    let extension_manager = ExtensionManager::new(provider);
 
     #[allow(clippy::redundant_closure_call)]
     let result = (async || -> Result<(), Box<dyn std::error::Error>> {

diff --git a/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper b/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper
diff --git a/...ose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper.results.json b/...ose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper.results.json
@@ -90,7 +90,7 @@
   [
     {
       "type": "text",
-      "text": "Available windows:\nMenubar",
+      "text": "Available windows:\n\nItem-0\nbb3cc23c-6950-4e96-8b40-850e09f46934\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nBattery\nWiFi\nItem-0\nBentoBox\nSiri\nClock\nMenubar\nDock\njust record-mcp-tests",
       "annotations": {
         "audience": [
           "assistant"
@@ -99,7 +99,7 @@
     },
     {
       "type": "text",
-      "text": "Available windows:\nMenubar",
+      "text": "Available windows:\n\nItem-0\nbb3cc23c-6950-4e96-8b40-850e09f46934\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nBattery\nWiFi\nItem-0\nBentoBox\nSiri\nClock\nMenubar\nDock\njust record-mcp-tests",
       "annotations": {
         "audience": [
           "user"

diff --git a/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything b/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything
@@ -1,29 +1,10 @@
 STDIN: {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{"sampling":{}},"clientInfo":{"name":"goose","version":"0.0.0"}}}
-STDERR: 2025-09-26 23:13:04 - Starting npx setup script.
-STDERR: 2025-09-26 23:13:04 - Creating directory ~/.config/goose/mcp-hermit/bin if it does not exist.
-STDERR: 2025-09-26 23:13:04 - Changing to directory ~/.config/goose/mcp-hermit.
-STDERR: 2025-09-26 23:13:04 - Hermit binary already exists. Skipping download.
-STDERR: 2025-09-26 23:13:04 - setting hermit cache to be local for MCP servers
-STDERR: 2025-09-26 23:13:04 - Updated PATH to include ~/.config/goose/mcp-hermit/bin.
-STDERR: 2025-09-26 23:13:04 - Checking for hermit in PATH.
-STDERR: 2025-09-26 23:13:04 - Initializing hermit.
-STDERR: 2025-09-26 23:13:04 - Installing Node.js with hermit.
-STDERR: 2025-09-26 23:13:04 - Verifying installation locations:
-STDERR: 2025-09-26 23:13:04 - hermit: /Users/angiej/.config/goose/mcp-hermit/bin/hermit
-STDERR: 2025-09-26 23:13:04 - node: /Users/angiej/.config/goose/mcp-hermit/bin/node
-STDERR: 2025-09-26 23:13:04 - npx: /Users/angiej/.config/goose/mcp-hermit/bin/npx
-STDERR: 2025-09-26 23:13:04 - Checking for GOOSE_NPM_REGISTRY and GOOSE_NPM_CERT environment variables for custom npm registry setup...
-STDERR: 2025-09-26 23:13:05 - Checking custom goose registry availability: https://global.block-artifacts.com/artifactory/api/npm/square-npm/
-STDERR: 2025-09-26 23:13:05 - https://global.block-artifacts.com/artifactory/api/npm/square-npm/ is accessible. Using it for npm registry.
-STDERR: 2025-09-26 23:13:06 - Downloading certificate from: https://block-ca.squareup.com/root-certs.pem
-STDERR: 2025-09-26 23:13:07 - Certificate downloaded successfully.
-STDERR: 2025-09-26 23:13:07 - Executing 'npx' command with arguments: -y @modelcontextprotocol/server-everything
 STDERR: Starting default (STDIO) server...
 STDERR: Starting logs update interval
 STDOUT: {"result":{"protocolVersion":"2025-03-26","capabilities":{"prompts":{},"resources":{"subscribe":true},"tools":{},"logging":{},"completions":{}},"serverInfo":{"name":"example-servers/everything","title":"Everything Example Server","version":"1.0.0"},"instructions":"Testing and demonstration server for MCP protocol features.\n\n## Resources\n\nResources 1-100 follow pattern: even IDs contain text, odd IDs contain binary data. Resources are paginated at 10 items per page with cursor-based navigation.\n\n## Key dependencies\n\nProgress notifications require `_meta.progressToken` in tool calls. Resource subscriptions generate updates every 10 seconds.\n\n## Performance characteristics\n\nServer generates automatic log messages every 20 seconds (filtered by current log level), stderr notifications every 30 seconds, and resource update notifications every 10 seconds for active subscriptions. Log level changes affect message filtering in real-time.\n\n## Multi-modal testing\n\n`complex_prompt` includes both text arguments and image content for testing client multi-modal handling. `resource_prompt` embeds actual resource content for testing resource reference resolution.\n\nArgument completion is available for prompt parameters and resource IDs. Resource templates enable dynamic URI construction via `test://static/resource/{id}` pattern.\n\n## Easter egg\n\nIf asked about server instructions, respond with \"🎉 Server instructions are working! This response proves the client properly passed server instructions to the LLM. This demonstrates MCP's instructions feature in action.\"\n"},"jsonrpc":"2.0","id":0}
 STDIN: {"jsonrpc":"2.0","method":"notifications/initialized"}
-STDIN: {"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"_meta":{"progressToken":0},"name":"echo","arguments":{"message":"Hello, world!"}}}
 STDOUT: {"method":"notifications/message","params":{"level":"info","logger":"everything-server","data":"Client does not support MCP roots protocol"},"jsonrpc":"2.0"}
+STDIN: {"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"_meta":{"progressToken":0},"name":"echo","arguments":{"message":"Hello, world!"}}}
 STDOUT: {"result":{"content":[{"type":"text","text":"Echo: Hello, world!"}]},"jsonrpc":"2.0","id":1}
 STDIN: {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"_meta":{"progressToken":1},"name":"add","arguments":{"a":1,"b":2}}}
 STDOUT: {"result":{"content":[{"type":"text","text":"The sum of 1 and 2 is 3."}]},"jsonrpc":"2.0","id":2}
@@ -36,5 +17,9 @@ STDOUT: {"method":"notifications/progress","params":{"progress":5,"total":5,"pro
 STDOUT: {"result":{"content":[{"type":"text","text":"Long running operation completed. Duration: 1 seconds, Steps: 5."}]},"jsonrpc":"2.0","id":3}
 STDIN: {"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"_meta":{"progressToken":3},"name":"structuredContent","arguments":{"location":"11238"}}}
 STDOUT: {"result":{"content":[{"type":"text","text":"{\"temperature\":22.5,\"conditions\":\"Partly cloudy\",\"humidity\":65}"}],"structuredContent":{"temperature":22.5,"conditions":"Partly cloudy","humidity":65}},"jsonrpc":"2.0","id":4}
-STDOUT: {"method":"notifications/message","params":{"level":"emergency","data":"Emergency-level message"},"jsonrpc":"2.0"}
-STDERR: node:events:497
+STDIN: {"jsonrpc":"2.0","id":5,"method":"tools/call","params":{"_meta":{"progressToken":4},"name":"sampleLLM","arguments":{"maxTokens":100,"prompt":"Please provide a quote from The Great Gatsby"}}}
+STDOUT: {"method":"sampling/createMessage","params":{"messages":[{"role":"user","content":{"type":"text","text":"Resource sampleLLM context: Please provide a quote from The Great Gatsby"}}],"systemPrompt":"You are a helpful test server.","maxTokens":100,"temperature":0.7,"includeContext":"thisServer"},"jsonrpc":"2.0","id":0}
+STDIN: {"jsonrpc":"2.0","id":0,"result":{"model":"mock","stopReason":"endTurn","role":"assistant","content":{"type":"text","text":"\"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)"}}}
+STDOUT: {"result":{"content":[{"type":"text","text":"LLM sampling result: \"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)"}]},"jsonrpc":"2.0","id":5}
+STDOUT: {"method":"notifications/message","params":{"level":"error","data":"Error-level message"},"jsonrpc":"2.0"}
+STDERR: node:events:486
diff --git a/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything.results.json b/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything.results.json
@@ -22,5 +22,11 @@
       "type": "text",
       "text": "{\"temperature\":22.5,\"conditions\":\"Partly cloudy\",\"humidity\":65}"
     }
+  ],
+  [
+    {
+      "type": "text",
+      "text": "LLM sampling result: \"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)"
+    }
   ]
 ]
diff --git a/crates/goose/tests/mcp_replays/uvxmcp-server-fetch b/crates/goose/tests/mcp_replays/uvxmcp-server-fetch
@@ -1,29 +1,5 @@
 STDIN: {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{"sampling":{}},"clientInfo":{"name":"goose","version":"0.0.0"}}}
-STDERR: 2025-09-26 23:13:04 - Starting uvx setup script.
-STDERR: 2025-09-26 23:13:04 - Creating directory ~/.config/goose/mcp-hermit/bin if it does not exist.
-STDERR: 2025-09-26 23:13:04 - Changing to directory ~/.config/goose/mcp-hermit.
-STDERR: 2025-09-26 23:13:04 - Hermit binary already exists. Skipping download.
-STDERR: 2025-09-26 23:13:04 - setting hermit cache to be local for MCP servers
-STDERR: 2025-09-26 23:13:04 - Updated PATH to include ~/.config/goose/mcp-hermit/bin.
-STDERR: 2025-09-26 23:13:04 - Checking for hermit in PATH.
-STDERR: 2025-09-26 23:13:04 - Initializing hermit.
-STDERR: 2025-09-26 23:13:04 - hermit install python 3.10
-STDERR: 2025-09-26 23:13:04 - Installing UV with hermit.
-STDERR: 2025-09-26 23:13:04 - Verifying installation locations:
-STDERR: 2025-09-26 23:13:04 - hermit: /Users/angiej/.config/goose/mcp-hermit/bin/hermit
-STDERR: 2025-09-26 23:13:04 - uv: /Users/angiej/.config/goose/mcp-hermit/bin/uv
-STDERR: 2025-09-26 23:13:04 - uvx: /Users/angiej/.config/goose/mcp-hermit/bin/uvx
-STDERR: 2025-09-26 23:13:04 - Checking for GOOSE_UV_REGISTRY environment variable for custom python/pip/UV registry setup...
-STDERR: 2025-09-26 23:13:05 - Checking custom goose registry availability: https://global.block-artifacts.com/artifactory/api/pypi/block-pypi/simple
-STDERR: 2025-09-26 23:13:05 - https://global.block-artifacts.com/artifactory/api/pypi/block-pypi/simple is accessible, setting it as UV_DEFAULT_INDEX. Setting UV_NATIVE_TLS to true.
-STDERR: 2025-09-26 23:13:05 - Executing 'uvx' command with arguments: mcp-server-fetch
-STDOUT: {"jsonrpc":"2.0","id":0,"result":{"protocolVersion":"2025-03-26","capabilities":{"experimental":{},"prompts":{"listChanged":false},"tools":{"listChanged":false}},"serverInfo":{"name":"mcp-fetch","version":"1.15.0"}}}
+STDOUT: {"jsonrpc":"2.0","id":0,"result":{"protocolVersion":"2025-03-26","capabilities":{"experimental":{},"prompts":{"listChanged":false},"tools":{"listChanged":false}},"serverInfo":{"name":"mcp-fetch","version":"1.19.0"}}}
 STDIN: {"jsonrpc":"2.0","method":"notifications/initialized"}
 STDIN: {"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"_meta":{"progressToken":0},"name":"fetch","arguments":{"url":"https://example.com"}}}
-STDERR: npm error code FETCH_ERROR
-STDERR: npm error errno FETCH_ERROR
-STDERR: npm error invalid json response body at https://blocked.teams.cloudflare.com/?account_id=1e25787f854fa4b713d08a859d3e16ed&background_color=%23000000&block_reason=This+has+been+blocked+as+part+of+the+Dependency+Confusion+threat.+Please+see+go%2Fdependencyconfusionpypi+and+go%2Fdependencyconfusionnpm+for+more+info.&device_id=***&footer_text=The+website+you+are+trying+to+access+has+been+blocked+because+it+presents+a+risk+to+the+safety+and+security+of+Block%E2%80%99s+IT+systems.&header_text=This+page+presents+a+risk+to+Block&location=cf1ebd1203624140846ced63a200519e&logo_path=https%3A%2F%2Fmedia.block.xyz%2Flogos%2Fblock-jewel_white.png&mailto_address=&mailto_subject=&name=Block%2C+Inc.&params_sign=yrMcT5HYDMHvixy%2BdLHApce3BcNYIdlI8qh3wTcIrLA%3D&query_id=***&rule_id=***&source_ip=2a09%3Abac0%3A1000%3A2df%3A%3A281%3Ac0&suppress_footer=false&url=registry.npmjs.org&user_id=*** reason: Unexpected token '<', "
-STDERR: npm error <!DOCTYPE "... is not valid JSON
-STDERR: npm error A complete log of this run can be found in: /Users/angiej/.config/goose/mcp-hermit/.hermit/node/cache/_logs/2025-09-27T04_13_13_364Z-debug-0.log
-STDOUT: {"jsonrpc":"2.0","id":1,"result":{"content":[{"type":"text","text":"Command '['npm', 'install']' returned non-zero exit status 1."}],"isError":true}}
-STDERR: 2025-09-26 23:13:14 - uvx setup script completed successfully.
+STDOUT: {"jsonrpc":"2.0","id":1,"result":{"content":[{"type":"text","text":"Failed to fetch robots.txt https://example.com/robots.txt due to a connection issue"}],"isError":true}}
diff --git a/crates/goose/tests/mcp_replays/uvxmcp-server-fetch.results.json b/crates/goose/tests/mcp_replays/uvxmcp-server-fetch.results.json
@@ -2,7 +2,7 @@
   [
     {
       "type": "text",
-      "text": "Command '['npm', 'install']' returned non-zero exit status 1."
+      "text": "Failed to fetch robots.txt https://example.com/robots.txt due to a connection issue"
     }
   ]
 ]
diff --git a/scripts/test_mcp.sh b/scripts/test_mcp.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+if [ -f .env ]; then
+  export $(grep -v '^#' .env | xargs)
+fi
+
+if [ -z "$SKIP_BUILD" ]; then
+  echo "Building goose..."
+  cargo build --release --bin goose
+  echo ""
+else
+  echo "Skipping build (SKIP_BUILD is set)..."
+  echo ""
+fi
+
+SCRIPT_DIR=$(pwd)
+
+JUDGE_PROVIDER=${GOOSE_JUDGE_PROVIDER:-openrouter}
+JUDGE_MODEL=${GOOSE_JUDGE_MODEL:-google/gemini-2.5-flash}
+
+PROVIDERS=(
+  "anthropic:claude-haiku-4-5-20251001"
+  "google:gemini-2.5-flash"
+  "openrouter:qwen/qwen3-coder"
+  "openai:gpt-5-mini"
+)
+
+# In CI, only run Databricks tests if DATABRICKS_HOST and DATABRICKS_TOKEN are set
+# Locally, always run Databricks tests
+if [ -n "$CI" ]; then
+  if [ -n "$DATABRICKS_HOST" ] && [ -n "$DATABRICKS_TOKEN" ]; then
+    echo "✓ Including Databricks tests"
+    PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
+  else
+    echo "⚠️  Skipping Databricks tests (DATABRICKS_HOST and DATABRICKS_TOKEN required in CI)"
+  fi
+else
+  echo "✓ Including Databricks tests"
+  PROVIDERS+=("databricks:databricks-claude-sonnet-4:gemini-2-5-flash:gpt-4o")
+fi
+
+RESULTS=()
+
+for provider_config in "${PROVIDERS[@]}"; do
+  IFS=':' read -ra PARTS <<< "$provider_config"
+  PROVIDER="${PARTS[0]}"
+  for i in $(seq 1 $((${#PARTS[@]} - 1))); do
+    MODEL="${PARTS[$i]}"
+    export GOOSE_PROVIDER="$PROVIDER"
+    export GOOSE_MODEL="$MODEL"
+    TESTDIR=$(mktemp -d)
+    echo "Provider: ${PROVIDER}"
+    echo "Model: ${MODEL}"
+    echo ""
+    TMPFILE=$(mktemp)
+    (cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --text "Use the sampleLLM tool to ask for a quote from The Great Gatsby" --with-extension "npx -y @modelcontextprotocol/server-everything" 2>&1) | tee "$TMPFILE"
+    echo ""
+    if grep -q "sampleLLM | " "$TMPFILE"; then
+
+      JUDGE_PROMPT=$(cat <<EOF
+You are a validator. You will be given a transcript of a CLI run that used an MCP tool to initiate MCP sampling.
+The MCP server requests a quote from The Great Gatsby from the model via sampling.
+
+Task: Determine whether the transcript shows that the sampling request reached the model and that the output included either:
+  • A recognizable quote, paraphrase, or reference from The Great Gatsby, or
+  • A clear attempt or explanation from the model about why the quote could not be returned.
+
+If either of these conditions is true, respond PASS.
+If there is no evidence that the model attempted or returned a Gatsby-related response, respond FAIL.
+If uncertain, lean toward PASS.
+
+Output format: Respond with exactly one word on a single line:
+PASS
+or
+FAIL
+
+Transcript:
+----- BEGIN TRANSCRIPT -----
+$(cat "$TMPFILE")
+----- END TRANSCRIPT -----
+EOF
+)
+      JUDGE_OUT=$(GOOSE_PROVIDER="$JUDGE_PROVIDER" GOOSE_MODEL="$JUDGE_MODEL" \
+        "$SCRIPT_DIR/target/release/goose" run --text "$JUDGE_PROMPT" 2>&1)
+
+      if echo "$JUDGE_OUT" | tr -d '\r' | grep -Eq '^[[:space:]]*PASS[[:space:]]*$'; then
+        echo "✓ SUCCESS: MCP sampling test passed - confirmed Gatsby related response"
+        RESULTS+=("✓ MCP Sampling ${PROVIDER}: ${MODEL}")
+      else
+        echo "✗ FAILED: MCP sampling test failed - did not confirm Gatsby related response"
+        echo "  Judge provider/model: ${JUDGE_PROVIDER}:${JUDGE_MODEL}"
+        echo "  Judge output (snippet):"
+        echo "$JUDGE_OUT" | tail -n 20
+        RESULTS+=("✗ MCP Sampling ${PROVIDER}: ${MODEL}")
+      fi
+    else
+      echo "✗ FAILED: MCP sampling test failed - sampleLLM tool not called"
+      RESULTS+=("✗ MCP Sampling ${PROVIDER}: ${MODEL}")
+    fi
+    rm "$TMPFILE"
+    rm -rf "$TESTDIR"
+    echo "---"
+  done
+done
+
+echo ""
+echo "=== MCP Sampling Test Summary ==="
+for result in "${RESULTS[@]}"; do
+  echo "$result"
+done
+
+if echo "${RESULTS[@]}" | grep -q "✗"; then
+  echo ""
+  echo "Some MCP sampling tests failed!"
+  exit 1
+else
+  echo ""
+  echo "All MCP sampling tests passed!"
+fi