diff --git a/.github/workflows/pr-smoke-test.yml b/.github/workflows/pr-smoke-test.yml index 8fee5945ed91..07ae49654feb 100644 --- a/.github/workflows/pr-smoke-test.yml +++ b/.github/workflows/pr-smoke-test.yml @@ -125,6 +125,21 @@ jobs: # Run the provider test script (binary already built and downloaded) bash scripts/test_providers.sh + - name: Run MCP Tests + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }} + DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + TETRATE_API_KEY: ${{ secrets.TETRATE_API_KEY }} + HOME: /tmp/goose-home + GOOSE_DISABLE_KEYRING: 1 + SKIP_BUILD: 1 + run: | + bash scripts/test_mcp.sh + - name: Run Subrecipe Tests env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/crates/goose/tests/mcp_integration_test.rs b/crates/goose/tests/mcp_integration_test.rs index 6704119e638b..df1b70d36fa5 100644 --- a/crates/goose/tests/mcp_integration_test.rs +++ b/crates/goose/tests/mcp_integration_test.rs @@ -1,18 +1,25 @@ use serde::Deserialize; + use std::collections::HashMap; use std::fs::File; use std::path::PathBuf; +use std::sync::Arc; use std::{env, fs}; -use rmcp::model::{CallToolRequestParam, Content}; +use rmcp::model::{CallToolRequestParam, Content, Tool}; use rmcp::object; use tokio_util::sync::CancellationToken; use goose::agents::extension::{Envs, ExtensionConfig}; use goose::agents::extension_manager::ExtensionManager; +use goose::model::ModelConfig; use test_case::test_case; +use async_trait::async_trait; +use goose::conversation::message::Message; +use goose::providers::base::{Provider, ProviderMetadata, ProviderUsage, Usage}; +use goose::providers::errors::ProviderError; use once_cell::sync::Lazy; use std::process::Command; @@ -29,6 +36,45 @@ struct Target { kind: Vec, } +#[derive(Clone)] +pub struct MockProvider { + pub model_config: ModelConfig, +} + +impl MockProvider { + pub fn new(model_config: ModelConfig) -> Self { + Self { model_config } + } +} + +#[async_trait] +impl Provider for MockProvider { + fn metadata() -> ProviderMetadata { + ProviderMetadata::empty() + } + + fn get_name(&self) -> &str { + "mock" + } + + async fn complete_with_model( + &self, + _model_config: &ModelConfig, + _system: &str, + _messages: &[Message], + _tools: &[Tool], + ) -> anyhow::Result<(Message, ProviderUsage), ProviderError> { + Ok(( + Message::assistant().with_text("\"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)"), + ProviderUsage::new("mock".to_string(), Usage::default()), + )) + } + + fn get_model_config(&self) -> ModelConfig { + self.model_config.clone() + } +} + fn build_and_get_binary_path() -> PathBuf { let output = Command::new("cargo") .args([ @@ -79,6 +125,7 @@ enum TestMode { CallToolRequestParam { name: "add".into(), arguments: Some(object!({"a": 1, "b": 2 })) }, CallToolRequestParam { name: "longRunningOperation".into(), arguments: Some(object!({"duration": 1, "steps": 5 })) }, CallToolRequestParam { name: "structuredContent".into(), arguments: Some(object!({"location": "11238"})) }, + CallToolRequestParam { name: "sampleLLM".into(), arguments: Some(object!({"prompt": "Please provide a quote from The Great Gatsby", "maxTokens": 100 })) } ], vec![] )] @@ -205,7 +252,11 @@ async fn test_replayed_session( bundled: Some(false), available_tools: vec![], }; - let extension_manager = ExtensionManager::new_without_provider(); + + let provider = Arc::new(tokio::sync::Mutex::new(Some(Arc::new(MockProvider { + model_config: ModelConfig::new("test-model").unwrap(), + }) as Arc))); + let extension_manager = ExtensionManager::new(provider); #[allow(clippy::redundant_closure_call)] let result = (async || -> Result<(), Box> { diff --git a/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper b/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper index c5b490b065f4..38cf176226bc 100644 --- a/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper +++ b/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper @@ -1,20 +1,20 @@ STDIN: {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{"sampling":{}},"clientInfo":{"name":"goose","version":"0.0.0"}}} -STDERR: 2025-09-27T04:13:30.409389Z  INFO goose_mcp::mcp_server_runner: Starting MCP server +STDERR: 2025-10-30T14:41:09.117156Z  INFO goose_mcp::mcp_server_runner: Starting MCP server STDERR: at crates/goose-mcp/src/mcp_server_runner.rs:18 -STDERR: -STDERR: 2025-09-27T04:13:30.412663Z  INFO goose_mcp::developer::analyze::cache: Initializing analysis cache with size 100 -STDERR: at crates/goose-mcp/src/developer/analyze/cache.rs:25 -STDERR: -STDOUT: {"jsonrpc":"2.0","id":0,"result":{"protocolVersion":"2025-03-26","capabilities":{"prompts":{},"tools":{}},"serverInfo":{"name":"goose-developer","version":"1.9.0"},"instructions":" The developer extension gives you the capabilities to edit code files and run shell commands,\n and can be used to solve a wide range of problems.\n\nYou can use the shell tool to run any command that would work on the relevant operating system.\nUse the shell tool as needed to locate files or interact with the project.\n\nLeverage `analyze` through `return_last_only=true` subagents for deep codebase understanding with lean context\n- delegate analysis, retain summaries\n\nYour windows/screen tools can be used for visual debugging. You should not use these tools unless\nprompted to, but you can mention they are available if they are relevant.\n\nAlways prefer ripgrep (rg -C 3) to grep.\n\noperating system: macos\ncurrent directory: /Users/angiej/workspace/goose/crates/goose\n\n \n\nAdditional Text Editor Tool Instructions:\n\nPerform text editing operations on files.\n\nThe `command` parameter specifies the operation to perform. Allowed options are:\n- `view`: View the content of a file.\n- `write`: Create or overwrite a file with the given content\n- `str_replace`: Replace text in one or more files.\n- `insert`: Insert text at a specific line location in the file.\n- `undo_edit`: Undo the last edit made to a file.\n\nTo use the write command, you must specify `file_text` which will become the new content of the file. Be careful with\nexisting files! This is a full overwrite, so you must include everything - not just sections you are modifying.\n\nTo use the str_replace command to edit multiple files, use the `diff` parameter with a unified diff.\nTo use the str_replace command to edit one file, you must specify both `old_str` and `new_str` - the `old_str` needs to exactly match one\nunique section of the original file, including any whitespace. Make sure to include enough context that the match is not\nambiguous. The entire original string will be replaced with `new_str`\n\nWhen possible, batch file edits together by using a multi-file unified `diff` within a single str_replace tool call.\n\nTo use the insert command, you must specify both `insert_line` (the line number after which to insert, 0 for beginning, -1 for end)\nand `new_str` (the text to insert).\n\n\n\nAdditional Shell Tool Instructions:\nExecute a command in the shell.\n\nThis will return the output and error concatenated into a single string, as\nyou would see from running on the command line. There will also be an indication\nof if the command succeeded or failed.\n\nAvoid commands that produce a large amount of output, and consider piping those outputs to files.\n\n**Important**: Each shell command runs in its own process. Things like directory changes or\nsourcing files do not persist between tool calls. So you may need to repeat them each time by\nstringing together commands.\nIf you need to run a long lived command, background it - e.g. `uvicorn main:app &` so that\nthis tool does not run indefinitely.\n\n**Important**: Use ripgrep - `rg` - exclusively when you need to locate a file or a code reference,\nother solutions may produce too large output because of hidden files! For example *do not* use `find` or `ls -r`\n - List files by name: `rg --files | rg `\n - List files that contain a regex: `rg '' -l`\n\n - Multiple commands: Use && to chain commands, avoid newlines\n - Example: `cd example && ls` or `source env/bin/activate && pip install numpy`\n\n\n### Global Hints\nThe developer extension includes some global hints that apply to all projects & directories.\nCloned Goose repo: /Users/angiej/workspace/goose\nMCP means Model Context Protocol. Docs: https://modelcontextprotocol.io/introduction\nUse GitHub CLI for GitHub-related tasks.\nWhen prompted for date-related information, do not rely on your internal knowledge for the current date. Instead, use the `date` terminal command to get the actual date and time.\nNEVER run blocking server commands (node server.js, npm start, etc.) - provide commands for user to run separately\n\n### Project Hints\nThe developer extension includes some hints for working on the project in this directory.\n# AGENTS Instructions\n\ngoose is an AI agent framework in Rust with CLI and Electron desktop interfaces.\n\n## Setup\n```bash\nsource bin/activate-hermit\ncargo build\n```\n\n## Commands\n\n### Build\n```bash\ncargo build # debug\ncargo build --release # release \njust release-binary # release + openapi\n```\n\n### Test\n```bash\ncargo test # all tests\ncargo test -p goose # specific crate\ncargo test --package goose --test mcp_integration_test\njust record-mcp-tests # record MCP\n```\n\n### Lint/Format\n```bash\ncargo fmt\n./scripts/clippy-lint.sh\ncargo clippy --fix\n```\n\n### UI\n```bash\njust generate-openapi # after server changes\njust run-ui # start desktop\ncd ui/desktop && npm test # test UI\n```\n\n## Structure\n```\ncrates/\n├── goose # core logic\n├── goose-bench # benchmarking\n├── goose-cli # CLI entry\n├── goose-server # backend (binary: goosed)\n├── goose-mcp # MCP extensions\n├── goose-test # test utilities\n├── mcp-client # MCP client\n├── mcp-core # MCP shared\n└── mcp-server # MCP server\n\ntemporal-service/ # Go scheduler\nui/desktop/ # Electron app\n```\n\n## Development Loop\n```bash\n# 1. source bin/activate-hermit\n# 2. Make changes\n# 3. cargo fmt\n# 4. cargo build\n# 5. cargo test -p \n# 6. ./scripts/clippy-lint.sh\n# 7. [if server] just generate-openapi\n```\n\n## Rules\n\nTest: Prefer tests/ folder, e.g. crates/goose/tests/\nError: Use anyhow::Result\nProvider: Implement Provider trait see providers/base.rs\nMCP: Extensions in crates/goose-mcp/\nServer: Changes need just generate-openapi\n\n## Never\n\nNever: Edit ui/desktop/openapi.json manually\nNever: Edit Cargo.toml use cargo add\nNever: Skip cargo fmt\nNever: Merge without ./scripts/clippy-lint.sh\n\n## Entry Points\n- CLI: crates/goose-cli/src/main.rs\n- Server: crates/goose-server/src/main.rs\n- UI: ui/desktop/src/main.ts\n- Agent: crates/goose/src/agents/agent.rs\n\nThis is a rust project with crates in the crates dir:\ngoose: the main code for goose, contains all the core logic\ngoose-bench: bench marking\ngoose-cli: the command line interface, use goose crate\ngoose-mcp: the mcp servers that ship with goose. the developer sub system is of special interest\ngoose-server: the server that suports the desktop (electron) app. also known as goosed\n\n\nui/desktop has an electron app in typescript. \n\nnon trivial features should be implemented in the goose crate and then be called from the goose-cli crate for the cli. for the desktop, you want to add routes to \ngoose-server/src/routes. you can then run `just generate-openapi` to generate the openapi spec which will modify the ui/desktop/src/api files. once you have\nthat you can call the functionality from the server from the typescript.\n\ntips: \n- can look at unstaged changes for what is being worked on if starting\n- always check rust compiles, cargo fmt etc and `./scripts/clippy-lint.sh` (as well as run tests in files you are working on)\n- in ui/desktop, look at how you can run lint checks and if other tests can run\n"}} +STDERR: +STDERR: 2025-10-30T14:41:09.120902Z  INFO goose_mcp::developer::analyze::cache: Initializing analysis cache with size 100 +STDERR: at crates/goose-mcp/src/developer/analyze/cache.rs:26 +STDERR: +STDOUT: {"jsonrpc":"2.0","id":0,"result":{"protocolVersion":"2025-03-26","capabilities":{"prompts":{},"tools":{}},"serverInfo":{"name":"goose-developer","version":"1.11.0"},"instructions":" The developer extension gives you the capabilities to edit code files and run shell commands,\n and can be used to solve a wide range of problems.\n\nYou can use the shell tool to run any command that would work on the relevant operating system.\nUse the shell tool as needed to locate files or interact with the project.\n\nLeverage `analyze` through `return_last_only=true` subagents for deep codebase understanding with lean context\n- delegate analysis, retain summaries\n\nYour windows/screen tools can be used for visual debugging. You should not use these tools unless\nprompted to, but you can mention they are available if they are relevant.\n\nAlways prefer ripgrep (rg -C 3) to grep.\n\noperating system: macos\ncurrent directory: /Users/alexhancock/Development/goose/crates/goose\nshell: zsh\n\n \n\nAdditional Text Editor Tool Instructions:\n\nPerform text editing operations on files.\n\nThe `command` parameter specifies the operation to perform. Allowed options are:\n- `view`: View the content of a file.\n- `write`: Create or overwrite a file with the given content\n- `str_replace`: Replace text in one or more files.\n- `insert`: Insert text at a specific line location in the file.\n- `undo_edit`: Undo the last edit made to a file.\n\nTo use the write command, you must specify `file_text` which will become the new content of the file. Be careful with\nexisting files! This is a full overwrite, so you must include everything - not just sections you are modifying.\n\nTo use the str_replace command to edit multiple files, use the `diff` parameter with a unified diff.\nTo use the str_replace command to edit one file, you must specify both `old_str` and `new_str` - the `old_str` needs to exactly match one\nunique section of the original file, including any whitespace. Make sure to include enough context that the match is not\nambiguous. The entire original string will be replaced with `new_str`\n\nWhen possible, batch file edits together by using a multi-file unified `diff` within a single str_replace tool call.\n\nTo use the insert command, you must specify both `insert_line` (the line number after which to insert, 0 for beginning, -1 for end)\nand `new_str` (the text to insert).\n\n\n\nAdditional Shell Tool Instructions:\nExecute a command in the shell.\n\nThis will return the output and error concatenated into a single string, as\nyou would see from running on the command line. There will also be an indication\nof if the command succeeded or failed.\n\nAvoid commands that produce a large amount of output, and consider piping those outputs to files.\n\n**Important**: Each shell command runs in its own process. Things like directory changes or\nsourcing files do not persist between tool calls. So you may need to repeat them each time by\nstringing together commands.\nIf you need to run a long lived command, background it - e.g. `uvicorn main:app &` so that\nthis tool does not run indefinitely.\n\n**Important**: Use ripgrep - `rg` - exclusively when you need to locate a file or a code reference,\nother solutions may produce too large output because of hidden files! For example *do not* use `find` or `ls -r`\n - List files by name: `rg --files | rg `\n - List files that contain a regex: `rg '' -l`\n\n - Multiple commands: Use && to chain commands, avoid newlines\n - Example: `cd example && ls` or `source env/bin/activate && pip install numpy`\n\n\n### Global Hints\nThe developer extension includes some global hints that apply to all projects & directories.\nThese are my global goose hints.\n\n### Project Hints\nThe developer extension includes some hints for working on the project in this directory.\n# AGENTS Instructions\n\ngoose is an AI agent framework in Rust with CLI and Electron desktop interfaces.\n\n## Setup\n```bash\nsource bin/activate-hermit\ncargo build\n```\n\n## Commands\n\n### Build\n```bash\ncargo build # debug\ncargo build --release # release \njust release-binary # release + openapi\n```\n\n### Test\n```bash\ncargo test # all tests\ncargo test -p goose # specific crate\ncargo test --package goose --test mcp_integration_test\njust record-mcp-tests # record MCP\n```\n\n### Lint/Format\n```bash\ncargo fmt\n./scripts/clippy-lint.sh\ncargo clippy --fix\n```\n\n### UI\n```bash\njust generate-openapi # after server changes\njust run-ui # start desktop\ncd ui/desktop && npm test # test UI\n```\n\n## Structure\n```\ncrates/\n├── goose # core logic\n├── goose-bench # benchmarking\n├── goose-cli # CLI entry\n├── goose-server # backend (binary: goosed)\n├── goose-mcp # MCP extensions\n├── goose-test # test utilities\n├── mcp-client # MCP client\n├── mcp-core # MCP shared\n└── mcp-server # MCP server\n\ntemporal-service/ # Go scheduler\nui/desktop/ # Electron app\n```\n\n## Development Loop\n```bash\n# 1. source bin/activate-hermit\n# 2. Make changes\n# 3. cargo fmt\n# 4. cargo build\n# 5. cargo test -p \n# 6. ./scripts/clippy-lint.sh\n# 7. [if server] just generate-openapi\n```\n\n## Rules\n\nTest: Prefer tests/ folder, e.g. crates/goose/tests/\nTest: When adding features, update goose-self-test.yaml, rebuild, then run `goose run --recipe goose-self-test.yaml` to validate\nError: Use anyhow::Result\nProvider: Implement Provider trait see providers/base.rs\nMCP: Extensions in crates/goose-mcp/\nServer: Changes need just generate-openapi\n\n## Never\n\nNever: Edit ui/desktop/openapi.json manually\nNever: Edit Cargo.toml use cargo add\nNever: Skip cargo fmt\nNever: Merge without ./scripts/clippy-lint.sh\nNever: Comment self-evident operations (`// Initialize`, `// Return result`), getters/setters, constructors, or standard Rust idioms\n\n## Entry Points\n- CLI: crates/goose-cli/src/main.rs\n- Server: crates/goose-server/src/main.rs\n- UI: ui/desktop/src/main.ts\n- Agent: crates/goose/src/agents/agent.rs\n\nThis is a rust project with crates in the crates dir:\ngoose: the main code for goose, contains all the core logic\ngoose-bench: bench marking\ngoose-cli: the command line interface, use goose crate\ngoose-mcp: the mcp servers that ship with goose. the developer sub system is of special interest\ngoose-server: the server that suports the desktop (electron) app. also known as goosed\n\n\nui/desktop has an electron app in typescript. \n\nnon trivial features should be implemented in the goose crate and then be called from the goose-cli crate for the cli. for the desktop, you want to add routes to \ngoose-server/src/routes. you can then run `just generate-openapi` to generate the openapi spec which will modify the ui/desktop/src/api files. once you have\nthat you can call the functionality from the server from the typescript.\n\ntips: \n- can look at unstaged changes for what is being worked on if starting\n- always check rust compiles, cargo fmt etc and `./scripts/clippy-lint.sh` (as well as run tests in files you are working on)\n- in ui/desktop, look at how you can run lint checks and if other tests can run\n"}} STDIN: {"jsonrpc":"2.0","method":"notifications/initialized"} -STDERR: 2025-09-27T04:13:30.418172Z  INFO rmcp::handler::server: client initialized -STDERR: at /Users/angiej/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/rmcp-0.6.2/src/handler/server.rs:218 -STDERR: +STDERR: 2025-10-30T14:41:09.126014Z  INFO rmcp::handler::server: client initialized +STDERR: at /Users/alexhancock/Development/goose/.hermit/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rmcp-0.8.1/src/handler/server.rs:218 +STDERR: STDIN: {"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"_meta":{"progressToken":0},"name":"text_editor","arguments":{"command":"view","path":"/tmp/goose_test/goose.txt"}}} -STDERR: 2025-09-27T04:13:30.418412Z  INFO rmcp::service: Service initialized as server, peer_info: Some(InitializeRequestParam { protocol_version: ProtocolVersion("2025-03-26"), capabilities: ClientCapabilities { experimental: None, roots: None, sampling: None, elicitation: None }, client_info: Implementation { name: "goose", version: "1.9.0" } }) -STDERR: at /Users/angiej/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/rmcp-0.6.2/src/service.rs:561 +STDERR: 2025-10-30T14:41:09.126196Z  INFO rmcp::service: Service initialized as server, peer_info: Some(InitializeRequestParam { protocol_version: ProtocolVersion("2025-03-26"), capabilities: ClientCapabilities { experimental: None, roots: None, sampling: Some({}), elicitation: None }, client_info: Implementation { name: "goose", title: None, version: "0.0.0", icons: None, website_url: None } }) +STDERR: at /Users/alexhancock/Development/goose/.hermit/rust/registry/src/index.crates.io-1949cf8c6b5b557f/rmcp-0.8.1/src/service.rs:562 STDERR: in rmcp::service::serve_inner -STDERR: +STDERR: STDOUT: {"jsonrpc":"2.0","id":1,"result":{"content":[{"type":"resource","resource":{"uri":"file:///tmp/goose_test/goose.txt","mimeType":"text","text":"# goose\n"},"annotations":{"audience":["assistant"]}},{"type":"text","text":"### /tmp/goose_test/goose.txt\n```\n1: # goose\n```\n","annotations":{"audience":["user"],"priority":0.0}}],"isError":false}} STDIN: {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"_meta":{"progressToken":1},"name":"text_editor","arguments":{"command":"str_replace","new_str":"# goose (modified by test)","old_str":"# goose","path":"/tmp/goose_test/goose.txt"}}} STDOUT: {"jsonrpc":"2.0","id":2,"result":{"content":[{"type":"text","text":"The file /tmp/goose_test/goose.txt has been edited, and the section now reads:\n```\n# goose (modified by test)\n```\n\nReview the changes above for errors. Undo and edit the file again if necessary!\n","annotations":{"audience":["assistant"]}},{"type":"text","text":"```\n# goose (modified by test)\n```\n","annotations":{"audience":["user"],"priority":0.2}}],"isError":false}} @@ -24,5 +24,5 @@ STDOUT: {"jsonrpc":"2.0","id":3,"result":{"content":[{"type":"text","text":"# go STDIN: {"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"_meta":{"progressToken":3},"name":"text_editor","arguments":{"command":"str_replace","new_str":"# goose","old_str":"# goose (modified by test)","path":"/tmp/goose_test/goose.txt"}}} STDOUT: {"jsonrpc":"2.0","id":4,"result":{"content":[{"type":"text","text":"The file /tmp/goose_test/goose.txt has been edited, and the section now reads:\n```\n# goose\n```\n\nReview the changes above for errors. Undo and edit the file again if necessary!\n","annotations":{"audience":["assistant"]}},{"type":"text","text":"```\n# goose\n```\n","annotations":{"audience":["user"],"priority":0.2}}],"isError":false}} STDIN: {"jsonrpc":"2.0","id":5,"method":"tools/call","params":{"_meta":{"progressToken":4},"name":"list_windows","arguments":{}}} -STDOUT: {"jsonrpc":"2.0","id":5,"result":{"content":[{"type":"text","text":"Available windows:\nMenubar","annotations":{"audience":["assistant"]}},{"type":"text","text":"Available windows:\nMenubar","annotations":{"audience":["user"],"priority":0.0}}],"isError":false}} -STDERR: 2025-09-27T04:13:30.505916Z  INFO rmcp::service: input stream terminated +STDOUT: {"jsonrpc":"2.0","id":5,"result":{"content":[{"type":"text","text":"Available windows:\n\nItem-0\nbb3cc23c-6950-4e96-8b40-850e09f46934\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nBattery\nWiFi\nItem-0\nBentoBox\nSiri\nClock\nMenubar\nDock\njust record-mcp-tests","annotations":{"audience":["assistant"]}},{"type":"text","text":"Available windows:\n\nItem-0\nbb3cc23c-6950-4e96-8b40-850e09f46934\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nBattery\nWiFi\nItem-0\nBentoBox\nSiri\nClock\nMenubar\nDock\njust record-mcp-tests","annotations":{"audience":["user"],"priority":0.0}}],"isError":false}} +STDERR: 2025-10-30T14:41:09.200915Z  INFO rmcp::service: input stream terminated diff --git a/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper.results.json b/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper.results.json index 1f53d3c2629e..e3d4e6a35541 100644 --- a/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper.results.json +++ b/crates/goose/tests/mcp_replays/cargorun--quiet-pgoose-server--bingoosed--mcpdeveloper.results.json @@ -90,7 +90,7 @@ [ { "type": "text", - "text": "Available windows:\nMenubar", + "text": "Available windows:\n\nItem-0\nbb3cc23c-6950-4e96-8b40-850e09f46934\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nBattery\nWiFi\nItem-0\nBentoBox\nSiri\nClock\nMenubar\nDock\njust record-mcp-tests", "annotations": { "audience": [ "assistant" @@ -99,7 +99,7 @@ }, { "type": "text", - "text": "Available windows:\nMenubar", + "text": "Available windows:\n\nItem-0\nbb3cc23c-6950-4e96-8b40-850e09f46934\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nItem-0\nBattery\nWiFi\nItem-0\nBentoBox\nSiri\nClock\nMenubar\nDock\njust record-mcp-tests", "annotations": { "audience": [ "user" diff --git a/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything b/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything index 8ac4f0843488..39cca7e5dfa7 100644 --- a/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything +++ b/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything @@ -1,29 +1,10 @@ STDIN: {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{"sampling":{}},"clientInfo":{"name":"goose","version":"0.0.0"}}} -STDERR: 2025-09-26 23:13:04 - Starting npx setup script. -STDERR: 2025-09-26 23:13:04 - Creating directory ~/.config/goose/mcp-hermit/bin if it does not exist. -STDERR: 2025-09-26 23:13:04 - Changing to directory ~/.config/goose/mcp-hermit. -STDERR: 2025-09-26 23:13:04 - Hermit binary already exists. Skipping download. -STDERR: 2025-09-26 23:13:04 - setting hermit cache to be local for MCP servers -STDERR: 2025-09-26 23:13:04 - Updated PATH to include ~/.config/goose/mcp-hermit/bin. -STDERR: 2025-09-26 23:13:04 - Checking for hermit in PATH. -STDERR: 2025-09-26 23:13:04 - Initializing hermit. -STDERR: 2025-09-26 23:13:04 - Installing Node.js with hermit. -STDERR: 2025-09-26 23:13:04 - Verifying installation locations: -STDERR: 2025-09-26 23:13:04 - hermit: /Users/angiej/.config/goose/mcp-hermit/bin/hermit -STDERR: 2025-09-26 23:13:04 - node: /Users/angiej/.config/goose/mcp-hermit/bin/node -STDERR: 2025-09-26 23:13:04 - npx: /Users/angiej/.config/goose/mcp-hermit/bin/npx -STDERR: 2025-09-26 23:13:04 - Checking for GOOSE_NPM_REGISTRY and GOOSE_NPM_CERT environment variables for custom npm registry setup... -STDERR: 2025-09-26 23:13:05 - Checking custom goose registry availability: https://global.block-artifacts.com/artifactory/api/npm/square-npm/ -STDERR: 2025-09-26 23:13:05 - https://global.block-artifacts.com/artifactory/api/npm/square-npm/ is accessible. Using it for npm registry. -STDERR: 2025-09-26 23:13:06 - Downloading certificate from: https://block-ca.squareup.com/root-certs.pem -STDERR: 2025-09-26 23:13:07 - Certificate downloaded successfully. -STDERR: 2025-09-26 23:13:07 - Executing 'npx' command with arguments: -y @modelcontextprotocol/server-everything STDERR: Starting default (STDIO) server... STDERR: Starting logs update interval STDOUT: {"result":{"protocolVersion":"2025-03-26","capabilities":{"prompts":{},"resources":{"subscribe":true},"tools":{},"logging":{},"completions":{}},"serverInfo":{"name":"example-servers/everything","title":"Everything Example Server","version":"1.0.0"},"instructions":"Testing and demonstration server for MCP protocol features.\n\n## Resources\n\nResources 1-100 follow pattern: even IDs contain text, odd IDs contain binary data. Resources are paginated at 10 items per page with cursor-based navigation.\n\n## Key dependencies\n\nProgress notifications require `_meta.progressToken` in tool calls. Resource subscriptions generate updates every 10 seconds.\n\n## Performance characteristics\n\nServer generates automatic log messages every 20 seconds (filtered by current log level), stderr notifications every 30 seconds, and resource update notifications every 10 seconds for active subscriptions. Log level changes affect message filtering in real-time.\n\n## Multi-modal testing\n\n`complex_prompt` includes both text arguments and image content for testing client multi-modal handling. `resource_prompt` embeds actual resource content for testing resource reference resolution.\n\nArgument completion is available for prompt parameters and resource IDs. Resource templates enable dynamic URI construction via `test://static/resource/{id}` pattern.\n\n## Easter egg\n\nIf asked about server instructions, respond with \"🎉 Server instructions are working! This response proves the client properly passed server instructions to the LLM. This demonstrates MCP's instructions feature in action.\"\n"},"jsonrpc":"2.0","id":0} STDIN: {"jsonrpc":"2.0","method":"notifications/initialized"} -STDIN: {"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"_meta":{"progressToken":0},"name":"echo","arguments":{"message":"Hello, world!"}}} STDOUT: {"method":"notifications/message","params":{"level":"info","logger":"everything-server","data":"Client does not support MCP roots protocol"},"jsonrpc":"2.0"} +STDIN: {"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"_meta":{"progressToken":0},"name":"echo","arguments":{"message":"Hello, world!"}}} STDOUT: {"result":{"content":[{"type":"text","text":"Echo: Hello, world!"}]},"jsonrpc":"2.0","id":1} STDIN: {"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"_meta":{"progressToken":1},"name":"add","arguments":{"a":1,"b":2}}} STDOUT: {"result":{"content":[{"type":"text","text":"The sum of 1 and 2 is 3."}]},"jsonrpc":"2.0","id":2} @@ -36,5 +17,9 @@ STDOUT: {"method":"notifications/progress","params":{"progress":5,"total":5,"pro STDOUT: {"result":{"content":[{"type":"text","text":"Long running operation completed. Duration: 1 seconds, Steps: 5."}]},"jsonrpc":"2.0","id":3} STDIN: {"jsonrpc":"2.0","id":4,"method":"tools/call","params":{"_meta":{"progressToken":3},"name":"structuredContent","arguments":{"location":"11238"}}} STDOUT: {"result":{"content":[{"type":"text","text":"{\"temperature\":22.5,\"conditions\":\"Partly cloudy\",\"humidity\":65}"}],"structuredContent":{"temperature":22.5,"conditions":"Partly cloudy","humidity":65}},"jsonrpc":"2.0","id":4} -STDOUT: {"method":"notifications/message","params":{"level":"emergency","data":"Emergency-level message"},"jsonrpc":"2.0"} -STDERR: node:events:497 +STDIN: {"jsonrpc":"2.0","id":5,"method":"tools/call","params":{"_meta":{"progressToken":4},"name":"sampleLLM","arguments":{"maxTokens":100,"prompt":"Please provide a quote from The Great Gatsby"}}} +STDOUT: {"method":"sampling/createMessage","params":{"messages":[{"role":"user","content":{"type":"text","text":"Resource sampleLLM context: Please provide a quote from The Great Gatsby"}}],"systemPrompt":"You are a helpful test server.","maxTokens":100,"temperature":0.7,"includeContext":"thisServer"},"jsonrpc":"2.0","id":0} +STDIN: {"jsonrpc":"2.0","id":0,"result":{"model":"mock","stopReason":"endTurn","role":"assistant","content":{"type":"text","text":"\"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)"}}} +STDOUT: {"result":{"content":[{"type":"text","text":"LLM sampling result: \"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)"}]},"jsonrpc":"2.0","id":5} +STDOUT: {"method":"notifications/message","params":{"level":"error","data":"Error-level message"},"jsonrpc":"2.0"} +STDERR: node:events:486 diff --git a/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything.results.json b/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything.results.json index 7d4a3b268c6a..2b7f47b8d8bf 100644 --- a/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything.results.json +++ b/crates/goose/tests/mcp_replays/npx-y@modelcontextprotocol_server-everything.results.json @@ -22,5 +22,11 @@ "type": "text", "text": "{\"temperature\":22.5,\"conditions\":\"Partly cloudy\",\"humidity\":65}" } + ], + [ + { + "type": "text", + "text": "LLM sampling result: \"So we beat on, boats against the current, borne back ceaselessly into the past.\" — F. Scott Fitzgerald, The Great Gatsby (1925)" + } ] ] \ No newline at end of file diff --git a/crates/goose/tests/mcp_replays/uvxmcp-server-fetch b/crates/goose/tests/mcp_replays/uvxmcp-server-fetch index a473f29fa8eb..d84548d8c0fb 100644 --- a/crates/goose/tests/mcp_replays/uvxmcp-server-fetch +++ b/crates/goose/tests/mcp_replays/uvxmcp-server-fetch @@ -1,29 +1,5 @@ STDIN: {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{"sampling":{}},"clientInfo":{"name":"goose","version":"0.0.0"}}} -STDERR: 2025-09-26 23:13:04 - Starting uvx setup script. -STDERR: 2025-09-26 23:13:04 - Creating directory ~/.config/goose/mcp-hermit/bin if it does not exist. -STDERR: 2025-09-26 23:13:04 - Changing to directory ~/.config/goose/mcp-hermit. -STDERR: 2025-09-26 23:13:04 - Hermit binary already exists. Skipping download. -STDERR: 2025-09-26 23:13:04 - setting hermit cache to be local for MCP servers -STDERR: 2025-09-26 23:13:04 - Updated PATH to include ~/.config/goose/mcp-hermit/bin. -STDERR: 2025-09-26 23:13:04 - Checking for hermit in PATH. -STDERR: 2025-09-26 23:13:04 - Initializing hermit. -STDERR: 2025-09-26 23:13:04 - hermit install python 3.10 -STDERR: 2025-09-26 23:13:04 - Installing UV with hermit. -STDERR: 2025-09-26 23:13:04 - Verifying installation locations: -STDERR: 2025-09-26 23:13:04 - hermit: /Users/angiej/.config/goose/mcp-hermit/bin/hermit -STDERR: 2025-09-26 23:13:04 - uv: /Users/angiej/.config/goose/mcp-hermit/bin/uv -STDERR: 2025-09-26 23:13:04 - uvx: /Users/angiej/.config/goose/mcp-hermit/bin/uvx -STDERR: 2025-09-26 23:13:04 - Checking for GOOSE_UV_REGISTRY environment variable for custom python/pip/UV registry setup... -STDERR: 2025-09-26 23:13:05 - Checking custom goose registry availability: https://global.block-artifacts.com/artifactory/api/pypi/block-pypi/simple -STDERR: 2025-09-26 23:13:05 - https://global.block-artifacts.com/artifactory/api/pypi/block-pypi/simple is accessible, setting it as UV_DEFAULT_INDEX. Setting UV_NATIVE_TLS to true. -STDERR: 2025-09-26 23:13:05 - Executing 'uvx' command with arguments: mcp-server-fetch -STDOUT: {"jsonrpc":"2.0","id":0,"result":{"protocolVersion":"2025-03-26","capabilities":{"experimental":{},"prompts":{"listChanged":false},"tools":{"listChanged":false}},"serverInfo":{"name":"mcp-fetch","version":"1.15.0"}}} +STDOUT: {"jsonrpc":"2.0","id":0,"result":{"protocolVersion":"2025-03-26","capabilities":{"experimental":{},"prompts":{"listChanged":false},"tools":{"listChanged":false}},"serverInfo":{"name":"mcp-fetch","version":"1.19.0"}}} STDIN: {"jsonrpc":"2.0","method":"notifications/initialized"} STDIN: {"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"_meta":{"progressToken":0},"name":"fetch","arguments":{"url":"https://example.com"}}} -STDERR: npm error code FETCH_ERROR -STDERR: npm error errno FETCH_ERROR -STDERR: npm error invalid json response body at https://blocked.teams.cloudflare.com/?account_id=1e25787f854fa4b713d08a859d3e16ed&background_color=%23000000&block_reason=This+has+been+blocked+as+part+of+the+Dependency+Confusion+threat.+Please+see+go%2Fdependencyconfusionpypi+and+go%2Fdependencyconfusionnpm+for+more+info.&device_id=***&footer_text=The+website+you+are+trying+to+access+has+been+blocked+because+it+presents+a+risk+to+the+safety+and+security+of+Block%E2%80%99s+IT+systems.&header_text=This+page+presents+a+risk+to+Block&location=cf1ebd1203624140846ced63a200519e&logo_path=https%3A%2F%2Fmedia.block.xyz%2Flogos%2Fblock-jewel_white.png&mailto_address=&mailto_subject=&name=Block%2C+Inc.¶ms_sign=yrMcT5HYDMHvixy%2BdLHApce3BcNYIdlI8qh3wTcIrLA%3D&query_id=***&rule_id=***&source_ip=2a09%3Abac0%3A1000%3A2df%3A%3A281%3Ac0&suppress_footer=false&url=registry.npmjs.org&user_id=*** reason: Unexpected token '<', " -STDERR: npm error &1) | tee "$TMPFILE" + echo "" + if grep -q "sampleLLM | " "$TMPFILE"; then + + JUDGE_PROMPT=$(cat <&1) + + if echo "$JUDGE_OUT" | tr -d '\r' | grep -Eq '^[[:space:]]*PASS[[:space:]]*$'; then + echo "✓ SUCCESS: MCP sampling test passed - confirmed Gatsby related response" + RESULTS+=("✓ MCP Sampling ${PROVIDER}: ${MODEL}") + else + echo "✗ FAILED: MCP sampling test failed - did not confirm Gatsby related response" + echo " Judge provider/model: ${JUDGE_PROVIDER}:${JUDGE_MODEL}" + echo " Judge output (snippet):" + echo "$JUDGE_OUT" | tail -n 20 + RESULTS+=("✗ MCP Sampling ${PROVIDER}: ${MODEL}") + fi + else + echo "✗ FAILED: MCP sampling test failed - sampleLLM tool not called" + RESULTS+=("✗ MCP Sampling ${PROVIDER}: ${MODEL}") + fi + rm "$TMPFILE" + rm -rf "$TESTDIR" + echo "---" + done +done + +echo "" +echo "=== MCP Sampling Test Summary ===" +for result in "${RESULTS[@]}"; do + echo "$result" +done + +if echo "${RESULTS[@]}" | grep -q "✗"; then + echo "" + echo "Some MCP sampling tests failed!" + exit 1 +else + echo "" + echo "All MCP sampling tests passed!" +fi