diff --git a/crates/goose-cli/src/commands/mcp.rs b/crates/goose-cli/src/commands/mcp.rs index 0db83f3efc0d..325a3fa4d347 100644 --- a/crates/goose-cli/src/commands/mcp.rs +++ b/crates/goose-cli/src/commands/mcp.rs @@ -32,6 +32,7 @@ pub async fn run_server(name: &str) -> Result<()> { "computercontroller" => Some(Box::new(RouterService(ComputerControllerRouter::new()))), "autovisualiser" => Some(Box::new(RouterService(AutoVisualiserRouter::new()))), "memory" => Some(Box::new(RouterService(MemoryRouter::new()))), + "tutorial" => Some(Box::new(RouterService(TutorialRouter::new()))), _ => None, }; diff --git a/crates/goose-mcp/Cargo.toml b/crates/goose-mcp/Cargo.toml index feb195ded449..5c21287ac043 100644 --- a/crates/goose-mcp/Cargo.toml +++ b/crates/goose-mcp/Cargo.toml @@ -11,6 +11,7 @@ description.workspace = true workspace = true [dependencies] + mcp-core = { path = "../mcp-core" } mcp-server = { path = "../mcp-server" } rmcp = { workspace = true } diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index 0503641f24e4..53e04fefbcc0 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -41,7 +41,6 @@ use crate::providers::errors::ProviderError; use crate::recipe::{Author, Recipe, Response, Settings, SubRecipe}; use crate::scheduler_trait::SchedulerTrait; use crate::session; -use crate::session::extension_data::ExtensionState; use crate::tool_monitor::{ToolCall, ToolMonitor}; use crate::utils::is_token_cancelled; use mcp_core::ToolResult; @@ -58,9 +57,7 @@ use super::final_output_tool::FinalOutputTool; use super::platform_tools; use super::tool_execution::{ToolCallResult, CHAT_MODE_TOOL_SKIPPED_RESPONSE, DECLINED_RESPONSE}; use crate::agents::subagent_task_config::TaskConfig; -use crate::agents::todo_tools::{ - todo_read_tool, todo_write_tool, TODO_READ_TOOL_NAME, TODO_WRITE_TOOL_NAME, -}; + use crate::conversation::message::{Message, ToolRequest}; const DEFAULT_MAX_TURNS: u32 = 1000; @@ -226,6 +223,9 @@ impl Agent { unfixed_conversation: Conversation, session: &Option, ) -> Result { + // Ensure TODO extension is registered if we have a session + self.ensure_todo_extension(session).await; + let unfixed_messages = unfixed_conversation.messages().clone(); let (conversation, issues) = fix_conversation(unfixed_conversation.clone()); if !issues.is_empty() { @@ -484,100 +484,6 @@ impl Agent { "Frontend tool execution required".to_string(), None, ))) - } else if tool_call.name == TODO_READ_TOOL_NAME { - // Handle task planner read tool - let session_file_path = if let Some(session_config) = session { - session::storage::get_path(session_config.id.clone()).ok() - } else { - None - }; - - let todo_content = if let Some(path) = session_file_path { - session::storage::read_metadata(&path) - .ok() - .and_then(|m| { - session::TodoState::from_extension_data(&m.extension_data) - .map(|state| state.content) - }) - .unwrap_or_default() - } else { - String::new() - }; - - ToolCallResult::from(Ok(vec![Content::text(todo_content)])) - } else if tool_call.name == TODO_WRITE_TOOL_NAME { - // Handle task planner write tool - let content = tool_call - .arguments - .get("content") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - // Character limit validation - let char_count = content.chars().count(); - let max_chars = std::env::var("GOOSE_TODO_MAX_CHARS") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(50_000); - - if max_chars > 0 && char_count > max_chars { - ToolCallResult::from(Err(ErrorData::new( - ErrorCode::INTERNAL_ERROR, - format!( - "Todo list too large: {} chars (max: {})", - char_count, max_chars - ), - None, - ))) - } else if let Some(session_config) = session { - // Update session metadata with new TODO content - match session::storage::get_path(session_config.id.clone()) { - Ok(path) => match session::storage::read_metadata(&path) { - Ok(mut metadata) => { - let todo_state = session::TodoState::new(content); - todo_state - .to_extension_data(&mut metadata.extension_data) - .ok(); - - let path_clone = path.clone(); - let metadata_clone = metadata.clone(); - let update_result = tokio::task::spawn(async move { - session::storage::update_metadata(&path_clone, &metadata_clone) - .await - }) - .await; - - match update_result { - Ok(Ok(_)) => ToolCallResult::from(Ok(vec![Content::text( - format!("Updated ({} chars)", char_count), - )])), - _ => ToolCallResult::from(Err(ErrorData::new( - ErrorCode::INTERNAL_ERROR, - "Failed to update session metadata".to_string(), - None, - ))), - } - } - Err(_) => ToolCallResult::from(Err(ErrorData::new( - ErrorCode::INTERNAL_ERROR, - "Failed to read session metadata".to_string(), - None, - ))), - }, - Err(_) => ToolCallResult::from(Err(ErrorData::new( - ErrorCode::INTERNAL_ERROR, - "Failed to get session path".to_string(), - None, - ))), - } - } else { - ToolCallResult::from(Err(ErrorData::new( - ErrorCode::INTERNAL_ERROR, - "TODO tools require an active session to persist data".to_string(), - None, - ))) - } } else if tool_call.name == ROUTER_LLM_SEARCH_TOOL_NAME { match self .tool_route_manager @@ -803,9 +709,6 @@ impl Agent { platform_tools::manage_schedule_tool(), ]); - // Add task planner tools - prefixed_tools.extend([todo_read_tool(), todo_write_tool()]); - // Dynamic task tool prefixed_tools.push(create_dynamic_task_tool()); @@ -1613,11 +1516,24 @@ mod tests { async fn test_todo_tools_integration() -> Result<()> { let agent = Agent::new(); - // Test that task planner tools are listed + // Create a mock session to trigger TODO extension registration + let session = SessionConfig { + id: session::Identifier::Name("test-session".to_string()), + working_dir: std::path::PathBuf::from("/tmp"), + schedule_id: None, + execution_mode: None, + max_turns: None, + retry_config: None, + }; + + // Ensure TODO extension is registered + agent.ensure_todo_extension(&Some(session)).await; + + // Test that task planner tools are listed through the extension let tools = agent.list_tools(None).await; - let todo_read = tools.iter().find(|tool| tool.name == TODO_READ_TOOL_NAME); - let todo_write = tools.iter().find(|tool| tool.name == TODO_WRITE_TOOL_NAME); + let todo_read = tools.iter().find(|tool| tool.name == "todo__read"); + let todo_write = tools.iter().find(|tool| tool.name == "todo__write"); assert!(todo_read.is_some(), "TODO read tool should be present"); assert!(todo_write.is_some(), "TODO write tool should be present"); diff --git a/crates/goose/src/agents/mod.rs b/crates/goose/src/agents/mod.rs index ae668b714ea1..4f6879e00315 100644 --- a/crates/goose/src/agents/mod.rs +++ b/crates/goose/src/agents/mod.rs @@ -18,6 +18,7 @@ pub mod subagent; pub mod subagent_execution_tool; pub mod subagent_handler; mod subagent_task_config; +pub mod todo_mcp_client; pub mod todo_tools; mod tool_execution; mod tool_route_manager; diff --git a/crates/goose/src/agents/reply_parts.rs b/crates/goose/src/agents/reply_parts.rs index 14cbc7888e50..38cf74346f54 100644 --- a/crates/goose/src/agents/reply_parts.rs +++ b/crates/goose/src/agents/reply_parts.rs @@ -1,6 +1,7 @@ use anyhow::Result; use std::collections::HashSet; use std::sync::Arc; +use tokio::sync::Mutex; use async_stream::try_stream; use futures::stream::StreamExt; @@ -32,6 +33,50 @@ async fn toolshim_postprocess( } impl Agent { + /// Register TODO extension if session is available + pub async fn ensure_todo_extension( + &self, + session: &Option, + ) { + // Check if TODO extension is already registered + if self + .extension_manager + .list_extensions() + .await + .unwrap_or_default() + .contains(&"todo".to_string()) + { + return; + } + + // Register TODO extension if we have a session + if let Some(session_config) = session { + let todo_client = super::todo_mcp_client::TodoMcpClient::with_session(session_config); + let server_info = { + use mcp_client::client::McpClientTrait; + todo_client.get_info().cloned() + }; + let config = crate::agents::extension::ExtensionConfig::Builtin { + name: "todo".to_string(), + display_name: Some("Task Management".to_string()), + description: Some("Persistent task tracking throughout your session".to_string()), + timeout: None, + bundled: Some(true), + available_tools: vec!["read".to_string(), "write".to_string()], + }; + + self.extension_manager + .add_client( + "todo".to_string(), + config, + Arc::new(Mutex::new(Box::new(todo_client))), + server_info, + None, + ) + .await; + } + } + /// Prepares tools and system prompt for a provider request pub async fn prepare_tools_and_prompt(&self) -> anyhow::Result<(Vec, Vec, String)> { // Get router enabled status diff --git a/crates/goose/src/agents/todo_mcp_client.rs b/crates/goose/src/agents/todo_mcp_client.rs new file mode 100644 index 000000000000..d7b674bd54e3 --- /dev/null +++ b/crates/goose/src/agents/todo_mcp_client.rs @@ -0,0 +1,417 @@ +use std::sync::Arc; + +use async_trait::async_trait; +use mcp_client::client::{Error, McpClientTrait}; +use rmcp::model::{ + CallToolResult, Content, GetPromptResult, InitializeResult, ListPromptsResult, + ListResourcesResult, ListToolsResult, ReadResourceResult, ServerNotification, Tool, +}; +use serde_json::Value; +use tokio::sync::{mpsc, Mutex}; +use tokio_util::sync::CancellationToken; + +use crate::agents::todo_tools::{todo_read_tool, todo_write_tool}; +use crate::agents::types::SessionConfig; +use crate::session::extension_data::ExtensionState; +use crate::session::{self, TodoState}; + +/// Storage trait for TODO persistence +#[async_trait] +pub trait TodoStorage: Send + Sync { + async fn read(&self) -> Result; + async fn write(&self, content: String) -> Result; +} + +/// Session-based TODO storage implementation +pub struct SessionTodoStorage { + session_id: session::Identifier, +} + +impl SessionTodoStorage { + pub fn new(session_id: session::Identifier) -> Self { + Self { session_id } + } +} + +#[async_trait] +impl TodoStorage for SessionTodoStorage { + async fn read(&self) -> Result { + let session_path = session::storage::get_path(self.session_id.clone()) + .map_err(|e| format!("Failed to get session path: {}", e))?; + + let metadata = session::storage::read_metadata(&session_path) + .map_err(|e| format!("Failed to read session metadata: {}", e))?; + + let content = TodoState::from_extension_data(&metadata.extension_data) + .map(|state| state.content) + .unwrap_or_default(); + + Ok(content) + } + + async fn write(&self, content: String) -> Result { + // Character limit validation + let char_count = content.chars().count(); + let max_chars = std::env::var("GOOSE_TODO_MAX_CHARS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(50_000); + + if max_chars > 0 && char_count > max_chars { + return Err(format!( + "Todo list too large: {} chars (max: {})", + char_count, max_chars + )); + } + + let session_path = session::storage::get_path(self.session_id.clone()) + .map_err(|e| format!("Failed to get session path: {}", e))?; + + let mut metadata = session::storage::read_metadata(&session_path) + .map_err(|e| format!("Failed to read session metadata: {}", e))?; + + let todo_state = TodoState::new(content); + todo_state + .to_extension_data(&mut metadata.extension_data) + .map_err(|e| format!("Failed to update extension data: {}", e))?; + + session::storage::update_metadata(&session_path, &metadata) + .await + .map_err(|e| format!("Failed to update session metadata: {}", e))?; + + Ok(format!("Updated ({} chars)", char_count)) + } +} + +/// In-memory TODO storage for sessions without persistence +pub struct MemoryTodoStorage { + content: Arc>, +} + +impl MemoryTodoStorage { + pub fn new() -> Self { + Self { + content: Arc::new(Mutex::new(String::new())), + } + } +} + +impl Default for MemoryTodoStorage { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl TodoStorage for MemoryTodoStorage { + async fn read(&self) -> Result { + Ok(self.content.lock().await.clone()) + } + + async fn write(&self, content: String) -> Result { + let char_count = content.chars().count(); + *self.content.lock().await = content; + Ok(format!("Updated ({} chars)", char_count)) + } +} + +/// Internal MCP client for TODO functionality +pub struct TodoMcpClient { + storage: Arc, + server_info: InitializeResult, +} + +impl TodoMcpClient { + pub fn new(storage: Arc) -> Self { + let instructions = r#"The todo extension provides persistent task management throughout your session. + +These tools help you track multi-step work, maintain context between interactions, and ensure systematic task completion. + +## Task Management Guidelines + +**Required Usage:** +- Use `todo__read` and `todo__write` for any task with 2+ steps, multiple files/components, or uncertain scope +- Skipping these tools when needed is considered an error + +**Workflow:** +1. Start: Always `todo__read` first, then `todo__write` a brief checklist using Markdown checkboxes +2. During: After each major action, reread via `todo__read`, then update via `todo__write` - mark completed items, add new discoveries, note blockers +3. Finish: Ensure every item is checked, or clearly list what remains + +**Critical:** `todo__write` replaces the entire list. Always read before writing - not doing so is an error. + +**Best Practices:** +- Keep items short, specific, and action-oriented +- Use nested checkboxes for subtasks +- Include context about blockers or dependencies + +Example format: +```markdown +- [x] Analyze request fully +- [ ] Create implementation plan + - [x] General guidelines + - [ ] Sample work +- [ ] Begin on implementation plan +```"#; + + let server_info = InitializeResult { + protocol_version: rmcp::model::ProtocolVersion::V_2025_03_26, + capabilities: rmcp::model::ServerCapabilities { + tools: Some(rmcp::model::ToolsCapability { list_changed: None }), + prompts: None, + resources: None, + logging: None, + experimental: None, + completions: None, + }, + server_info: rmcp::model::Implementation { + name: "todo".to_string(), + version: "1.0.0".to_string(), + }, + instructions: Some(instructions.to_string()), + }; + + Self { + storage, + server_info, + } + } + + pub fn with_session(session: &SessionConfig) -> Self { + // Use the session ID directly - the storage layer will handle it properly + let storage = Arc::new(SessionTodoStorage::new(session.id.clone())); + Self::new(storage) + } + + pub fn memory_only() -> Self { + let storage = Arc::new(MemoryTodoStorage::new()); + Self::new(storage) + } +} + +#[async_trait] +impl McpClientTrait for TodoMcpClient { + fn get_info(&self) -> Option<&InitializeResult> { + Some(&self.server_info) + } + + async fn list_resources( + &self, + _next_cursor: Option, + _cancellation_token: CancellationToken, + ) -> Result { + Ok(ListResourcesResult { + resources: vec![], + next_cursor: None, + }) + } + + async fn read_resource( + &self, + _uri: &str, + _cancellation_token: CancellationToken, + ) -> Result { + Err(Error::UnexpectedResponse) + } + + async fn list_tools( + &self, + _next_cursor: Option, + _cancellation_token: CancellationToken, + ) -> Result { + // Return unprefixed tools - the extension manager will add the prefix + Ok(ListToolsResult { + tools: vec![ + Tool { + name: "read".into(), + description: todo_read_tool().description, + input_schema: todo_read_tool().input_schema, + annotations: todo_read_tool().annotations, + output_schema: todo_read_tool().output_schema, + }, + Tool { + name: "write".into(), + description: todo_write_tool().description, + input_schema: todo_write_tool().input_schema, + annotations: todo_write_tool().annotations, + output_schema: todo_write_tool().output_schema, + }, + ], + next_cursor: None, + }) + } + + async fn call_tool( + &self, + name: &str, + arguments: Value, + _cancellation_token: CancellationToken, + ) -> Result { + match name { + "read" => { + let content = self.storage.read().await.map_err(|e| { + Error::McpError(rmcp::model::ErrorData::new( + rmcp::model::ErrorCode::INTERNAL_ERROR, + e, + None, + )) + })?; + Ok(CallToolResult { + content: vec![Content::text(content)], + is_error: None, + structured_content: None, + }) + } + "write" => { + let content = arguments + .get("content") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + Error::McpError(rmcp::model::ErrorData::new( + rmcp::model::ErrorCode::INVALID_PARAMS, + "Missing 'content' parameter".to_string(), + None, + )) + })? + .to_string(); + + let result = self.storage.write(content).await.map_err(|e| { + Error::McpError(rmcp::model::ErrorData::new( + rmcp::model::ErrorCode::INTERNAL_ERROR, + e, + None, + )) + })?; + + Ok(CallToolResult { + content: vec![Content::text(result)], + is_error: None, + structured_content: None, + }) + } + _ => Err(Error::UnexpectedResponse), + } + } + + async fn list_prompts( + &self, + _next_cursor: Option, + _cancellation_token: CancellationToken, + ) -> Result { + Ok(ListPromptsResult { + prompts: vec![], + next_cursor: None, + }) + } + + async fn get_prompt( + &self, + _name: &str, + _arguments: Value, + _cancellation_token: CancellationToken, + ) -> Result { + Err(Error::UnexpectedResponse) + } + + async fn subscribe(&self) -> mpsc::Receiver { + // Return a receiver that will never send anything + let (_tx, rx) = mpsc::channel(1); + rx + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_memory_storage() { + let storage = MemoryTodoStorage::new(); + + // Test initial read + let content = storage.read().await.unwrap(); + assert_eq!(content, ""); + + // Test write + let result = storage.write("- [ ] Test task".to_string()).await.unwrap(); + assert!(result.contains("Updated")); + + // Test read after write + let content = storage.read().await.unwrap(); + assert_eq!(content, "- [ ] Test task"); + } + + #[tokio::test] + async fn test_todo_client_list_tools() { + let client = TodoMcpClient::memory_only(); + + let tools = client + .list_tools(None, CancellationToken::default()) + .await + .unwrap(); + + assert_eq!(tools.tools.len(), 2); + assert_eq!(tools.tools[0].name, "read"); + assert_eq!(tools.tools[1].name, "write"); + } + + #[tokio::test] + async fn test_todo_client_read_write() { + let client = TodoMcpClient::memory_only(); + + // Test read empty + let result = client + .call_tool("read", serde_json::json!({}), CancellationToken::default()) + .await + .unwrap(); + assert_eq!(result.content[0], Content::text("")); + + // Test write + let result = client + .call_tool( + "write", + serde_json::json!({"content": "- [ ] Task 1"}), + CancellationToken::default(), + ) + .await + .unwrap(); + assert!(result.content[0] + .as_text() + .unwrap() + .text + .contains("Updated")); + + // Test read after write + let result = client + .call_tool("read", serde_json::json!({}), CancellationToken::default()) + .await + .unwrap(); + assert_eq!(result.content[0], Content::text("- [ ] Task 1")); + } + + #[tokio::test] + async fn test_todo_client_invalid_tool() { + let client = TodoMcpClient::memory_only(); + + let result = client + .call_tool( + "invalid", + serde_json::json!({}), + CancellationToken::default(), + ) + .await; + + assert!(matches!(result, Err(Error::UnexpectedResponse))); + } + + #[tokio::test] + async fn test_todo_client_missing_content_param() { + let client = TodoMcpClient::memory_only(); + + let result = client + .call_tool("write", serde_json::json!({}), CancellationToken::default()) + .await; + + assert!(matches!(result, Err(Error::McpError(_)))); + } +} diff --git a/crates/goose/src/prompts/system.md b/crates/goose/src/prompts/system.md index 1bcf22993c72..2681696a0dbc 100644 --- a/crates/goose/src/prompts/system.md +++ b/crates/goose/src/prompts/system.md @@ -38,23 +38,6 @@ No extensions are defined. You should let the user know that they should add ext {{tool_selection_strategy}} -# Task Management - -- Required — use `todo__read` and `todo__write` for any task with 2+ steps, multiple files/components, or uncertain scope. Skipping them is an error. -- Start — `todo__read`, then `todo__write` a brief checklist (Markdown checkboxes). -- During — after each major action, update via `todo__write`: mark done, add/edit items, note blockers/dependencies. -- Finish — ensure every item is checked, or clearly list what remains. -- Overwrite warning — `todo__write` replaces the entire list; always read before writing. It is an error to not read before writing. -- Quality — keep items short, specific, and action‑oriented. - -Template: -```markdown -- [ ] Implement feature X - - [ ] Update API - - [ ] Write tests -- [ ] Blocked: waiting on credentials -``` - # Response Guidelines - Use Markdown formatting for all responses.