diff --git a/.gitignore b/.gitignore index 1acf9e3f26ad..be1a7699b226 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,4 @@ result # Goose self-test artifacts gooseselftest/ +.tasks/ diff --git a/crates/goose-cli/src/recipes/extract_from_cli.rs b/crates/goose-cli/src/recipes/extract_from_cli.rs index aac9e91cea8b..3c8420c7875d 100644 --- a/crates/goose-cli/src/recipes/extract_from_cli.rs +++ b/crates/goose-cli/src/recipes/extract_from_cli.rs @@ -97,7 +97,10 @@ mod tests { input_config.additional_system_prompt, Some("test_instructions my_value".to_string()) ); - assert!(recipe.extensions.is_none()); + assert!(recipe + .extensions + .as_ref() + .is_none_or(|e| e.iter().all(|ext| ext.name() == "summon"))); assert!(settings.is_some()); let settings = settings.unwrap(); @@ -162,7 +165,10 @@ mod tests { input_config.additional_system_prompt, Some("test_instructions my_value".to_string()) ); - assert!(recipe.extensions.is_none()); + assert!(recipe + .extensions + .as_ref() + .is_none_or(|e| e.iter().all(|ext| ext.name() == "summon"))); assert!(settings.is_some()); let settings = settings.unwrap(); diff --git a/crates/goose-cli/src/session/builder.rs b/crates/goose-cli/src/session/builder.rs index edda60e3e109..8ca7907ff3e4 100644 --- a/crates/goose-cli/src/session/builder.rs +++ b/crates/goose-cli/src/session/builder.rs @@ -600,11 +600,7 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession { let recipe = session_config.recipe.as_ref(); agent - .apply_recipe_components( - recipe.and_then(|r| r.sub_recipes.clone()), - recipe.and_then(|r| r.response.clone()), - true, - ) + .apply_recipe_components(recipe.and_then(|r| r.response.clone()), true) .await; let new_provider = match create(&resolved.provider_name, resolved.model_config).await { @@ -643,6 +639,17 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession { process::exit(1); }); + if let Some(recipe) = session_config.recipe.clone() { + if let Err(e) = 
session_manager + .update(&session_id) + .recipe(Some(recipe)) + .apply() + .await + { + tracing::warn!("Failed to store recipe on session: {}", e); + } + } + if session_config.resume { handle_resumed_session_workdir(&agent, &session_id, session_config.interactive).await; } diff --git a/crates/goose-cli/src/session/mod.rs b/crates/goose-cli/src/session/mod.rs index b758a8334467..3b3dcd314f24 100644 --- a/crates/goose-cli/src/session/mod.rs +++ b/crates/goose-cli/src/session/mod.rs @@ -21,8 +21,8 @@ use tokio_util::task::AbortOnDropHandle; pub use self::export::message_to_markdown; pub use builder::{build_session, SessionBuilderConfig}; use console::Color; -use goose::agents::subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE; use goose::agents::AgentEvent; +use goose::agents::SUBAGENT_TOOL_REQUEST_TYPE; use goose::permission::permission_confirmation::PrincipalType; use goose::permission::Permission; use goose::permission::PermissionConfirmation; diff --git a/crates/goose-cli/src/session/output.rs b/crates/goose-cli/src/session/output.rs index b196329393e3..d7875377e35b 100644 --- a/crates/goose-cli/src/session/output.rs +++ b/crates/goose-cli/src/session/output.rs @@ -314,8 +314,9 @@ fn render_tool_request(req: &ToolRequest, theme: Theme, debug: bool) { Ok(call) => match call.name.to_string().as_str() { "developer__text_editor" => render_text_editor_request(call, debug), "developer__shell" => render_shell_request(call, debug), - "code_execution__execute" => render_execute_code_request(call, debug), - "subagent" => render_subagent_request(call, debug), + "execute" | "execute_code" => render_execute_code_request(call, debug), + "delegate" => render_delegate_request(call, debug), + "subagent" => render_delegate_request(call, debug), "todo__write" => render_todo_request(call, debug), _ => render_default_request(call, debug), }, @@ -555,12 +556,12 @@ fn render_execute_code_request(call: &CallToolRequestParams, debug: bool) { println!(); } -fn render_subagent_request(call: 
&CallToolRequestParams, debug: bool) { +fn render_delegate_request(call: &CallToolRequestParams, debug: bool) { print_tool_header(call); if let Some(args) = &call.arguments { - if let Some(Value::String(subrecipe)) = args.get("subrecipe") { - println!("{}: {}", style("subrecipe").dim(), style(subrecipe).cyan()); + if let Some(Value::String(source)) = args.get("source") { + println!("{}: {}", style("source").dim(), style(source).cyan()); } if let Some(Value::String(instructions)) = args.get("instructions") { @@ -581,7 +582,7 @@ fn render_subagent_request(call: &CallToolRequestParams, debug: bool) { print_params(&Some(params.clone()), 1, debug); } - let skip_keys = ["subrecipe", "instructions", "parameters"]; + let skip_keys = ["source", "instructions", "parameters"]; let mut other_args = serde_json::Map::new(); for (k, v) in args { if !skip_keys.contains(&k.as_str()) { diff --git a/crates/goose-server/src/routes/recipe_utils.rs b/crates/goose-server/src/routes/recipe_utils.rs index 46c8c3d5f605..f198998a8519 100644 --- a/crates/goose-server/src/routes/recipe_utils.rs +++ b/crates/goose-server/src/routes/recipe_utils.rs @@ -163,11 +163,7 @@ pub async fn apply_recipe_to_agent( include_final_output_tool: bool, ) -> Option { agent - .apply_recipe_components( - recipe.sub_recipes.clone(), - recipe.response.clone(), - include_final_output_tool, - ) + .apply_recipe_components(recipe.response.clone(), include_final_output_tool) .await; recipe.instructions.as_ref().map(|instructions| { diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index c5ab84898f88..72b3acab87a6 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -20,10 +20,6 @@ use crate::agents::final_output_tool::{FINAL_OUTPUT_CONTINUATION_MESSAGE, FINAL_ use crate::agents::platform_tools::PLATFORM_MANAGE_SCHEDULE_TOOL_NAME; use crate::agents::prompt_manager::PromptManager; use crate::agents::retry::{RetryManager, RetryResult}; -use 
crate::agents::subagent_task_config::TaskConfig; -use crate::agents::subagent_tool::{ - create_subagent_tool, handle_subagent_tool, SUBAGENT_TOOL_NAME, -}; use crate::agents::types::{FrontendTool, SessionConfig, SharedProvider, ToolResultReceiver}; use crate::config::permission::PermissionManager; use crate::config::{get_enabled_extensions, Config, GooseMode}; @@ -42,11 +38,11 @@ use crate::permission::permission_judge::PermissionCheckResult; use crate::permission::PermissionConfirmation; use crate::providers::base::Provider; use crate::providers::errors::ProviderError; -use crate::recipe::{Author, Recipe, Response, Settings, SubRecipe}; +use crate::recipe::{Author, Recipe, Response, Settings}; use crate::scheduler_trait::SchedulerTrait; use crate::security::security_inspector::SecurityInspector; use crate::session::extension_data::{EnabledExtensionsState, ExtensionState}; -use crate::session::{Session, SessionManager, SessionType}; +use crate::session::{Session, SessionManager}; use crate::tool_inspection::ToolInspectionManager; use crate::tool_monitor::RepetitionInspector; use crate::utils::is_token_cancelled; @@ -121,7 +117,6 @@ pub struct Agent { pub config: AgentConfig, pub extension_manager: Arc, - pub(super) sub_recipes: Mutex>, pub(super) final_output_tool: Arc>>, pub(super) frontend_tools: Mutex>, pub(super) frontend_instructions: Mutex>, @@ -210,7 +205,6 @@ impl Agent { provider: provider.clone(), config, extension_manager: Arc::new(ExtensionManager::new(provider.clone(), session_manager)), - sub_recipes: Mutex::new(HashMap::new()), final_output_tool: Arc::new(Mutex::new(None)), frontend_tools: Mutex::new(HashMap::new()), frontend_instructions: Mutex::new(None), @@ -452,23 +446,11 @@ impl Agent { self.extend_system_prompt(final_output_system_prompt).await; } - pub async fn add_sub_recipes(&self, sub_recipes_to_add: Vec) { - let mut sub_recipes = self.sub_recipes.lock().await; - for sr in sub_recipes_to_add { - sub_recipes.insert(sr.name.clone(), sr); - } 
- } - pub async fn apply_recipe_components( &self, - sub_recipes: Option>, response: Option, include_final_output: bool, ) { - if let Some(sub_recipes) = sub_recipes { - self.add_sub_recipes(sub_recipes).await; - } - if include_final_output { if let Some(response) = response { self.add_final_output_tool(response).await; @@ -485,18 +467,6 @@ impl Agent { cancellation_token: Option, session: &Session, ) -> (String, Result) { - // Prevent subagents from creating other subagents - if session.session_type == SessionType::SubAgent && tool_call.name == SUBAGENT_TOOL_NAME { - return ( - request_id, - Err(ErrorData::new( - ErrorCode::INVALID_REQUEST, - "Subagents cannot create other subagents".to_string(), - None, - )), - ); - } - if tool_call.name == PLATFORM_MANAGE_SCHEDULE_TOOL_NAME { let arguments = tool_call .arguments @@ -531,49 +501,7 @@ impl Agent { } debug!("WAITING_TOOL_START: {}", tool_call.name); - let result: ToolCallResult = if tool_call.name == SUBAGENT_TOOL_NAME { - let provider = match self.provider().await { - Ok(p) => p, - Err(_) => { - return ( - request_id, - Err(ErrorData::new( - ErrorCode::INTERNAL_ERROR, - "Provider is required".to_string(), - None, - )), - ); - } - }; - - let extensions = self.get_extension_configs().await; - - let max_turns_from_recipe = session - .recipe - .as_ref() - .and_then(|r| r.settings.as_ref()) - .and_then(|s| s.max_turns); - - let task_config = - TaskConfig::new(provider, &session.id, &session.working_dir, extensions) - .with_max_turns(max_turns_from_recipe); - let sub_recipes = self.sub_recipes.lock().await.clone(); - - let arguments = tool_call - .arguments - .clone() - .map(Value::Object) - .unwrap_or(Value::Object(serde_json::Map::new())); - - handle_subagent_tool( - &self.config, - arguments, - task_config, - sub_recipes, - session.working_dir.clone(), - cancellation_token, - ) - } else if self.is_frontend_tool(&tool_call.name).await { + let result: ToolCallResult = if self.is_frontend_tool(&tool_call.name).await { 
// For frontend tools, return an error indicating we need frontend execution ToolCallResult::from(Err(ErrorData::new( ErrorCode::INTERNAL_ERROR, @@ -807,30 +735,6 @@ impl Agent { Ok(()) } - pub async fn subagents_enabled(&self, session_id: &str) -> bool { - if self.config.goose_mode != GooseMode::Auto { - return false; - } - let context = self.extension_manager.get_context(); - if matches!( - context - .session_manager - .get_session(session_id, false) - .await - .ok() - .map(|session| session.session_type), - Some(SessionType::SubAgent) - ) { - return false; - } - !self - .extension_manager - .list_extensions() - .await - .map(|ext| ext.is_empty()) - .unwrap_or(true) - } - pub async fn list_tools(&self, session_id: &str, extension_name: Option) -> Vec { let mut prefixed_tools = self .extension_manager @@ -838,7 +742,6 @@ impl Agent { .await .unwrap_or_default(); - let subagents_enabled = self.subagents_enabled(session_id).await; if (extension_name.is_none() || extension_name.as_deref() == Some("platform")) && self.config.scheduler_service.is_some() { @@ -849,12 +752,6 @@ impl Agent { if let Some(final_output_tool) = self.final_output_tool.lock().await.as_ref() { prefixed_tools.push(final_output_tool.tool()); } - - if subagents_enabled { - let sub_recipes = self.sub_recipes.lock().await; - let sub_recipes_vec: Vec<_> = sub_recipes.values().cloned().collect(); - prefixed_tools.push(create_subagent_tool(&sub_recipes_vec)); - } } prefixed_tools diff --git a/crates/goose/src/agents/builtin_skills/mod.rs b/crates/goose/src/agents/builtin_skills/mod.rs index a94bddc625a4..039e7680448d 100644 --- a/crates/goose/src/agents/builtin_skills/mod.rs +++ b/crates/goose/src/agents/builtin_skills/mod.rs @@ -3,7 +3,7 @@ use include_dir::{include_dir, Dir}; static BUILTIN_SKILLS_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/src/agents/builtin_skills/skills"); -pub fn get_all_builtin_skills() -> Vec<&'static str> { +pub fn get_all() -> Vec<&'static str> { BUILTIN_SKILLS_DIR .files() 
.filter(|f| f.path().extension().is_some_and(|ext| ext == "md")) diff --git a/crates/goose/src/agents/code_execution_extension.rs b/crates/goose/src/agents/code_execution_extension.rs index 01cd5df06456..2bdb7072573a 100644 --- a/crates/goose/src/agents/code_execution_extension.rs +++ b/crates/goose/src/agents/code_execution_extension.rs @@ -1,4 +1,5 @@ use crate::agents::extension::PlatformExtensionContext; +use crate::agents::extension_manager::get_tool_owner; use crate::agents::mcp_client::{Error, McpClientTrait}; use anyhow::Result; use async_trait::async_trait; @@ -111,10 +112,16 @@ impl CodeExecutionClient { let mut cfgs = vec![]; for tool in tools { let full_name = tool.name.to_string(); - let (server_name, tool_name) = full_name.split_once("__")?; + let (namespace, name) = if let Some((server, tool_name)) = full_name.split_once("__") { + (server.to_string(), tool_name.to_string()) + } else if let Some(owner) = get_tool_owner(&tool) { + (owner, full_name) + } else { + continue; + }; cfgs.push(CallbackConfig { - name: tool_name.into(), - namespace: server_name.into(), + name, + namespace, description: tool.description.as_ref().map(|d| d.to_string()), input_schema: Some(json!(tool.input_schema)), output_schema: tool.output_schema.as_ref().map(|s| json!(s)), diff --git a/crates/goose/src/agents/execute_commands.rs b/crates/goose/src/agents/execute_commands.rs index e2219e028c8c..2c115249aa71 100644 --- a/crates/goose/src/agents/execute_commands.rs +++ b/crates/goose/src/agents/execute_commands.rs @@ -378,7 +378,7 @@ impl Agent { Err(e) => return Err(anyhow!("Failed to build recipe: {}", e)), }; - self.apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true) + self.apply_recipe_components(recipe.response.clone(), true) .await; let prompt = [recipe.instructions.as_deref(), recipe.prompt.as_deref()] diff --git a/crates/goose/src/agents/extension.rs b/crates/goose/src/agents/extension.rs index 9dd372b09dbf..3a34617d718d 100644 --- 
a/crates/goose/src/agents/extension.rs +++ b/crates/goose/src/agents/extension.rs @@ -2,7 +2,7 @@ use crate::agents::apps_extension; use crate::agents::chatrecall_extension; use crate::agents::code_execution_extension; use crate::agents::extension_manager_extension; -use crate::agents::skills_extension; +use crate::agents::summon_extension; use crate::agents::todo_extension; use crate::agents::tom_extension; use std::collections::HashMap; @@ -53,6 +53,7 @@ pub static PLATFORM_EXTENSIONS: Lazy description: "Enable a todo list for goose so it can keep track of what it is doing", default_enabled: true, + unprefixed_tools: false, client_factory: |ctx| Box::new(todo_extension::TodoClient::new(ctx).unwrap()), }, ); @@ -65,6 +66,7 @@ pub static PLATFORM_EXTENSIONS: Lazy description: "Create and manage custom Goose apps through chat. Apps are HTML/CSS/JavaScript and run in sandboxed windows.", default_enabled: true, + unprefixed_tools: false, client_factory: |ctx| Box::new(apps_extension::AppsManagerClient::new(ctx).unwrap()), }, ); @@ -77,6 +79,7 @@ pub static PLATFORM_EXTENSIONS: Lazy description: "Search past conversations and load session summaries for contextual memory", default_enabled: false, + unprefixed_tools: false, client_factory: |ctx| { Box::new(chatrecall_extension::ChatRecallClient::new(ctx).unwrap()) }, @@ -91,18 +94,20 @@ pub static PLATFORM_EXTENSIONS: Lazy description: "Enable extension management tools for discovering, enabling, and disabling extensions", default_enabled: true, + unprefixed_tools: false, client_factory: |ctx| Box::new(extension_manager_extension::ExtensionManagerClient::new(ctx).unwrap()), }, ); map.insert( - skills_extension::EXTENSION_NAME, + summon_extension::EXTENSION_NAME, PlatformExtensionDef { - name: skills_extension::EXTENSION_NAME, - display_name: "Skills", - description: "Load and use skills from relevant directories", + name: summon_extension::EXTENSION_NAME, + display_name: "Summon", + description: "Load knowledge and 
delegate tasks to subagents", default_enabled: true, - client_factory: |ctx| Box::new(skills_extension::SkillsClient::new(ctx).unwrap()), + unprefixed_tools: true, + client_factory: |ctx| Box::new(summon_extension::SummonClient::new(ctx).unwrap()), }, ); @@ -114,6 +119,7 @@ pub static PLATFORM_EXTENSIONS: Lazy description: "Goose will make extension calls through code execution, saving tokens", default_enabled: false, + unprefixed_tools: true, client_factory: |ctx| { Box::new(code_execution_extension::CodeExecutionClient::new(ctx).unwrap()) }, @@ -128,6 +134,7 @@ pub static PLATFORM_EXTENSIONS: Lazy description: "Inject custom context into every turn via GOOSE_MOIM_MESSAGE_TEXT and GOOSE_MOIM_MESSAGE_FILE environment variables", default_enabled: true, + unprefixed_tools: false, client_factory: |ctx| Box::new(tom_extension::TomClient::new(ctx).unwrap()), }, ); @@ -175,12 +182,15 @@ impl PlatformExtensionContext { } } +/// Definition for a platform extension that runs in-process with direct agent access. #[derive(Debug, Clone)] pub struct PlatformExtensionDef { pub name: &'static str, pub display_name: &'static str, pub description: &'static str, pub default_enabled: bool, + /// If true, tools are exposed without extension prefix for intuitive first-class use. 
+ pub unprefixed_tools: bool, pub client_factory: fn(PlatformExtensionContext) -> Box, } diff --git a/crates/goose/src/agents/extension_manager.rs b/crates/goose/src/agents/extension_manager.rs index 80f9d350c1a9..58b4648bb040 100644 --- a/crates/goose/src/agents/extension_manager.rs +++ b/crates/goose/src/agents/extension_manager.rs @@ -105,18 +105,18 @@ pub struct ExtensionManager { /// A flattened representation of a resource used by the agent to prepare inference #[derive(Debug, Clone)] pub struct ResourceItem { - pub client_name: String, // The name of the client that owns the resource - pub uri: String, // The URI of the resource - pub name: String, // The name of the resource - pub content: String, // The content of the resource + pub extension_name: String, // The name of the extension that owns the resource + pub uri: String, // The URI of the resource + pub name: String, // The name of the resource + pub content: String, // The content of the resource pub timestamp: DateTime, // The timestamp of the resource - pub priority: f32, // The priority of the resource + pub priority: f32, // The priority of the resource pub token_count: Option, // The token count of the resource (filled in by the agent) } impl ResourceItem { pub fn new( - client_name: String, + extension_name: String, uri: String, name: String, content: String, @@ -124,7 +124,7 @@ impl ResourceItem { priority: f32, ) -> Self { Self { - client_name, + extension_name, uri, name, content, @@ -199,6 +199,41 @@ pub fn get_parameter_names(tool: &Tool) -> Vec { names } +const TOOL_EXTENSION_META_KEY: &str = "goose_extension"; + +pub fn get_tool_owner(tool: &Tool) -> Option { + tool.meta + .as_ref() + .and_then(|m| m.0.get(TOOL_EXTENSION_META_KEY)) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) +} + +fn is_unprefixed_extension(config: &ExtensionConfig) -> bool { + if let ExtensionConfig::Platform { name, .. 
} = config { + PLATFORM_EXTENSIONS + .get(name_to_key(name).as_str()) + .is_some_and(|def| def.unprefixed_tools) + } else { + false + } +} + +/// Returns true if the named extension is a first-class platform extension +/// whose tools are exposed unprefixed and remain visible during code execution mode. +pub fn is_first_class_extension(name: &str) -> bool { + PLATFORM_EXTENSIONS + .get(name_to_key(name).as_str()) + .is_some_and(|def| def.unprefixed_tools) +} + +/// Result of resolving a tool call to its owning extension +struct ResolvedTool { + extension_name: String, + actual_tool_name: String, + client: McpClientBox, +} + async fn child_process_client( mut command: Command, timeout: &Option, @@ -789,16 +824,18 @@ impl ExtensionManager { tools .iter() .filter(|tool| { - let tool_prefix = tool.name.split("__").next().unwrap_or(""); + let tool_owner = get_tool_owner(tool) + .map(|s| name_to_key(&s)) + .unwrap_or_else(|| tool.name.split("__").next().unwrap_or("").to_string()); if let Some(ref excluded) = exclude_normalized { - if tool_prefix == excluded { + if tool_owner == *excluded { return false; } } if let Some(ref name_filter) = extension_name_normalized { - tool_prefix == name_filter + tool_owner == *name_filter } else { true } @@ -861,18 +898,36 @@ impl ExtensionManager { } }; + let expose_unprefixed = is_unprefixed_extension(&config); + loop { for tool in client_tools.tools { if config.is_tool_available(&tool.name) { + let public_name = if expose_unprefixed { + tool.name.to_string() + } else { + format!("{}__{}", name, tool.name) + }; + + let mut meta_map = tool + .meta + .as_ref() + .map(|m| m.0.clone()) + .unwrap_or_default(); + meta_map.insert( + TOOL_EXTENSION_META_KEY.to_string(), + serde_json::Value::String(name.clone()), + ); + tools.push(Tool { - name: format!("{}__{}", name, tool.name).into(), + name: public_name.into(), description: tool.description, input_schema: tool.input_schema, annotations: tool.annotations, output_schema: tool.output_schema, 
icons: tool.icons, title: tool.title, - meta: tool.meta, + meta: Some(rmcp::model::Meta(meta_map)), }); } } @@ -899,9 +954,22 @@ impl ExtensionManager { let results = future::join_all(client_futures).await; + let mut seen_names: std::collections::HashSet = std::collections::HashSet::new(); let mut tools = Vec::new(); - for (_, client_tools) in results { - tools.extend(client_tools); + for (ext_name, client_tools) in results { + for tool in client_tools { + let tool_name = tool.name.to_string(); + if seen_names.contains(&tool_name) { + warn!( + tool = %tool_name, + extension = %ext_name, + "Duplicate tool name - skipping" + ); + continue; + } + seen_names.insert(tool_name); + tools.push(tool); + } } Ok(tools) @@ -915,16 +983,6 @@ impl ExtensionManager { prompt_template::render_template("plan.md", &context).expect("Prompt should render") } - /// Find and return a reference to the appropriate client for a tool call - async fn get_client_for_tool(&self, prefixed_name: &str) -> Option<(String, McpClientBox)> { - self.extensions - .lock() - .await - .iter() - .find(|(key, _)| prefixed_name.starts_with(*key)) - .map(|(name, extension)| (name.clone(), extension.get_client())) - } - // Function that gets executed for read_resource tool pub async fn read_resource_tool( &self, @@ -1183,59 +1241,86 @@ impl ExtensionManager { } } - pub async fn dispatch_tool_call( + async fn resolve_tool( &self, session_id: &str, - tool_call: CallToolRequestParams, - working_dir: Option<&std::path::Path>, - cancellation_token: CancellationToken, - ) -> Result { - // Some models strip the tool prefix, so auto-add it for known code_execution tools - let tool_name_str = tool_call.name.to_string(); - let prefixed_name = if !tool_name_str.contains("__") { - let code_exec_tools = ["execute", "list_functions", "get_function_details"]; - if code_exec_tools.contains(&tool_name_str.as_str()) - && self.extensions.lock().await.contains_key("code_execution") - { - format!("code_execution__{tool_name_str}") 
- } else { - tool_name_str + tool_name: &str, + ) -> Result { + if let Some((prefix, actual)) = tool_name.split_once("__") { + let owner = name_to_key(prefix); + if let Some(client) = self.get_server_client(&owner).await { + return Ok(ResolvedTool { + extension_name: owner, + actual_tool_name: actual.to_string(), + client, + }); } - } else { - tool_name_str - }; + } - // Dispatch tool call based on the prefix naming convention - let (client_name, client) = - self.get_client_for_tool(&prefixed_name) - .await - .ok_or_else(|| { - ErrorData::new( - ErrorCode::RESOURCE_NOT_FOUND, - format!("Tool '{}' not found", tool_call.name), - None, - ) - })?; + let tools = self.get_all_tools_cached(session_id).await.map_err(|e| { + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to get tools: {}", e), + None, + ) + })?; - let tool_name = prefixed_name - .strip_prefix(client_name.as_str()) - .and_then(|s| s.strip_prefix("__")) - .ok_or_else(|| { + if let Some(tool) = tools.iter().find(|t| *t.name == *tool_name) { + let owner = get_tool_owner(tool).ok_or_else(|| { ErrorData::new( ErrorCode::RESOURCE_NOT_FOUND, - format!("Invalid tool name format: '{}'", tool_call.name), + format!("Tool '{}' has no owner", tool_name), None, ) - })? 
- .to_string(); + })?; + + let actual_tool_name = tool_name + .strip_prefix(&format!("{owner}__")) + .unwrap_or(tool_name) + .to_string(); + + let client = self.get_server_client(&owner).await.ok_or_else(|| { + ErrorData::new( + ErrorCode::RESOURCE_NOT_FOUND, + format!("Extension '{}' not found for tool '{}'", owner, tool_name), + None, + ) + })?; + + return Ok(ResolvedTool { + extension_name: owner, + actual_tool_name, + client, + }); + } - if let Some(extension) = self.extensions.lock().await.get(&client_name) { - if !extension.config.is_tool_available(&tool_name) { + Err(ErrorData::new( + ErrorCode::RESOURCE_NOT_FOUND, + format!("Tool '{}' not found", tool_name), + None, + )) + } + + pub async fn dispatch_tool_call( + &self, + session_id: &str, + tool_call: CallToolRequestParams, + working_dir: Option<&std::path::Path>, + cancellation_token: CancellationToken, + ) -> Result { + let tool_name_str = tool_call.name.to_string(); + let resolved = self.resolve_tool(session_id, &tool_name_str).await?; + + if let Some(extension) = self.extensions.lock().await.get(&resolved.extension_name) { + if !extension + .config + .is_tool_available(&resolved.actual_tool_name) + { return Err(ErrorData::new( ErrorCode::RESOURCE_NOT_FOUND, format!( "Tool '{}' is not available for extension '{}'", - tool_name, client_name + resolved.actual_tool_name, resolved.extension_name ), None, ) @@ -1244,15 +1329,16 @@ impl ExtensionManager { } let arguments = tool_call.arguments.clone(); - let client = client.clone(); + let client = resolved.client.clone(); let notifications_receiver = client.lock().await.subscribe().await; let session_id = session_id.to_string(); + let actual_tool_name = resolved.actual_tool_name; let working_dir_str = working_dir.map(|p| p.to_string_lossy().to_string()); let fut = async move { tracing::debug!( - "dispatch_tool_call fut: calling client.call_tool tool={} session_id={} working_dir={:?}", - tool_name, + "dispatch_tool_call: calling client.call_tool tool={} 
session_id={} working_dir={:?}", + actual_tool_name, session_id, working_dir_str ); @@ -1260,7 +1346,7 @@ impl ExtensionManager { client_guard .call_tool( &session_id, - &tool_name, + &actual_tool_name, arguments, working_dir_str.as_deref(), cancellation_token, @@ -1636,70 +1722,8 @@ mod tests { } } - #[tokio::test] - async fn test_get_client_for_tool() { - let temp_dir = tempfile::tempdir().unwrap(); - let extension_manager = - ExtensionManager::new_without_provider(temp_dir.path().to_path_buf()); - - // Add some mock clients using the helper method - extension_manager - .add_mock_extension( - "test_client".to_string(), - Arc::new(Mutex::new(Box::new(MockClient {}))), - ) - .await; - - extension_manager - .add_mock_extension( - "__client".to_string(), - Arc::new(Mutex::new(Box::new(MockClient {}))), - ) - .await; - - extension_manager - .add_mock_extension( - "__cli__ent__".to_string(), - Arc::new(Mutex::new(Box::new(MockClient {}))), - ) - .await; - - extension_manager - .add_mock_extension( - "client ๐Ÿš€".to_string(), - Arc::new(Mutex::new(Box::new(MockClient {}))), - ) - .await; - - // Test basic case - assert!(extension_manager - .get_client_for_tool("test_client__tool") - .await - .is_some()); - - // Test leading underscores - assert!(extension_manager - .get_client_for_tool("__client__tool") - .await - .is_some()); - - // Test multiple underscores in client name, and ending with __ - assert!(extension_manager - .get_client_for_tool("__cli__ent____tool") - .await - .is_some()); - - // Test unicode in tool name, "client ๐Ÿš€" should become "client_" - assert!(extension_manager - .get_client_for_tool("client___tool") - .await - .is_some()); - } - #[tokio::test] async fn test_dispatch_tool_call() { - // test that dispatch_tool_call parses out the sanitized name correctly, and extracts - // tool_names let temp_dir = tempfile::tempdir().unwrap(); let extension_manager = ExtensionManager::new_without_provider(temp_dir.path().to_path_buf()); @@ -1726,7 +1750,6 @@ 
mod tests { ) .await; - // verify a normal tool call let tool_call = CallToolRequestParams { meta: None, task: None, @@ -1747,7 +1770,7 @@ mod tests { let tool_call = CallToolRequestParams { meta: None, task: None, - name: "test_client__test__tool".to_string().into(), + name: "test_client__available_tool".to_string().into(), arguments: Some(object!({})), }; @@ -1761,7 +1784,6 @@ mod tests { .await; assert!(result.is_ok()); - // verify a multiple underscores dispatch let tool_call = CallToolRequestParams { meta: None, task: None, @@ -1779,7 +1801,6 @@ mod tests { .await; assert!(result.is_ok()); - // Test unicode in tool name, "client ๐Ÿš€" should become "client_" let tool_call = CallToolRequestParams { meta: None, task: None, @@ -1797,24 +1818,6 @@ mod tests { .await; assert!(result.is_ok()); - let tool_call = CallToolRequestParams { - meta: None, - task: None, - name: "client___test__tool".to_string().into(), - arguments: Some(object!({})), - }; - - let result = extension_manager - .dispatch_tool_call( - "test-session-id", - tool_call, - None, - CancellationToken::default(), - ) - .await; - assert!(result.is_ok()); - - // this should error out, specifically for an ToolError::ExecutionError let invalid_tool_call = CallToolRequestParams { meta: None, task: None, @@ -1829,20 +1832,14 @@ mod tests { None, CancellationToken::default(), ) - .await - .unwrap() - .result .await; - assert!(matches!( - result, - Err(ErrorData { - code: ErrorCode::INTERNAL_ERROR, - .. 
- }) - )); + if let Err(err) = result { + let tool_err = err.downcast_ref::().expect("Expected ErrorData"); + assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND); + } else { + panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND"); + } - // this should error out, specifically with an ToolError::NotFound - // this client doesn't exist let invalid_tool_call = CallToolRequestParams { meta: None, task: None, @@ -1945,7 +1942,6 @@ mod tests { ) .await; - // Try to call an unavailable tool let unavailable_tool_call = CallToolRequestParams { meta: None, task: None, @@ -1962,11 +1958,9 @@ mod tests { ) .await; - // Should return RESOURCE_NOT_FOUND error if let Err(err) = result { let tool_err = err.downcast_ref::().expect("Expected ErrorData"); assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND); - assert!(tool_err.message.contains("is not available")); } else { panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND"); } diff --git a/crates/goose/src/agents/mod.rs b/crates/goose/src/agents/mod.rs index 2d33fb2793bc..0efc3ec22274 100644 --- a/crates/goose/src/agents/mod.rs +++ b/crates/goose/src/agents/mod.rs @@ -1,6 +1,6 @@ mod agent; pub(crate) mod apps_extension; -mod builtin_skills; +pub(crate) mod builtin_skills; pub(crate) mod chatrecall_extension; pub(crate) mod code_execution_extension; pub mod container; @@ -18,11 +18,10 @@ pub mod prompt_manager; mod reply_parts; pub mod retry; mod schedule_tool; -pub(crate) mod skills_extension; pub mod subagent_execution_tool; -pub mod subagent_handler; -mod subagent_task_config; -pub mod subagent_tool; +pub(crate) mod subagent_handler; +pub(crate) mod subagent_task_config; +pub(crate) mod summon_extension; pub(crate) mod todo_extension; pub(crate) mod tom_extension; mod tool_execution; @@ -34,5 +33,6 @@ pub use execute_commands::COMPACT_TRIGGERS; pub use extension::ExtensionConfig; pub use extension_manager::ExtensionManager; pub use prompt_manager::PromptManager; +pub use 
subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE; pub use subagent_task_config::TaskConfig; pub use types::{FrontendTool, RetryConfig, SessionConfig, SuccessCheck}; diff --git a/crates/goose/src/agents/reply_parts.rs b/crates/goose/src/agents/reply_parts.rs index ff8670eea15e..765f019e5b7e 100644 --- a/crates/goose/src/agents/reply_parts.rs +++ b/crates/goose/src/agents/reply_parts.rs @@ -9,8 +9,6 @@ use tracing::debug; use super::super::agents::Agent; use crate::agents::code_execution_extension::EXTENSION_NAME as CODE_EXECUTION_EXTENSION; -use crate::agents::skills_extension::EXTENSION_NAME as SKILLS_EXTENSION; -use crate::agents::subagent_tool::SUBAGENT_TOOL_NAME; use crate::conversation::message::{Message, MessageContent, ToolRequest}; use crate::conversation::Conversation; use crate::providers::base::{stream_from_single_message, MessageStream, Provider, ProviderUsage}; @@ -151,12 +149,12 @@ impl Agent { .is_extension_enabled(CODE_EXECUTION_EXTENSION) .await; if code_execution_active { - let code_exec_prefix = format!("{CODE_EXECUTION_EXTENSION}__"); - let skills_prefix = format!("{SKILLS_EXTENSION}__"); tools.retain(|tool| { - tool.name.starts_with(&code_exec_prefix) - || tool.name.starts_with(&skills_prefix) - || tool.name == SUBAGENT_TOOL_NAME + if let Some(owner) = crate::agents::extension_manager::get_tool_owner(tool) { + crate::agents::extension_manager::is_first_class_extension(&owner) + } else { + false + } }); } @@ -182,7 +180,6 @@ impl Agent { .with_extension_and_tool_counts(extension_count, tool_count) .with_code_execution_mode(code_execution_active) .with_hints(working_dir) - .with_enable_subagents(self.subagents_enabled(session_id).await) .build(); // Handle toolshim if enabled diff --git a/crates/goose/src/agents/skills_extension.rs b/crates/goose/src/agents/skills_extension.rs deleted file mode 100644 index b882c1bb389f..000000000000 --- a/crates/goose/src/agents/skills_extension.rs +++ /dev/null @@ -1,866 +0,0 @@ -use super::builtin_skills; -use 
crate::agents::extension::PlatformExtensionContext; -use crate::agents::mcp_client::{Error, McpClientTrait}; -use crate::config::paths::Paths; -use anyhow::Result; -use async_trait::async_trait; -use indoc::indoc; -use rmcp::model::{ - CallToolResult, Content, Implementation, InitializeResult, JsonObject, ListToolsResult, - ProtocolVersion, ServerCapabilities, Tool, ToolAnnotations, ToolsCapability, -}; -use schemars::{schema_for, JsonSchema}; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use tokio_util::sync::CancellationToken; - -pub static EXTENSION_NAME: &str = "skills"; - -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -struct LoadSkillParams { - name: String, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -struct SkillMetadata { - name: String, - description: String, -} - -#[derive(Debug, Clone)] -struct Skill { - metadata: SkillMetadata, - body: String, - directory: PathBuf, - supporting_files: Vec, -} - -pub struct SkillsClient { - info: InitializeResult, - skills: HashMap, -} - -impl SkillsClient { - pub fn new(_context: PlatformExtensionContext) -> Result { - let info = InitializeResult { - protocol_version: ProtocolVersion::V_2025_03_26, - capabilities: ServerCapabilities { - tasks: None, - tools: Some(ToolsCapability { - list_changed: Some(false), - }), - resources: None, - prompts: None, - completions: None, - experimental: None, - logging: None, - }, - server_info: Implementation { - name: EXTENSION_NAME.to_string(), - title: Some("Skills".to_string()), - version: "1.0.0".to_string(), - icons: None, - website_url: None, - }, - instructions: Some(String::new()), - }; - - let mut skills = Self::load_builtin_skills(); - - let directories = Self::get_default_skill_directories() - .into_iter() - .filter(|d| d.exists()) - .collect::>(); - let fs_skills = Self::discover_skills_in_directories(&directories); - skills.extend(fs_skills); - - let mut client = Self { info, skills }; - 
client.info.instructions = Some(client.generate_instructions()); - Ok(client) - } - - fn load_builtin_skills() -> HashMap { - let mut skills = HashMap::new(); - for content in builtin_skills::get_all_builtin_skills() { - if let Ok((metadata, body)) = Self::parse_frontmatter(content) { - skills.insert( - metadata.name.clone(), - Skill { - metadata, - body, - directory: PathBuf::new(), - supporting_files: vec![], - }, - ); - } - } - skills - } - - fn get_default_skill_directories() -> Vec { - let mut dirs = Vec::new(); - - if let Some(home) = dirs::home_dir() { - dirs.push(home.join(".claude/skills")); - dirs.push(home.join(".config/agents/skills")); - } - - dirs.push(Paths::config_dir().join("skills")); - - if let Ok(working_dir) = std::env::current_dir() { - dirs.push(working_dir.join(".claude/skills")); - dirs.push(working_dir.join(".goose/skills")); - dirs.push(working_dir.join(".agents/skills")); - } - - dirs - } - - fn parse_skill_file(path: &Path) -> Result { - let content = std::fs::read_to_string(path)?; - - let (metadata, body) = Self::parse_frontmatter(&content)?; - - let directory = path - .parent() - .ok_or_else(|| anyhow::anyhow!("Skill file has no parent directory"))? 
- .to_path_buf(); - - let supporting_files = Self::find_supporting_files(&directory, path)?; - - Ok(Skill { - metadata, - body, - directory, - supporting_files, - }) - } - - fn parse_frontmatter(content: &str) -> Result<(SkillMetadata, String)> { - let parts: Vec<&str> = content.split("---").collect(); - - if parts.len() < 3 { - return Err(anyhow::anyhow!("Invalid frontmatter format")); - } - - let yaml_content = parts[1].trim(); - let metadata: SkillMetadata = serde_yaml::from_str(yaml_content)?; - - let body = parts[2..].join("---").trim().to_string(); - - Ok((metadata, body)) - } - - fn find_supporting_files(directory: &Path, skill_file: &Path) -> Result> { - let mut files = Vec::new(); - - if let Ok(entries) = std::fs::read_dir(directory) { - for entry in entries.flatten() { - let path = entry.path(); - if path.is_file() && path != skill_file { - files.push(path); - } else if path.is_dir() { - if let Ok(sub_entries) = std::fs::read_dir(&path) { - for sub_entry in sub_entries.flatten() { - let sub_path = sub_entry.path(); - if sub_path.is_file() { - files.push(sub_path); - } - } - } - } - } - } - - Ok(files) - } - - fn discover_skills_in_directories(directories: &[PathBuf]) -> HashMap { - let mut skills = HashMap::new(); - - for dir in directories { - if let Ok(entries) = std::fs::read_dir(dir) { - for entry in entries.flatten() { - let path = entry.path(); - if path.is_dir() { - let skill_file = path.join("SKILL.md"); - if skill_file.exists() { - if let Ok(skill) = Self::parse_skill_file(&skill_file) { - skills.insert(skill.metadata.name.clone(), skill); - } - } - } - } - } - } - - skills - } - - fn generate_instructions(&self) -> String { - if self.skills.is_empty() { - return String::new(); - } - - let mut instructions = String::from("You have these skills at your disposal, when it is clear they can help you solve a problem or you are asked to use them:\n\n"); - - let mut skill_list: Vec<_> = self.skills.iter().collect(); - skill_list.sort_by_key(|(name, _)| 
*name); - - for (name, skill) in skill_list { - instructions.push_str(&format!("- {}: {}\n", name, skill.metadata.description)); - } - - instructions - } - - async fn handle_load_skill( - &self, - arguments: Option, - ) -> Result, String> { - let skill_name = arguments - .as_ref() - .ok_or("Missing arguments")? - .get("name") - .and_then(|v| v.as_str()) - .ok_or("Missing required parameter: name")?; - - let skill = self - .skills - .get(skill_name) - .ok_or_else(|| format!("Skill '{}' not found", skill_name))?; - - let mut response = format!("# Skill: {}\n\n{}\n\n", skill.metadata.name, skill.body); - - if !skill.supporting_files.is_empty() { - response.push_str(&format!( - "## Supporting Files\n\nSkill directory: {}\n\n", - skill.directory.display() - )); - response.push_str("The following supporting files are available:\n"); - for file in &skill.supporting_files { - if let Ok(relative) = file.strip_prefix(&skill.directory) { - response.push_str(&format!("- {}\n", relative.display())); - } - } - response.push_str("\nUse the view file tools to access these files as needed, or run scripts as directed with dev extension.\n"); - } - - Ok(vec![Content::text(response)]) - } - - fn get_tools() -> Vec { - let schema = schema_for!(LoadSkillParams); - let schema_value = - serde_json::to_value(schema).expect("Failed to serialize LoadSkillParams schema"); - - let input_schema = schema_value - .as_object() - .expect("Schema should be an object") - .clone(); - - vec![Tool::new( - "loadSkill".to_string(), - indoc! {r#" - Load a skill by name and return its content. - - This tool loads the specified skill and returns its body content along with - information about any supporting files in the skill directory. 
- "#} - .to_string(), - input_schema, - ) - .annotate(ToolAnnotations { - title: Some("Load skill".to_string()), - read_only_hint: Some(true), - destructive_hint: Some(false), - idempotent_hint: Some(true), - open_world_hint: Some(false), - })] - } -} - -#[async_trait] -impl McpClientTrait for SkillsClient { - async fn list_tools( - &self, - _session_id: &str, - _next_cursor: Option, - _cancellation_token: CancellationToken, - ) -> Result { - let tools = if self.skills.is_empty() { - Vec::new() - } else { - Self::get_tools() - }; - Ok(ListToolsResult { - tools, - next_cursor: None, - meta: None, - }) - } - - async fn call_tool( - &self, - _session_id: &str, - name: &str, - arguments: Option, - _working_dir: Option<&str>, - _cancellation_token: CancellationToken, - ) -> Result { - let content = match name { - "loadSkill" => self.handle_load_skill(arguments).await, - _ => Err(format!("Unknown tool: {}", name)), - }; - - match content { - Ok(content) => Ok(CallToolResult::success(content)), - Err(error) => Ok(CallToolResult::error(vec![Content::text(format!( - "Error: {}", - error - ))])), - } - } - - fn get_info(&self) -> Option<&InitializeResult> { - Some(&self.info) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::fs; - use tempfile::TempDir; - - #[test] - fn test_parse_frontmatter() { - let content = r#"--- -name: test-skill -description: A test skill ---- - -# Test Skill - -This is the body of the skill. 
-"#; - - let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap(); - assert_eq!(metadata.name, "test-skill"); - assert_eq!(metadata.description, "A test skill"); - assert!(body.contains("# Test Skill")); - assert!(body.contains("This is the body of the skill.")); - } - - #[test] - fn test_parse_frontmatter_missing() { - let content = "# No frontmatter here"; - assert!(SkillsClient::parse_frontmatter(content).is_err()); - } - - #[test] - fn test_parse_frontmatter_unclosed() { - let content = r#"--- -name: test -description: test -"#; - assert!(SkillsClient::parse_frontmatter(content).is_err()); - } - - #[test] - fn test_parse_frontmatter_with_extra_fields() { - let content = r#"--- -name: test-skill -description: A test skill -author: Test Author -version: 1.0.0 -tags: - - test - - example -extra_field: some value ---- - -# Test Skill - -This is the body of the skill. -"#; - - let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap(); - assert_eq!(metadata.name, "test-skill"); - assert_eq!(metadata.description, "A test skill"); - assert!(body.contains("# Test Skill")); - assert!(body.contains("This is the body of the skill.")); - } - - #[test] - fn test_parse_skill_file() { - let temp_dir = TempDir::new().unwrap(); - let skill_dir = temp_dir.path().join("test-skill"); - fs::create_dir(&skill_dir).unwrap(); - - let skill_file = skill_dir.join("SKILL.md"); - fs::write( - &skill_file, - r#"--- -name: test-skill -description: A test skill ---- - -# Test Skill Content -"#, - ) - .unwrap(); - - fs::write(skill_dir.join("helper.py"), "print('hello')").unwrap(); - fs::create_dir(skill_dir.join("templates")).unwrap(); - fs::write(skill_dir.join("templates/template.txt"), "template").unwrap(); - - let skill = SkillsClient::parse_skill_file(&skill_file).unwrap(); - assert_eq!(skill.metadata.name, "test-skill"); - assert_eq!(skill.metadata.description, "A test skill"); - assert!(skill.body.contains("# Test Skill Content")); - 
assert_eq!(skill.supporting_files.len(), 2); - } - - #[test] - fn test_discover_skills() { - let temp_dir = TempDir::new().unwrap(); - let skills_dir = temp_dir.path().join("skills"); - fs::create_dir(&skills_dir).unwrap(); - - let skill1_dir = skills_dir.join("test-skill-one-a1b2c3"); - fs::create_dir(&skill1_dir).unwrap(); - fs::write( - skill1_dir.join("SKILL.md"), - r#"--- -name: test-skill-one-a1b2c3 -description: First test skill ---- -Body 1 -"#, - ) - .unwrap(); - - let skill2_dir = skills_dir.join("test-skill-two-d4e5f6"); - fs::create_dir(&skill2_dir).unwrap(); - fs::write( - skill2_dir.join("SKILL.md"), - r#"--- -name: test-skill-two-d4e5f6 -description: Second test skill ---- -Body 2 -"#, - ) - .unwrap(); - - let skill3_dir = skills_dir.join("test-skill-three-g7h8i9"); - fs::create_dir(&skill3_dir).unwrap(); - fs::write( - skill3_dir.join("SKILL.md"), - r#"--- -name: test-skill-three-g7h8i9 -description: Third test skill ---- -Body 3 -"#, - ) - .unwrap(); - - let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]); - - assert_eq!(skills.len(), 3); - assert!(skills.contains_key("test-skill-one-a1b2c3")); - assert!(skills.contains_key("test-skill-two-d4e5f6")); - assert!(skills.contains_key("test-skill-three-g7h8i9")); - } - - #[test] - fn test_discover_skills_from_multiple_directories() { - let temp_dir = TempDir::new().unwrap(); - - let dir1 = temp_dir.path().join("dir1"); - fs::create_dir(&dir1).unwrap(); - let skill1_dir = dir1.join("skill-from-dir1"); - fs::create_dir(&skill1_dir).unwrap(); - fs::write( - skill1_dir.join("SKILL.md"), - r#"--- -name: skill-from-dir1 -description: Skill from directory 1 ---- -Content from dir1 -"#, - ) - .unwrap(); - - let dir2 = temp_dir.path().join("dir2"); - fs::create_dir(&dir2).unwrap(); - let skill2_dir = dir2.join("skill-from-dir2"); - fs::create_dir(&skill2_dir).unwrap(); - fs::write( - skill2_dir.join("SKILL.md"), - r#"--- -name: skill-from-dir2 -description: Skill from directory 2 ---- 
-Content from dir2 -"#, - ) - .unwrap(); - - let dir3 = temp_dir.path().join("dir3"); - fs::create_dir(&dir3).unwrap(); - let skill3_dir = dir3.join("skill-from-dir3"); - fs::create_dir(&skill3_dir).unwrap(); - fs::write( - skill3_dir.join("SKILL.md"), - r#"--- -name: skill-from-dir3 -description: Skill from directory 3 ---- -Content from dir3 -"#, - ) - .unwrap(); - - let skills = SkillsClient::discover_skills_in_directories(&[dir1, dir2, dir3]); - - assert_eq!(skills.len(), 3); - assert!(skills.contains_key("skill-from-dir1")); - assert!(skills.contains_key("skill-from-dir2")); - assert!(skills.contains_key("skill-from-dir3")); - - assert_eq!( - skills.get("skill-from-dir1").unwrap().metadata.description, - "Skill from directory 1" - ); - assert_eq!( - skills.get("skill-from-dir2").unwrap().metadata.description, - "Skill from directory 2" - ); - assert_eq!( - skills.get("skill-from-dir3").unwrap().metadata.description, - "Skill from directory 3" - ); - } - - #[test] - fn test_empty_instructions_when_no_skills() { - let temp_dir = TempDir::new().unwrap(); - let empty_dir = temp_dir.path().join("empty"); - fs::create_dir(&empty_dir).unwrap(); - - let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]); - assert_eq!(skills.len(), 0); - - let mut client = SkillsClient { - info: InitializeResult { - protocol_version: ProtocolVersion::V_2025_03_26, - capabilities: ServerCapabilities { - tasks: None, - tools: Some(ToolsCapability { - list_changed: Some(false), - }), - resources: None, - prompts: None, - completions: None, - experimental: None, - logging: None, - }, - server_info: Implementation { - name: EXTENSION_NAME.to_string(), - title: Some("Skills".to_string()), - version: "1.0.0".to_string(), - icons: None, - website_url: None, - }, - instructions: Some(String::new()), - }, - skills, - }; - - let instructions = client.generate_instructions(); - assert_eq!(instructions, ""); - assert!(instructions.is_empty()); - - client.info.instructions = 
Some(instructions); - assert_eq!(client.info.instructions.as_ref().unwrap(), ""); - } - - #[tokio::test] - async fn test_no_tools_when_no_skills() { - let temp_dir = TempDir::new().unwrap(); - let empty_dir = temp_dir.path().join("empty"); - fs::create_dir(&empty_dir).unwrap(); - - let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]); - assert_eq!(skills.len(), 0); - - let client = SkillsClient { - info: InitializeResult { - protocol_version: ProtocolVersion::V_2025_03_26, - capabilities: ServerCapabilities { - tasks: None, - tools: Some(ToolsCapability { - list_changed: Some(false), - }), - resources: None, - prompts: None, - completions: None, - experimental: None, - logging: None, - }, - server_info: Implementation { - name: EXTENSION_NAME.to_string(), - title: Some("Skills".to_string()), - version: "1.0.0".to_string(), - icons: None, - website_url: None, - }, - instructions: Some(String::new()), - }, - skills, - }; - - let result = client - .list_tools("test-session-id", None, CancellationToken::new()) - .await - .unwrap(); - assert_eq!(result.tools.len(), 0); - } - - #[tokio::test] - async fn test_tools_available_when_skills_exist() { - let temp_dir = TempDir::new().unwrap(); - let skills_dir = temp_dir.path().join("skills"); - fs::create_dir(&skills_dir).unwrap(); - - let skill_dir = skills_dir.join("test-skill"); - fs::create_dir(&skill_dir).unwrap(); - fs::write( - skill_dir.join("SKILL.md"), - r#"--- -name: test-skill -description: A test skill ---- -Content -"#, - ) - .unwrap(); - - let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]); - assert_eq!(skills.len(), 1); - - let client = SkillsClient { - info: InitializeResult { - protocol_version: ProtocolVersion::V_2025_03_26, - capabilities: ServerCapabilities { - tasks: None, - tools: Some(ToolsCapability { - list_changed: Some(false), - }), - resources: None, - prompts: None, - completions: None, - experimental: None, - logging: None, - }, - server_info: 
Implementation { - name: EXTENSION_NAME.to_string(), - title: Some("Skills".to_string()), - version: "1.0.0".to_string(), - icons: None, - website_url: None, - }, - instructions: Some(String::new()), - }, - skills, - }; - - let result = client - .list_tools("test-session-id", None, CancellationToken::new()) - .await - .unwrap(); - assert_eq!(result.tools.len(), 1); - assert_eq!(result.tools[0].name, "loadSkill"); - } - - #[test] - fn test_instructions_with_skills() { - let temp_dir = TempDir::new().unwrap(); - let skills_dir = temp_dir.path().join("skills"); - fs::create_dir(&skills_dir).unwrap(); - - let skill1_dir = skills_dir.join("alpha-skill"); - fs::create_dir(&skill1_dir).unwrap(); - fs::write( - skill1_dir.join("SKILL.md"), - r#"--- -name: alpha-skill -description: First skill alphabetically ---- -Content -"#, - ) - .unwrap(); - - let skill2_dir = skills_dir.join("beta-skill"); - fs::create_dir(&skill2_dir).unwrap(); - fs::write( - skill2_dir.join("SKILL.md"), - r#"--- -name: beta-skill -description: Second skill alphabetically ---- -Content -"#, - ) - .unwrap(); - - let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]); - assert_eq!(skills.len(), 2); - - let mut client = SkillsClient { - info: InitializeResult { - protocol_version: ProtocolVersion::V_2025_03_26, - capabilities: ServerCapabilities { - tasks: None, - tools: Some(ToolsCapability { - list_changed: Some(false), - }), - resources: None, - prompts: None, - completions: None, - experimental: None, - logging: None, - }, - server_info: Implementation { - name: EXTENSION_NAME.to_string(), - title: Some("Skills".to_string()), - version: "1.0.0".to_string(), - icons: None, - website_url: None, - }, - instructions: Some(String::new()), - }, - skills, - }; - - let instructions = client.generate_instructions(); - assert!(!instructions.is_empty()); - assert!(instructions.contains("You have these skills at your disposal")); - assert!(instructions.contains("alpha-skill: First skill 
alphabetically")); - assert!(instructions.contains("beta-skill: Second skill alphabetically")); - - let lines: Vec<&str> = instructions.lines().collect(); - let alpha_line = lines - .iter() - .position(|l| l.contains("alpha-skill")) - .unwrap(); - let beta_line = lines.iter().position(|l| l.contains("beta-skill")).unwrap(); - assert!(alpha_line < beta_line); - - client.info.instructions = Some(instructions); - assert!(!client.info.instructions.as_ref().unwrap().is_empty()); - } - - #[test] - fn test_discover_skills_working_dir_overrides_global() { - let temp_dir = TempDir::new().unwrap(); - - // Simulate ~/.claude/skills (global, lowest priority) - let global_claude = temp_dir.path().join("global-claude"); - fs::create_dir(&global_claude).unwrap(); - let skill_global_claude = global_claude.join("my-skill"); - fs::create_dir(&skill_global_claude).unwrap(); - fs::write( - skill_global_claude.join("SKILL.md"), - r#"--- -name: my-skill -description: From global claude ---- -Global claude content -"#, - ) - .unwrap(); - - // Simulate ~/.config/goose/skills (global, medium priority) - let global_goose = temp_dir.path().join("global-goose"); - fs::create_dir(&global_goose).unwrap(); - let skill_global_goose = global_goose.join("my-skill"); - fs::create_dir(&skill_global_goose).unwrap(); - fs::write( - skill_global_goose.join("SKILL.md"), - r#"--- -name: my-skill -description: From global goose config ---- -Global goose config content -"#, - ) - .unwrap(); - - // Simulate $PWD/.claude/skills (working dir, higher priority) - let working_claude = temp_dir.path().join("working-claude"); - fs::create_dir(&working_claude).unwrap(); - let skill_working_claude = working_claude.join("my-skill"); - fs::create_dir(&skill_working_claude).unwrap(); - fs::write( - skill_working_claude.join("SKILL.md"), - r#"--- -name: my-skill -description: From working dir claude ---- -Working dir claude content -"#, - ) - .unwrap(); - - // Simulate $PWD/.goose/skills (working dir, highest priority) - 
let working_goose = temp_dir.path().join("working-goose"); - fs::create_dir(&working_goose).unwrap(); - let skill_working_goose = working_goose.join("my-skill"); - fs::create_dir(&skill_working_goose).unwrap(); - fs::write( - skill_working_goose.join("SKILL.md"), - r#"--- -name: my-skill -description: From working dir goose ---- -Working dir goose content -"#, - ) - .unwrap(); - - // Test priority order: global_claude < global_goose < working_claude < working_goose - let skills = SkillsClient::discover_skills_in_directories(&[ - global_claude, - global_goose, - working_claude, - working_goose, - ]); - - assert_eq!(skills.len(), 1); - assert!(skills.contains_key("my-skill")); - // The last directory (working_goose) should win - assert_eq!( - skills.get("my-skill").unwrap().metadata.description, - "From working dir goose" - ); - assert!(skills - .get("my-skill") - .unwrap() - .body - .contains("Working dir goose content")); - } - - #[test] - fn test_builtin_skills_loaded() { - let skills = SkillsClient::load_builtin_skills(); - - assert!(!skills.is_empty()); - assert!(skills.contains_key("goose-doc-guide")); - } -} diff --git a/crates/goose/src/agents/subagent_handler.rs b/crates/goose/src/agents/subagent_handler.rs index 126e224d20a9..fd23d20d0a64 100644 --- a/crates/goose/src/agents/subagent_handler.rs +++ b/crates/goose/src/agents/subagent_handler.rs @@ -20,6 +20,8 @@ use std::sync::Arc; use tokio_util::sync::CancellationToken; use tracing::{debug, info}; +pub type OnMessageCallback = Arc; + #[derive(Serialize)] pub struct SubagentPromptContext { pub max_turns: usize, @@ -52,6 +54,39 @@ pub async fn run_complete_subagent_task( .await } +pub async fn run_subagent_task_with_callback( + config: AgentConfig, + recipe: Recipe, + task_config: TaskConfig, + return_last_only: bool, + session_id: String, + cancellation_token: Option, + on_message: Option, +) -> Result { + let (messages, final_output) = get_agent_messages_with_callback( + config, + recipe, + task_config, + 
session_id, + cancellation_token, + on_message, + ) + .await + .map_err(|e| { + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to execute task: {}", e), + None, + ) + })?; + + if let Some(output) = final_output { + return Ok(output); + } + + Ok(extract_response_text(&messages, return_last_only)) +} + pub async fn run_complete_subagent_task_with_notifications( config: AgentConfig, recipe: Recipe, @@ -82,7 +117,11 @@ pub async fn run_complete_subagent_task_with_notifications( return Ok(output); } - let response_text = if return_last_only { + Ok(extract_response_text(&messages, return_last_only)) +} + +fn extract_response_text(messages: &Conversation, return_last_only: bool) -> String { + if return_last_only { messages .messages() .last() @@ -133,13 +172,101 @@ pub async fn run_complete_subagent_task_with_notifications( .collect(); all_text_content.join("\n") - }; - - Ok(response_text) + } } pub const SUBAGENT_TOOL_REQUEST_TYPE: &str = "subagent_tool_request"; +fn get_agent_messages_with_callback( + config: AgentConfig, + recipe: Recipe, + task_config: TaskConfig, + session_id: String, + cancellation_token: Option, + on_message: Option, +) -> AgentMessagesFuture { + Box::pin(async move { + let system_instructions = recipe.instructions.clone().unwrap_or_default(); + let user_task = recipe + .prompt + .clone() + .unwrap_or_else(|| "Begin.".to_string()); + + let agent = Arc::new(Agent::with_config(config)); + + agent + .update_provider(task_config.provider.clone(), &session_id) + .await + .map_err(|e| anyhow!("Failed to set provider on sub agent: {}", e))?; + + for extension in &task_config.extensions { + if let Err(e) = agent.add_extension(extension.clone(), &session_id).await { + debug!( + "Failed to add extension '{}' to subagent: {}", + extension.name(), + e + ); + } + } + + let has_response_schema = recipe.response.is_some(); + agent + .apply_recipe_components(recipe.response.clone(), true) + .await; + + let subagent_prompt = + 
build_subagent_prompt(&agent, &task_config, &session_id, system_instructions).await?; + agent.override_system_prompt(subagent_prompt).await; + + let user_message = Message::user().with_text(user_task); + let mut conversation = Conversation::new_unvalidated(vec![user_message.clone()]); + + if let Some(activities) = recipe.activities { + for activity in activities { + info!("Recipe activity: {}", activity); + } + } + let session_config = SessionConfig { + id: session_id.clone(), + schedule_id: None, + max_turns: task_config.max_turns.map(|v| v as u32), + retry_config: recipe.retry, + }; + + let mut stream = + crate::session_context::with_session_id(Some(session_id.to_string()), async { + agent + .reply(user_message, session_config, cancellation_token) + .await + }) + .await + .map_err(|e| anyhow!("Failed to get reply from agent: {}", e))?; + + while let Some(message_result) = stream.next().await { + match message_result { + Ok(AgentEvent::Message(msg)) => { + if let Some(ref callback) = on_message { + callback(&msg); + } + conversation.push(msg); + } + Ok(AgentEvent::McpNotification(_)) | Ok(AgentEvent::ModelChange { .. 
}) => {} + Ok(AgentEvent::HistoryReplaced(updated_conversation)) => { + conversation = updated_conversation; + } + Err(e) => { + tracing::error!("Error receiving message from subagent: {}", e); + break; + } + } + } + + let final_output = get_final_output(&agent, has_response_schema).await; + + Ok((conversation, final_output)) + }) +} + fn get_agent_messages_with_notifications( config: AgentConfig, recipe: Recipe, @@ -174,7 +301,7 @@ fn get_agent_messages_with_notifications( let has_response_schema = recipe.response.is_some(); agent - .apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true) + .apply_recipe_components(recipe.response.clone(), true) .await; let subagent_prompt = diff --git a/crates/goose/src/agents/subagent_tool.rs b/crates/goose/src/agents/subagent_tool.rs deleted file mode 100644 index 6c19fe53bc1f..000000000000 --- a/crates/goose/src/agents/subagent_tool.rs +++ /dev/null @@ -1,530 +0,0 @@ -use std::borrow::Cow; -use std::collections::HashMap; -use std::path::PathBuf; - -use anyhow::{anyhow, Result}; -use futures::FutureExt; -use rmcp::model::{Content, ErrorCode, ErrorData, ServerNotification, Tool}; -use serde::Deserialize; -use serde_json::{json, Value}; -use tokio::sync::mpsc; -use tokio_stream::wrappers::UnboundedReceiverStream; -use tokio_util::sync::CancellationToken; - -use crate::agents::subagent_handler::run_complete_subagent_task_with_notifications; -use crate::agents::subagent_task_config::TaskConfig; -use crate::agents::tool_execution::ToolCallResult; -use crate::agents::AgentConfig; -use crate::providers; -use crate::recipe::build_recipe::build_recipe_from_template; -use crate::recipe::local_recipes::load_local_recipe_file; -use crate::recipe::{Recipe, SubRecipe}; - -pub const SUBAGENT_TOOL_NAME: &str = "subagent"; - -const SUMMARY_INSTRUCTIONS: &str = r#" -Important: Your parent agent will only receive your final message as a summary of your work. 
-Make sure your last message provides a comprehensive summary of: -- What you were asked to do -- What actions you took -- The results or outcomes -- Any important findings or recommendations - -Be concise but complete. -"#; - -#[derive(Debug, Deserialize, Clone)] -pub struct SubagentParams { - pub instructions: Option, - pub subrecipe: Option, - pub parameters: Option>, - pub extensions: Option>, - pub settings: Option, - #[serde(default = "default_summary")] - pub summary: bool, -} - -fn default_summary() -> bool { - true -} - -#[derive(Debug, Deserialize, Clone)] -pub struct SubagentSettings { - pub provider: Option, - pub model: Option, - pub temperature: Option, - pub max_turns: Option, -} - -pub fn create_subagent_tool(sub_recipes: &[SubRecipe]) -> Tool { - let description = build_tool_description(sub_recipes); - - let schema = json!({ - "type": "object", - "properties": { - "instructions": { - "type": "string", - "description": "Instructions for the subagent. Required for ad-hoc tasks. For predefined tasks, adds additional context." - }, - "subrecipe": { - "type": "string", - "description": "Name of a predefined subrecipe to run." - }, - "parameters": { - "type": "object", - "additionalProperties": true, - "description": "Parameters for the subrecipe. Only valid when 'subrecipe' is specified." - }, - "extensions": { - "type": "array", - "items": {"type": "string"}, - "description": "Extensions to enable. Omit to inherit all, empty array for none." - }, - "settings": { - "type": "object", - "properties": { - "provider": {"type": "string", "description": "Override LLM provider"}, - "model": {"type": "string", "description": "Override model"}, - "temperature": {"type": "number", "description": "Override temperature"}, - "max_turns": {"type": "number", "description": "Override max turns"} - }, - "description": "Override model/provider/settings." 
- }, - "summary": { - "type": "boolean", - "default": true, - "description": "If true (default), return only the subagent's final summary." - } - } - }); - - Tool::new( - SUBAGENT_TOOL_NAME, - description, - schema.as_object().unwrap().clone(), - ) -} - -fn build_tool_description(sub_recipes: &[SubRecipe]) -> String { - let mut desc = String::from( - "Delegate a task to a subagent that runs independently with its own context.\n\n\ - Modes:\n\ - 1. Ad-hoc: Provide `instructions` for a custom task\n\ - 2. Predefined: Provide `subrecipe` name to run a predefined task\n\ - 3. Augmented: Provide both `subrecipe` and `instructions` to add context\n\n\ - The subagent has access to the same tools as you by default. \ - Use `extensions` to limit which extensions the subagent can use.\n\n\ - For parallel execution, make multiple `subagent` tool calls in the same message.", - ); - - if !sub_recipes.is_empty() { - desc.push_str("\n\nAvailable subrecipes:"); - for sr in sub_recipes { - let params_info = get_subrecipe_params_description(sr); - let sequential_hint = if sr.sequential_when_repeated { - " [run sequentially, not in parallel]" - } else { - "" - }; - desc.push_str(&format!( - "\nโ€ข {}{} - {}{}", - sr.name, - sequential_hint, - sr.description.as_deref().unwrap_or("No description"), - if params_info.is_empty() { - String::new() - } else { - format!(" (params: {})", params_info) - } - )); - } - } - - desc -} - -fn get_subrecipe_params_description(sub_recipe: &SubRecipe) -> String { - match load_local_recipe_file(&sub_recipe.path) { - Ok(recipe_file) => match Recipe::from_content(&recipe_file.content) { - Ok(recipe) => { - if let Some(params) = recipe.parameters { - params - .iter() - .filter(|p| { - sub_recipe - .values - .as_ref() - .map(|v| !v.contains_key(&p.key)) - .unwrap_or(true) - }) - .map(|p| { - let req = match p.requirement { - crate::recipe::RecipeParameterRequirement::Required => "[required]", - _ => "[optional]", - }; - format!("{} {}", p.key, req) - }) - 
.collect::>() - .join(", ") - } else { - String::new() - } - } - Err(_) => String::new(), - }, - Err(_) => String::new(), - } -} - -/// Note: SubRecipe.sequential_when_repeated is surfaced as a hint in the tool description -/// (e.g., "[run sequentially, not in parallel]") but not enforced. The LLM controls -/// sequencing by making sequential vs parallel tool calls. -pub fn handle_subagent_tool( - config: &AgentConfig, - params: Value, - task_config: TaskConfig, - sub_recipes: HashMap, - working_dir: PathBuf, - cancellation_token: Option, -) -> ToolCallResult { - let parsed_params: SubagentParams = match serde_json::from_value(params) { - Ok(p) => p, - Err(e) => { - return ToolCallResult::from(Err(ErrorData { - code: ErrorCode::INVALID_PARAMS, - message: Cow::from(format!("Invalid parameters: {}", e)), - data: None, - })); - } - }; - - if parsed_params.instructions.is_none() && parsed_params.subrecipe.is_none() { - return ToolCallResult::from(Err(ErrorData { - code: ErrorCode::INVALID_PARAMS, - message: Cow::from("Must provide 'instructions' or 'subrecipe' (or both)"), - data: None, - })); - } - - if parsed_params.parameters.is_some() && parsed_params.subrecipe.is_none() { - return ToolCallResult::from(Err(ErrorData { - code: ErrorCode::INVALID_PARAMS, - message: Cow::from("'parameters' can only be used with 'subrecipe'"), - data: None, - })); - } - - let recipe = match build_recipe(&parsed_params, &sub_recipes) { - Ok(r) => r, - Err(e) => { - return ToolCallResult::from(Err(ErrorData { - code: ErrorCode::INVALID_PARAMS, - message: Cow::from(e.to_string()), - data: None, - })); - } - }; - - let config = config.clone(); - let (notification_tx, notification_rx) = mpsc::unbounded_channel(); - - ToolCallResult { - notification_stream: Some(Box::new(UnboundedReceiverStream::new(notification_rx))), - result: Box::new( - execute_subagent_with_notifications( - config, - recipe, - task_config, - parsed_params, - working_dir, - cancellation_token, - notification_tx, - ) - 
.boxed(), - ), - } -} - -async fn execute_subagent_with_notifications( - config: AgentConfig, - recipe: Recipe, - task_config: TaskConfig, - params: SubagentParams, - working_dir: PathBuf, - cancellation_token: Option, - notification_tx: mpsc::UnboundedSender, -) -> Result { - let session = config - .session_manager - .create_session( - working_dir, - "Subagent task".to_string(), - crate::session::session_manager::SessionType::SubAgent, - ) - .await - .map_err(|e| ErrorData { - code: ErrorCode::INTERNAL_ERROR, - message: Cow::from(format!("Failed to create session: {}", e)), - data: None, - })?; - - let task_config = apply_settings_overrides(task_config, ¶ms) - .await - .map_err(|e| ErrorData { - code: ErrorCode::INVALID_PARAMS, - message: Cow::from(e.to_string()), - data: None, - })?; - - let result = run_complete_subagent_task_with_notifications( - config, - recipe, - task_config, - params.summary, - session.id, - cancellation_token, - Some(notification_tx), - ) - .await; - - match result { - Ok(text) => Ok(rmcp::model::CallToolResult { - content: vec![Content::text(text)], - structured_content: None, - is_error: Some(false), - meta: None, - }), - Err(e) => Err(ErrorData { - code: ErrorCode::INTERNAL_ERROR, - message: Cow::from(e.to_string()), - data: None, - }), - } -} - -fn build_recipe( - params: &SubagentParams, - sub_recipes: &HashMap, -) -> Result { - let mut recipe = if let Some(subrecipe_name) = ¶ms.subrecipe { - build_subrecipe(subrecipe_name, params, sub_recipes)? - } else { - build_adhoc_recipe(params)? 
- }; - - if params.summary { - let current = recipe.instructions.unwrap_or_default(); - recipe.instructions = Some(format!("{}\n{}", current, SUMMARY_INSTRUCTIONS)); - } - - Ok(recipe) -} - -fn build_subrecipe( - subrecipe_name: &str, - params: &SubagentParams, - sub_recipes: &HashMap, -) -> Result { - let sub_recipe = sub_recipes.get(subrecipe_name).ok_or_else(|| { - let available: Vec<_> = sub_recipes.keys().cloned().collect(); - anyhow!( - "Unknown subrecipe '{}'. Available: {}", - subrecipe_name, - available.join(", ") - ) - })?; - - let recipe_file = load_local_recipe_file(&sub_recipe.path) - .map_err(|e| anyhow!("Failed to load subrecipe '{}': {}", subrecipe_name, e))?; - - let mut param_values: Vec<(String, String)> = Vec::new(); - - if let Some(values) = &sub_recipe.values { - for (k, v) in values { - param_values.push((k.clone(), v.clone())); - } - } - - if let Some(provided_params) = ¶ms.parameters { - for (k, v) in provided_params { - let value_str = match v { - Value::String(s) => s.clone(), - other => other.to_string(), - }; - param_values.push((k.clone(), value_str)); - } - } - - let mut recipe = build_recipe_from_template( - recipe_file.content, - &recipe_file.parent_dir, - param_values, - None:: Result>, - ) - .map_err(|e| anyhow!("Failed to build subrecipe: {}", e))?; - - if let Some(extra) = ¶ms.instructions { - let mut current = recipe.instructions.take().unwrap_or_default(); - if !current.is_empty() { - current.push_str("\n\n"); - } - current.push_str(extra); - recipe.instructions = Some(current); - } - - Ok(recipe) -} - -fn build_adhoc_recipe(params: &SubagentParams) -> Result { - let instructions = params - .instructions - .as_ref() - .ok_or_else(|| anyhow!("Instructions required for ad-hoc task"))?; - - let recipe = Recipe::builder() - .version("1.0.0") - .title("Subagent Task") - .description("Ad-hoc subagent task") - .instructions(instructions) - .build() - .map_err(|e| anyhow!("Failed to build recipe: {}", e))?; - - if 
recipe.check_for_security_warnings() { - return Err(anyhow!("Recipe contains potentially harmful content")); - } - - Ok(recipe) -} - -async fn apply_settings_overrides( - mut task_config: TaskConfig, - params: &SubagentParams, -) -> Result { - if let Some(settings) = ¶ms.settings { - if let Some(max_turns) = settings.max_turns { - task_config.max_turns = Some(max_turns); - } - - if settings.provider.is_some() || settings.model.is_some() || settings.temperature.is_some() - { - let provider_name = settings - .provider - .clone() - .unwrap_or_else(|| task_config.provider.get_name().to_string()); - - let mut model_config = task_config.provider.get_model_config(); - - if let Some(model) = &settings.model { - model_config.model_name = model.clone(); - } - - if let Some(temp) = settings.temperature { - model_config = model_config.with_temperature(Some(temp)); - } - - task_config.provider = providers::create(&provider_name, model_config) - .await - .map_err(|e| anyhow!("Failed to create provider '{}': {}", provider_name, e))?; - } - } - - if let Some(extension_names) = ¶ms.extensions { - if extension_names.is_empty() { - task_config.extensions = Vec::new(); - } else { - task_config - .extensions - .retain(|ext| extension_names.contains(&ext.name())); - } - } - - Ok(task_config) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_tool_name() { - assert_eq!(SUBAGENT_TOOL_NAME, "subagent"); - } - - #[test] - fn test_create_tool_without_subrecipes() { - let tool = create_subagent_tool(&[]); - assert_eq!(tool.name, "subagent"); - assert!(tool.description.as_ref().unwrap().contains("Ad-hoc")); - assert!(!tool - .description - .as_ref() - .unwrap() - .contains("Available subrecipes")); - } - - #[test] - fn test_create_tool_with_subrecipes() { - let sub_recipes = vec![SubRecipe { - name: "test_recipe".to_string(), - path: "test.yaml".to_string(), - values: None, - sequential_when_repeated: false, - description: Some("A test recipe".to_string()), - }]; - - let 
tool = create_subagent_tool(&sub_recipes); - assert!(tool - .description - .as_ref() - .unwrap() - .contains("Available subrecipes")); - assert!(tool.description.as_ref().unwrap().contains("test_recipe")); - } - - #[test] - fn test_sequential_hint_in_description() { - let sub_recipes = vec![ - SubRecipe { - name: "parallel_ok".to_string(), - path: "test.yaml".to_string(), - values: None, - sequential_when_repeated: false, - description: Some("Can run in parallel".to_string()), - }, - SubRecipe { - name: "sequential_only".to_string(), - path: "test.yaml".to_string(), - values: None, - sequential_when_repeated: true, - description: Some("Must run sequentially".to_string()), - }, - ]; - - let tool = create_subagent_tool(&sub_recipes); - let desc = tool.description.as_ref().unwrap(); - - assert!(desc.contains("parallel_ok")); - assert!(!desc.contains("parallel_ok [run sequentially")); - - assert!(desc.contains("sequential_only [run sequentially, not in parallel]")); - } - - #[test] - fn test_params_deserialization_full() { - let params: SubagentParams = serde_json::from_value(json!({ - "instructions": "Extra context", - "subrecipe": "my_recipe", - "parameters": {"key": "value"}, - "extensions": ["developer"], - "settings": {"model": "gpt-4"}, - "summary": false - })) - .unwrap(); - - assert_eq!(params.instructions, Some("Extra context".to_string())); - assert_eq!(params.subrecipe, Some("my_recipe".to_string())); - assert!(params.parameters.is_some()); - assert_eq!(params.extensions, Some(vec!["developer".to_string()])); - assert!(!params.summary); - } -} diff --git a/crates/goose/src/agents/summon_extension.rs b/crates/goose/src/agents/summon_extension.rs new file mode 100644 index 000000000000..238090518aa2 --- /dev/null +++ b/crates/goose/src/agents/summon_extension.rs @@ -0,0 +1,1980 @@ +//! Summon Extension - Unified tooling for recipes, skills, and subagents +//! +//! Provides two tools: +//! 
- `load`: Inject knowledge into current context or discover available sources +//! - `delegate`: Run tasks in isolated subagents (sync or async) + +use crate::agents::builtin_skills; +use crate::agents::extension::PlatformExtensionContext; +use crate::agents::mcp_client::{Error, McpClientTrait}; +use crate::agents::subagent_handler::{ + run_complete_subagent_task, run_subagent_task_with_callback, OnMessageCallback, +}; +use crate::agents::subagent_task_config::{TaskConfig, DEFAULT_SUBAGENT_MAX_TURNS}; +use crate::agents::AgentConfig; +use crate::config::paths::Paths; +use crate::providers; +use crate::recipe::build_recipe::build_recipe_from_template; +use crate::recipe::local_recipes::load_local_recipe_file; +use crate::recipe::{Recipe, Settings, RECIPE_FILE_EXTENSIONS}; +use crate::session::extension_data::{EnabledExtensionsState, ExtensionState}; +use crate::session::SessionType; +use anyhow::Result; +use async_trait::async_trait; +use rmcp::model::{ + CallToolResult, Content, Implementation, InitializeResult, JsonObject, ListToolsResult, + ProtocolVersion, ServerCapabilities, Tool, ToolsCapability, +}; +use serde::Deserialize; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use tokio::sync::Mutex; + +use tokio::task::JoinHandle; +use tokio_util::sync::CancellationToken; +use tracing::{info, warn}; + +pub static EXTENSION_NAME: &str = "summon"; + +#[derive(Debug, Clone)] +pub struct Source { + pub name: String, + pub kind: SourceKind, + pub description: String, + pub path: PathBuf, + pub content: String, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum SourceKind { + Subrecipe, + Recipe, + Skill, + Agent, + BuiltinSkill, +} + +impl std::fmt::Display for SourceKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SourceKind::Subrecipe => 
write!(f, "subrecipe"), + SourceKind::Recipe => write!(f, "recipe"), + SourceKind::Skill => write!(f, "skill"), + SourceKind::Agent => write!(f, "agent"), + SourceKind::BuiltinSkill => write!(f, "builtin skill"), + } + } +} + +impl Source { + /// Format the source content for loading into context + pub fn to_load_text(&self) -> String { + format!( + "## {} ({})\n\n{}\n\n### Content\n\n{}", + self.name, self.kind, self.description, self.content + ) + } +} + +fn kind_plural(kind: SourceKind) -> &'static str { + match kind { + SourceKind::Subrecipe => "Subrecipes", + SourceKind::Recipe => "Recipes", + SourceKind::Skill => "Skills", + SourceKind::Agent => "Agents", + SourceKind::BuiltinSkill => "Builtin Skills", + } +} + +fn truncate(s: &str, max_len: usize) -> String { + if s.chars().count() <= max_len { + s.to_string() + } else if max_len <= 3 { + "...".to_string() + } else { + let truncated: String = s.chars().take(max_len - 3).collect(); + format!("{}...", truncated) + } +} + +#[derive(Debug, Default, Deserialize)] +pub struct DelegateParams { + pub instructions: Option, + pub source: Option, + pub parameters: Option>, + pub extensions: Option>, + pub provider: Option, + pub model: Option, + pub temperature: Option, + #[serde(default)] + pub r#async: bool, +} + +pub struct BackgroundTask { + pub id: String, + pub description: String, + pub started_at: Instant, + pub turns: Arc, + pub last_activity: Arc, + pub handle: JoinHandle>, + pub cancellation_token: CancellationToken, +} + +pub struct CompletedTask { + pub id: String, + pub description: String, + pub result: Result, + pub turns_taken: u32, + pub duration: Duration, +} + +#[derive(Debug, Deserialize)] +struct SkillMetadata { + name: String, + description: String, +} + +#[derive(Debug, Deserialize)] +struct AgentMetadata { + name: String, + #[serde(default)] + description: Option, + #[serde(default)] + model: Option, +} + +fn parse_frontmatter Deserialize<'de>>(content: &str) -> Option<(T, String)> { + let 
parts: Vec<&str> = content.split("---").collect(); + if parts.len() < 3 { + return None; + } + + let yaml_content = parts[1].trim(); + let metadata: T = match serde_yaml::from_str(yaml_content) { + Ok(m) => m, + Err(e) => { + warn!("Failed to parse frontmatter: {}", e); + return None; + } + }; + + let body = parts[2..].join("---").trim().to_string(); + Some((metadata, body)) +} + +fn parse_skill_content(content: &str, path: PathBuf) -> Option { + let (metadata, body): (SkillMetadata, String) = parse_frontmatter(content)?; + + Some(Source { + name: metadata.name, + kind: SourceKind::Skill, + description: metadata.description, + path, + content: body, + }) +} + +fn parse_agent_content(content: &str, path: PathBuf) -> Option { + let (metadata, body): (AgentMetadata, String) = parse_frontmatter(content)?; + + let description = metadata.description.unwrap_or_else(|| { + let model_info = metadata + .model + .as_ref() + .map(|m| format!(" ({})", m)) + .unwrap_or_default(); + format!("Agent{}", model_info) + }); + + Some(Source { + name: metadata.name, + kind: SourceKind::Agent, + description, + path, + content: body, + }) +} + +fn round_duration(d: Duration) -> String { + let secs = d.as_secs(); + if secs < 60 { + format!("{}s", (secs / 10) * 10) + } else { + format!("{}m", secs / 60) + } +} + +fn current_epoch_millis() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64 +} + +/// Get maximum number of concurrent background tasks +fn max_background_tasks() -> usize { + std::env::var("GOOSE_MAX_BACKGROUND_TASKS") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(5) +} + +fn is_session_id(s: &str) -> bool { + let parts: Vec<&str> = s.split('_').collect(); + parts.len() == 2 && parts[0].len() == 8 && parts[0].chars().all(|c| c.is_ascii_digit()) +} + +pub struct SummonClient { + info: InitializeResult, + context: PlatformExtensionContext, + source_cache: Mutex)>>, + background_tasks: Mutex>, + completed_tasks: Mutex>, +} 
+ +impl Drop for SummonClient { + fn drop(&mut self) { + // Best-effort cancellation of running tasks on shutdown + if let Ok(tasks) = self.background_tasks.try_lock() { + for task in tasks.values() { + task.cancellation_token.cancel(); + } + } + } +} + +impl SummonClient { + pub fn new(context: PlatformExtensionContext) -> Result { + let info = InitializeResult { + protocol_version: ProtocolVersion::V_2025_03_26, + capabilities: ServerCapabilities { + tasks: None, + tools: Some(ToolsCapability { + list_changed: Some(false), + }), + resources: None, + prompts: None, + completions: None, + experimental: None, + logging: None, + }, + server_info: Implementation { + name: EXTENSION_NAME.to_string(), + title: Some("Summon".to_string()), + version: "1.0.0".to_string(), + icons: None, + website_url: None, + }, + instructions: Some( + "Load knowledge and delegate tasks to subagents using the summon extension." + .to_string(), + ), + }; + + Ok(Self { + info, + context, + source_cache: Mutex::new(None), + background_tasks: Mutex::new(HashMap::new()), + completed_tasks: Mutex::new(HashMap::new()), + }) + } + + fn create_load_tool(&self) -> Tool { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "source": { + "type": "string", + "description": "Name of the source to load. If omitted, lists all available sources." + }, + "cancel": { + "type": "boolean", + "default": false, + "description": "For running background tasks: cancel and return output." 
+ } + } + }); + + Tool::new( + "load", + "Load knowledge into your current context or discover available sources.\n\n\ + Call with no arguments to list all available sources (subrecipes, recipes, skills, agents).\n\ + Call with a source name to load its content into your context.\n\ + For background tasks: load(source: \"task_id\", cancel: true) stops and returns output.\n\n\ + Examples:\n\ + - load() โ†’ Lists available sources\n\ + - load(source: \"rust-patterns\") โ†’ Loads the rust-patterns skill" + .to_string(), + schema.as_object().unwrap().clone(), + ) + } + + fn create_delegate_tool(&self) -> Tool { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "instructions": { + "type": "string", + "description": "Task instructions. Required for ad-hoc tasks." + }, + "source": { + "type": "string", + "description": "Name of a recipe, skill, or agent to run." + }, + "parameters": { + "type": "object", + "additionalProperties": true, + "description": "Parameters for the source (only valid with source)." + }, + "extensions": { + "type": "array", + "items": {"type": "string"}, + "description": "Extensions to enable. Omit to inherit all, empty array for none." + }, + "provider": { + "type": "string", + "description": "Override LLM provider." + }, + "model": { + "type": "string", + "description": "Override model." + }, + "temperature": { + "type": "number", + "description": "Override temperature." + }, + "async": { + "type": "boolean", + "default": false, + "description": "Run in background (default: false)." + } + } + }); + + Tool::new( + "delegate", + "Delegate a task to a subagent that runs independently with its own context.\n\n\ + Modes:\n\ + 1. Ad-hoc: Provide `instructions` for a custom task\n\ + 2. Source-based: Provide `source` name to run a subrecipe, recipe, skill, or agent\n\ + 3. 
Combined: Pair a source with a task (e.g., source: \"rust-patterns\", instructions: \"review auth.rs\")\n\n\ + Effective Delegation:\n\ + - Delegates know only instructions + source content\n\ + - Delegates cannot coordinate. Same-file work = conflicts.\n\ + - Parallel: async: true, sleep to poll. Single: sync.\n\n\ + Research (read-only): parallelize freely - delegates explore and report back.\n\ + Work (writes): partition files strictly - no two delegates touch the same file.\n\n\ + Decompose โ†’ async delegates โ†’ sleep โ†’ synthesize." + .to_string(), + schema.as_object().unwrap().clone(), + ) + } + + async fn get_working_dir(&self, session_id: &str) -> PathBuf { + self.context + .session_manager + .get_session(session_id, false) + .await + .ok() + .map(|s| s.working_dir) + .unwrap_or_else(|| std::env::current_dir().unwrap_or_default()) + } + + async fn get_sources(&self, session_id: &str, working_dir: &Path) -> Vec { + let fs_sources = self.get_filesystem_sources(working_dir).await; + + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + let mut sources: Vec = Vec::new(); + + self.add_subrecipes(session_id, &mut sources, &mut seen) + .await; + + for source in fs_sources { + if !seen.contains(&source.name) { + seen.insert(source.name.clone()); + sources.push(source); + } + } + + sources.sort_by(|a, b| (&a.kind, &a.name).cmp(&(&b.kind, &b.name))); + sources + } + + async fn get_filesystem_sources(&self, working_dir: &Path) -> Vec { + let mut cache = self.source_cache.lock().await; + if let Some((cached_at, cached_dir, sources)) = cache.as_ref() { + if cached_dir == working_dir && cached_at.elapsed() < Duration::from_secs(60) { + return sources.clone(); + } + } + let sources = self.discover_filesystem_sources(working_dir); + *cache = Some((Instant::now(), working_dir.to_path_buf(), sources.clone())); + sources + } + + async fn resolve_source( + &self, + session_id: &str, + name: &str, + working_dir: &Path, + ) -> Option { + let sources 
= self.get_sources(session_id, working_dir).await; + let mut source = sources.into_iter().find(|s| s.name == name)?; + + if source.kind == SourceKind::Subrecipe && source.content.is_empty() { + source.content = self.load_subrecipe_content(session_id, &source.name).await; + } + + Some(source) + } + + async fn load_subrecipe_content(&self, session_id: &str, name: &str) -> String { + let session = match self + .context + .session_manager + .get_session(session_id, false) + .await + { + Ok(s) => s, + Err(_) => return String::new(), + }; + + let sub_recipes = match session.recipe.as_ref().and_then(|r| r.sub_recipes.as_ref()) { + Some(sr) => sr, + None => return String::new(), + }; + + let sr = match sub_recipes.iter().find(|sr| sr.name == name) { + Some(sr) => sr, + None => return String::new(), + }; + + match load_local_recipe_file(&sr.path) { + Ok(recipe_file) => match Recipe::from_content(&recipe_file.content) { + Ok(recipe) => recipe.instructions.unwrap_or_default(), + Err(_) => recipe_file.content, + }, + Err(_) => String::new(), + } + } + + fn discover_filesystem_sources(&self, working_dir: &Path) -> Vec { + let mut sources: Vec = Vec::new(); + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + + let home = dirs::home_dir(); + let config = Paths::config_dir(); + + let local_recipe_dirs: Vec = vec![ + working_dir.to_path_buf(), + working_dir.join(".goose/recipes"), + ]; + + let global_recipe_dirs: Vec = std::env::var("GOOSE_RECIPE_PATH") + .ok() + .into_iter() + .flat_map(|p| { + let sep = if cfg!(windows) { ';' } else { ':' }; + p.split(sep).map(PathBuf::from).collect::>() + }) + .chain([config.join("recipes")]) + .collect(); + + let local_skill_dirs: Vec = vec![ + working_dir.join(".goose/skills"), + working_dir.join(".claude/skills"), + working_dir.join(".agents/skills"), + ]; + + let global_skill_dirs: Vec = [ + Some(config.join("skills")), + home.as_ref().map(|h| h.join(".claude/skills")), + home.as_ref().map(|h| 
h.join(".config/agents/skills")), + ] + .into_iter() + .flatten() + .collect(); + + let local_agent_dirs: Vec = vec![ + working_dir.join(".goose/agents"), + working_dir.join(".claude/agents"), + ]; + + let global_agent_dirs: Vec = [ + Some(config.join("agents")), + home.as_ref().map(|h| h.join(".claude/agents")), + ] + .into_iter() + .flatten() + .collect(); + + for dir in local_recipe_dirs { + self.scan_recipes_dir(&dir, SourceKind::Recipe, &mut sources, &mut seen); + } + + for dir in local_skill_dirs { + self.scan_skills_dir(&dir, &mut sources, &mut seen); + } + + for dir in local_agent_dirs { + self.scan_agents_dir(&dir, &mut sources, &mut seen); + } + + for dir in global_recipe_dirs { + self.scan_recipes_dir(&dir, SourceKind::Recipe, &mut sources, &mut seen); + } + + for dir in global_skill_dirs { + self.scan_skills_dir(&dir, &mut sources, &mut seen); + } + + for dir in global_agent_dirs { + self.scan_agents_dir(&dir, &mut sources, &mut seen); + } + + for content in builtin_skills::get_all() { + if let Some(source) = parse_skill_content(content, PathBuf::new()) { + if !seen.contains(&source.name) { + seen.insert(source.name.clone()); + sources.push(Source { + kind: SourceKind::BuiltinSkill, + ..source + }); + } + } + } + + sources + } + + async fn add_subrecipes( + &self, + session_id: &str, + sources: &mut Vec, + seen: &mut std::collections::HashSet, + ) { + let session = match self + .context + .session_manager + .get_session(session_id, false) + .await + { + Ok(s) => s, + Err(_) => return, + }; + + let sub_recipes = match session.recipe.as_ref().and_then(|r| r.sub_recipes.as_ref()) { + Some(sr) => sr, + None => return, + }; + + for sr in sub_recipes { + if seen.contains(&sr.name) { + continue; + } + seen.insert(sr.name.clone()); + + let description = self.build_subrecipe_description(sr).await; + + sources.push(Source { + name: sr.name.clone(), + kind: SourceKind::Subrecipe, + description, + path: PathBuf::from(&sr.path), + content: String::new(), + }); + } + 
} + + async fn build_subrecipe_description(&self, sr: &crate::recipe::SubRecipe) -> String { + if let Some(desc) = &sr.description { + return desc.clone(); + } + + if let Ok(recipe_file) = load_local_recipe_file(&sr.path) { + if let Ok(recipe) = Recipe::from_content(&recipe_file.content) { + let mut desc = recipe.description.clone(); + + if let Some(params) = &recipe.parameters { + let param_names: Vec<&str> = params.iter().map(|p| p.key.as_str()).collect(); + if !param_names.is_empty() { + let params_str = param_names.join(", "); + desc = format!("{} (params: {})", desc, params_str); + } + } + + return desc; + } + } + + format!("Subrecipe from {}", sr.path) + } + + fn scan_recipes_dir( + &self, + dir: &Path, + kind: SourceKind, + sources: &mut Vec, + seen: &mut std::collections::HashSet, + ) { + let entries = match std::fs::read_dir(dir) { + Ok(e) => e, + Err(_) => return, + }; + + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() { + continue; + } + + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + if !RECIPE_FILE_EXTENSIONS.contains(&ext) { + continue; + } + + let name = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("") + .to_string(); + + if name.is_empty() || seen.contains(&name) { + continue; + } + + match Recipe::from_file_path(&path) { + Ok(recipe) => { + seen.insert(name.clone()); + sources.push(Source { + name, + kind, + description: recipe.description.clone(), + path: path.clone(), + content: recipe.instructions.clone().unwrap_or_default(), + }); + } + Err(e) => { + warn!("Failed to parse recipe {}: {}", path.display(), e); + } + } + } + } + + fn scan_skills_dir( + &self, + dir: &Path, + sources: &mut Vec, + seen: &mut std::collections::HashSet, + ) { + let entries = match std::fs::read_dir(dir) { + Ok(e) => e, + Err(_) => return, + }; + + for entry in entries.flatten() { + let skill_dir = entry.path(); + if !skill_dir.is_dir() { + continue; + } + + let skill_file = 
skill_dir.join("SKILL.md"); + if !skill_file.exists() { + continue; + } + + let content = match std::fs::read_to_string(&skill_file) { + Ok(c) => c, + Err(e) => { + warn!("Failed to read skill file {}: {}", skill_file.display(), e); + continue; + } + }; + + if let Some(source) = parse_skill_content(&content, skill_file) { + if !seen.contains(&source.name) { + seen.insert(source.name.clone()); + sources.push(source); + } + } + } + } + + fn scan_agents_dir( + &self, + dir: &Path, + sources: &mut Vec, + seen: &mut std::collections::HashSet, + ) { + let entries = match std::fs::read_dir(dir) { + Ok(e) => e, + Err(_) => return, + }; + + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() { + continue; + } + + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + if ext != "md" { + continue; + } + + let content = match std::fs::read_to_string(&path) { + Ok(c) => c, + Err(e) => { + warn!("Failed to read agent file {}: {}", path.display(), e); + continue; + } + }; + + if let Some(source) = parse_agent_content(&content, path) { + if !seen.contains(&source.name) { + seen.insert(source.name.clone()); + sources.push(source); + } + } + } + } + + async fn handle_load( + &self, + session_id: &str, + arguments: Option, + ) -> Result, String> { + self.cleanup_completed_tasks().await; + + let source_name = arguments + .as_ref() + .and_then(|args| args.get("source")) + .and_then(|v| v.as_str()); + + let cancel = arguments + .as_ref() + .and_then(|args| args.get("cancel")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + let working_dir = self.get_working_dir(session_id).await; + + if source_name.is_none() { + return self.handle_load_discovery(session_id, &working_dir).await; + } + + let name = source_name.unwrap(); + + if is_session_id(name) { + return self.handle_load_task_result(name, cancel).await; + } + + self.handle_load_source(session_id, name, &working_dir) + .await + } + + async fn handle_load_task_result( + &self, + task_id: 
&str, + cancel: bool, + ) -> Result, String> { + let mut completed = self.completed_tasks.lock().await; + + if let Some(task) = completed.remove(task_id) { + let status = if task.result.is_ok() { + "โœ“ Completed" + } else { + "โœ— Failed" + }; + let output = match task.result { + Ok(output) => output, + Err(error) => format!("Error: {}", error), + }; + + return Ok(vec![Content::text(format!( + "# Background Task Result: {}\n\n\ + **Task:** {}\n\ + **Status:** {}\n\ + **Duration:** {} ({} turns)\n\n\ + ## Output\n\n{}", + task_id, + task.description, + status, + round_duration(task.duration), + task.turns_taken, + output + ))]); + } + + drop(completed); + + let mut running = self.background_tasks.lock().await; + if running.contains_key(task_id) { + if !cancel { + return Err(format!( + "Task '{}' is still running. Use load(source: \"{}\", cancel: true) to stop.", + task_id, task_id + )); + } + + let task = running.remove(task_id).unwrap(); + drop(running); + + task.cancellation_token.cancel(); + + let duration = task.started_at.elapsed(); + let turns_taken = task.turns.load(Ordering::Relaxed); + + let mut handle = task.handle; + let output = tokio::select! 
{ + result = &mut handle => { + match result { + Ok(Ok(s)) => s, + Ok(Err(e)) => format!("Error: {}", e), + Err(e) => format!("Task panicked: {}", e), + } + } + _ = tokio::time::sleep(Duration::from_secs(5)) => { + handle.abort(); + "Task did not stop in time (aborted)".to_string() + } + }; + + return Ok(vec![Content::text(format!( + "# Background Task Result: {}\n\n\ + **Task:** {}\n\ + **Status:** โŠ˜ Cancelled\n\ + **Duration:** {} ({} turns)\n\n\ + ## Output\n\n{}", + task_id, + task.description, + round_duration(duration), + turns_taken, + output + ))]); + } + + Err(format!("Task '{}' not found.", task_id)) + } + + async fn handle_load_discovery( + &self, + session_id: &str, + working_dir: &Path, + ) -> Result, String> { + { + let mut cache = self.source_cache.lock().await; + *cache = None; + } + + let sources = self.get_sources(session_id, working_dir).await; + let completed = self.completed_tasks.lock().await; + + if sources.is_empty() && completed.is_empty() { + return Ok(vec![Content::text( + "No sources available for load/delegate.\n\n\ + Sources are discovered from:\n\ + โ€ข Current recipe's sub_recipes\n\ + โ€ข .goose/recipes/, .goose/skills/, .goose/agents/\n\ + โ€ข ~/.config/goose/recipes/, skills/, agents/\n\ + โ€ข GOOSE_RECIPE_PATH directories\n\ + โ€ข Builtin skills", + )]); + } + + let mut output = String::from("Available sources for load/delegate:\n"); + + if !completed.is_empty() { + output.push_str("\nCompleted Tasks (awaiting retrieval):\n"); + let mut sorted_completed: Vec<_> = completed.values().collect(); + sorted_completed.sort_by_key(|t| &t.id); + for task in sorted_completed { + let status = if task.result.is_ok() { + "completed" + } else { + "failed" + }; + output.push_str(&format!( + "โ€ข {} - \"{}\" ({})\n", + task.id, task.description, status + )); + } + } + + for kind in [ + SourceKind::Subrecipe, + SourceKind::Recipe, + SourceKind::Skill, + SourceKind::Agent, + SourceKind::BuiltinSkill, + ] { + let kind_sources: Vec<_> = 
sources.iter().filter(|s| s.kind == kind).collect(); + if !kind_sources.is_empty() { + output.push_str(&format!("\n{}:\n", kind_plural(kind))); + for source in kind_sources { + output.push_str(&format!( + "โ€ข {} - {}\n", + source.name, + truncate(&source.description, 60) + )); + } + } + } + + output.push_str("\nUse load(source: \"name\") to load into context.\n"); + output.push_str("Use delegate(source: \"name\") to run as subagent."); + + Ok(vec![Content::text(output)]) + } + + async fn handle_load_source( + &self, + session_id: &str, + name: &str, + working_dir: &Path, + ) -> Result, String> { + let source = self.resolve_source(session_id, name, working_dir).await; + + match source { + Some(source) => { + let content = source.to_load_text(); + + let output = format!( + "# Loaded: {} ({})\n\n{}\n\n---\nThis knowledge is now available in your context.", + source.name, source.kind, content + ); + + Ok(vec![Content::text(output)]) + } + None => { + let sources = self.get_sources(session_id, working_dir).await; + let suggestions: Vec<&str> = sources + .iter() + .filter(|s| { + s.name.to_lowercase().contains(&name.to_lowercase()) + || name.to_lowercase().contains(&s.name.to_lowercase()) + }) + .take(3) + .map(|s| s.name.as_str()) + .collect(); + + let error_msg = if suggestions.is_empty() { + format!( + "Source '{}' not found. Use load() to see available sources.", + name + ) + } else { + format!( + "Source '{}' not found. Did you mean: {}?", + name, + suggestions.join(", ") + ) + }; + + Err(error_msg) + } + } + } + + async fn handle_delegate( + &self, + session_id: &str, + arguments: Option, + cancellation_token: CancellationToken, + ) -> Result, String> { + self.cleanup_completed_tasks().await; + + let params: DelegateParams = arguments + .map(|args| serde_json::from_value(serde_json::Value::Object(args))) + .transpose() + .map_err(|e| format!("Invalid parameters: {}", e))? 
+ .unwrap_or(DelegateParams { + instructions: None, + source: None, + parameters: None, + extensions: None, + provider: None, + model: None, + temperature: None, + r#async: false, + }); + + self.validate_delegate_params(¶ms)?; + + let session = self + .context + .session_manager + .get_session(session_id, false) + .await + .map_err(|e| format!("Failed to get session: {}", e))?; + + if session.session_type == SessionType::SubAgent { + return Err("Delegated tasks cannot spawn further delegations".to_string()); + } + + if params.r#async { + return self.handle_async_delegate(session_id, params).await; + } + + let working_dir = session.working_dir.clone(); + let recipe = self + .build_delegate_recipe(¶ms, session_id, &working_dir) + .await?; + + let task_config = self + .build_task_config(¶ms, &recipe, &session) + .await + .map_err(|e| format!("Failed to build task config: {}", e))?; + + let agent_config = AgentConfig::new( + self.context.session_manager.clone(), + crate::config::permission::PermissionManager::instance(), + None, + crate::config::GooseMode::Auto, + true, // disable session naming for subagents + ); + + let subagent_session = self + .context + .session_manager + .create_session( + working_dir, + "Delegated task".to_string(), + SessionType::SubAgent, + ) + .await + .map_err(|e| format!("Failed to create subagent session: {}", e))?; + + let result = run_complete_subagent_task( + agent_config, + recipe, + task_config, + true, + subagent_session.id, + Some(cancellation_token), + ) + .await + .map_err(|e| format!("Delegation failed: {}", e))?; + + Ok(vec![Content::text(result)]) + } + + fn validate_delegate_params(&self, params: &DelegateParams) -> Result<(), String> { + if params.instructions.is_none() && params.source.is_none() { + return Err("Must provide 'instructions' or 'source' (or both)".to_string()); + } + + if params.parameters.is_some() && params.source.is_none() { + return Err("'parameters' can only be used with 'source'".to_string()); + } + + 
Ok(()) + } + + async fn build_delegate_recipe( + &self, + params: &DelegateParams, + session_id: &str, + working_dir: &Path, + ) -> Result { + if let Some(source_name) = ¶ms.source { + self.build_source_recipe(source_name, params, session_id, working_dir) + .await + } else { + self.build_adhoc_recipe(params) + } + } + + fn build_adhoc_recipe(&self, params: &DelegateParams) -> Result { + let task = params + .instructions + .as_ref() + .ok_or("Instructions required for ad-hoc task")?; + + Recipe::builder() + .version("1.0.0") + .title("Delegated Task") + .description("Ad-hoc delegated task") + .prompt(task) + .build() + .map_err(|e| format!("Failed to build recipe: {}", e)) + } + + async fn build_source_recipe( + &self, + source_name: &str, + params: &DelegateParams, + session_id: &str, + working_dir: &Path, + ) -> Result { + let source = self + .resolve_source(session_id, source_name, working_dir) + .await + .ok_or_else(|| format!("Source '{}' not found", source_name))?; + + let mut recipe = match source.kind { + SourceKind::Recipe | SourceKind::Subrecipe => { + self.build_recipe_from_source(&source, params, session_id) + .await? + } + SourceKind::Skill | SourceKind::BuiltinSkill => { + self.build_recipe_from_skill(&source, params)? 
+ } + SourceKind::Agent => self.build_recipe_from_agent(&source, params)?, + }; + + if let Some(extra_instructions) = ¶ms.instructions { + if recipe.prompt.is_some() { + let current_prompt = recipe.prompt.take().unwrap(); + recipe.prompt = Some(format!("{}\n\n{}", current_prompt, extra_instructions)); + } else { + recipe.prompt = Some(extra_instructions.clone()); + } + } + + Ok(recipe) + } + + async fn build_recipe_from_source( + &self, + source: &Source, + params: &DelegateParams, + session_id: &str, + ) -> Result { + let session = self + .context + .session_manager + .get_session(session_id, false) + .await + .map_err(|e| format!("Failed to get session: {}", e))?; + + if source.kind == SourceKind::Subrecipe { + let sub_recipes = session.recipe.as_ref().and_then(|r| r.sub_recipes.as_ref()); + + if let Some(sub_recipes) = sub_recipes { + if let Some(sr) = sub_recipes.iter().find(|sr| sr.name == source.name) { + let recipe_file = load_local_recipe_file(&sr.path).map_err(|e| { + format!("Failed to load subrecipe '{}': {}", source.name, e) + })?; + + let mut merged: HashMap = HashMap::new(); + if let Some(values) = &sr.values { + for (k, v) in values { + merged.insert(k.clone(), v.clone()); + } + } + if let Some(provided_params) = ¶ms.parameters { + for (k, v) in provided_params { + let value_str = match v { + serde_json::Value::String(s) => s.clone(), + other => other.to_string(), + }; + merged.insert(k.clone(), value_str); + } + } + let param_values: Vec<(String, String)> = merged.into_iter().collect(); + + return build_recipe_from_template( + recipe_file.content, + &recipe_file.parent_dir, + param_values, + None:: Result>, + ) + .map_err(|e| format!("Failed to build subrecipe: {}", e)); + } + } + } + + let recipe_file = load_local_recipe_file(source.path.to_str().unwrap_or("")) + .map_err(|e| format!("Failed to load recipe '{}': {}", source.name, e))?; + + let param_values: Vec<(String, String)> = params + .parameters + .as_ref() + .map(|p| { + p.iter() + .map(|(k, 
v)| { + let value_str = match v { + serde_json::Value::String(s) => s.clone(), + other => other.to_string(), + }; + (k.clone(), value_str) + }) + .collect() + }) + .unwrap_or_default(); + + build_recipe_from_template( + recipe_file.content, + &recipe_file.parent_dir, + param_values, + None:: Result>, + ) + .map_err(|e| format!("Failed to build recipe: {}", e)) + } + + fn build_recipe_from_skill( + &self, + source: &Source, + params: &DelegateParams, + ) -> Result { + let mut builder = Recipe::builder() + .version("1.0.0") + .title(format!("Skill: {}", source.name)) + .description(source.description.clone()) + .instructions(&source.content); + + if params.instructions.is_none() { + builder = builder.prompt("Apply the skill knowledge to produce a useful result."); + } + + builder + .build() + .map_err(|e| format!("Failed to build recipe from skill: {}", e)) + } + + fn build_recipe_from_agent( + &self, + source: &Source, + params: &DelegateParams, + ) -> Result { + let agent_content = if source.path.as_os_str().is_empty() { + return Err("Agent source has no path".to_string()); + } else { + std::fs::read_to_string(&source.path) + .map_err(|e| format!("Failed to read agent file: {}", e))? 
+ }; + + let (metadata, _): (AgentMetadata, String) = + parse_frontmatter(&agent_content).ok_or("Failed to parse agent frontmatter")?; + + let model = metadata.model; + + let settings = model.map(|m| Settings { + goose_model: Some(m), + goose_provider: params.provider.clone(), + temperature: params.temperature, + max_turns: None, + }); + + let mut builder = Recipe::builder() + .version("1.0.0") + .title(format!("Agent: {}", source.name)) + .description(source.description.clone()) + .instructions(&source.content); + + if let Some(settings) = settings { + builder = builder.settings(settings); + } + + if params.instructions.is_none() { + builder = builder.prompt("Proceed with your expertise to produce a useful result."); + } + + builder + .build() + .map_err(|e| format!("Failed to build recipe from agent: {}", e)) + } + + async fn build_task_config( + &self, + params: &DelegateParams, + recipe: &Recipe, + session: &crate::session::Session, + ) -> Result { + let provider = self.resolve_provider(params, recipe, session).await?; + + let mut extensions = self.resolve_extensions(session)?; + + if let Some(filter) = ¶ms.extensions { + if filter.is_empty() { + extensions = Vec::new(); + } else { + extensions.retain(|ext| filter.contains(&ext.name())); + } + } + + let max_turns = self.resolve_max_turns(session); + + let mut task_config = + TaskConfig::new(provider, &session.id, &session.working_dir, extensions); + task_config.max_turns = Some(max_turns); + + Ok(task_config) + } + + async fn resolve_provider( + &self, + params: &DelegateParams, + recipe: &Recipe, + session: &crate::session::Session, + ) -> Result, anyhow::Error> { + let provider_name = params + .provider + .clone() + .or_else(|| { + recipe + .settings + .as_ref() + .and_then(|s| s.goose_provider.clone()) + }) + .or_else(|| session.provider_name.clone()) + .ok_or_else(|| anyhow::anyhow!("No provider configured"))?; + + let mut model_config = session + .model_config + .clone() + .map(Ok) + .unwrap_or_else(|| 
crate::model::ModelConfig::new("default"))?; + + if let Some(model) = ¶ms.model { + model_config.model_name = model.clone(); + } else if let Some(model) = recipe + .settings + .as_ref() + .and_then(|s| s.goose_model.as_ref()) + { + model_config.model_name = model.clone(); + } + + if let Some(temp) = params.temperature { + model_config = model_config.with_temperature(Some(temp)); + } else if let Some(temp) = recipe.settings.as_ref().and_then(|s| s.temperature) { + model_config = model_config.with_temperature(Some(temp)); + } + + providers::create(&provider_name, model_config).await + } + + fn resolve_extensions( + &self, + session: &crate::session::Session, + ) -> Result, anyhow::Error> { + let extensions = EnabledExtensionsState::from_extension_data(&session.extension_data) + .map(|s| s.extensions) + .unwrap_or_else(crate::config::get_enabled_extensions); + + Ok(extensions) + } + + fn resolve_max_turns(&self, session: &crate::session::Session) -> usize { + std::env::var("GOOSE_SUBAGENT_MAX_TURNS") + .ok() + .and_then(|v| v.parse().ok()) + .or_else(|| { + session + .recipe + .as_ref() + .and_then(|r| r.settings.as_ref()) + .and_then(|s| s.max_turns) + }) + .unwrap_or(DEFAULT_SUBAGENT_MAX_TURNS) + } + + async fn cleanup_completed_tasks(&self) { + let finished: Vec<(String, BackgroundTask)> = { + let mut tasks = self.background_tasks.lock().await; + let ids: Vec = tasks + .iter() + .filter(|(_, t)| t.handle.is_finished()) + .map(|(id, _)| id.clone()) + .collect(); + ids.into_iter() + .filter_map(|id| tasks.remove(&id).map(|t| (id, t))) + .collect() + }; + + let mut completed = self.completed_tasks.lock().await; + + for (id, task) in finished { + let duration = task.started_at.elapsed(); + let turns_taken = task.turns.load(Ordering::Relaxed); + + let result = match task.handle.await { + Ok(Ok(output)) => { + info!("Background task {} completed successfully", id); + Ok(output) + } + Ok(Err(e)) => { + warn!("Background task {} failed: {}", id, e); + Err(e.to_string()) + 
} + Err(e) => { + warn!("Background task {} panicked: {}", id, e); + Err(format!("Task panicked: {}", e)) + } + }; + + completed.insert( + id.clone(), + CompletedTask { + id, + description: task.description, + result, + turns_taken, + duration, + }, + ); + } + } + + fn get_task_description(params: &DelegateParams) -> String { + if let Some(source) = ¶ms.source { + if let Some(instructions) = ¶ms.instructions { + format!("{}: {}", source, truncate(instructions, 30)) + } else { + source.clone() + } + } else if let Some(instructions) = ¶ms.instructions { + truncate(instructions, 40) + } else { + "Unknown task".to_string() + } + } + + async fn handle_async_delegate( + &self, + session_id: &str, + params: DelegateParams, + ) -> Result, String> { + let task_count = self.background_tasks.lock().await.len(); + let max_tasks = max_background_tasks(); + if task_count >= max_tasks { + return Err(format!( + "Maximum {} background tasks already running. Wait for completion or use sync mode.", + max_tasks + )); + } + + let session = self + .context + .session_manager + .get_session(session_id, false) + .await + .map_err(|e| format!("Failed to get session: {}", e))?; + + let working_dir = session.working_dir.clone(); + let recipe = self + .build_delegate_recipe(¶ms, session_id, &working_dir) + .await?; + + let task_config = self + .build_task_config(¶ms, &recipe, &session) + .await + .map_err(|e| format!("Failed to build task config: {}", e))?; + + let description = truncate(&Self::get_task_description(¶ms), 40); + + let agent_config = AgentConfig::new( + self.context.session_manager.clone(), + crate::config::permission::PermissionManager::instance(), + None, + crate::config::GooseMode::Auto, + true, // disable session naming for subagents + ); + + let subagent_session = self + .context + .session_manager + .create_session(working_dir, description.clone(), SessionType::SubAgent) + .await + .map_err(|e| format!("Failed to create subagent session: {}", e))?; + + let task_id = 
subagent_session.id.clone(); + + let turns = Arc::new(AtomicU32::new(0)); + let last_activity = Arc::new(AtomicU64::new(current_epoch_millis())); + + let turns_clone = Arc::clone(&turns); + let last_activity_clone = Arc::clone(&last_activity); + + let on_message: OnMessageCallback = Arc::new(move |_msg| { + turns_clone.fetch_add(1, Ordering::Relaxed); + last_activity_clone.store(current_epoch_millis(), Ordering::Relaxed); + }); + + let task_token = CancellationToken::new(); + let task_token_clone = task_token.clone(); + + let handle = tokio::spawn(async move { + run_subagent_task_with_callback( + agent_config, + recipe, + task_config, + true, + subagent_session.id, + Some(task_token_clone), + Some(on_message), + ) + .await + }); + + let task = BackgroundTask { + id: task_id.clone(), + description: description.clone(), + started_at: Instant::now(), + turns, + last_activity, + handle, + cancellation_token: task_token, + }; + + self.background_tasks + .lock() + .await + .insert(task_id.clone(), task); + + Ok(vec![Content::text(format!( + "Task {} started in background: \"{}\"\nStatus will appear in context.", + task_id, description + ))]) + } +} + +#[async_trait] +impl McpClientTrait for SummonClient { + async fn list_tools( + &self, + session_id: &str, + _next_cursor: Option, + _cancellation_token: CancellationToken, + ) -> Result { + self.cleanup_completed_tasks().await; + + let is_subagent = self + .context + .session_manager + .get_session(session_id, false) + .await + .map(|s| s.session_type == SessionType::SubAgent) + .unwrap_or(false); + + let mut tools = vec![self.create_load_tool()]; + + if !is_subagent { + tools.push(self.create_delegate_tool()); + } + + Ok(ListToolsResult { + tools, + next_cursor: None, + meta: None, + }) + } + + async fn call_tool( + &self, + session_id: &str, + name: &str, + arguments: Option, + _working_dir: Option<&str>, + cancellation_token: CancellationToken, + ) -> Result { + let content = match name { + "load" => 
self.handle_load(session_id, arguments).await, + "delegate" => { + self.handle_delegate(session_id, arguments, cancellation_token) + .await + } + _ => Err(format!("Unknown tool: {}", name)), + }; + + match content { + Ok(content) => Ok(CallToolResult::success(content)), + Err(error) => Ok(CallToolResult::error(vec![Content::text(format!( + "Error: {}", + error + ))])), + } + } + + fn get_info(&self) -> Option<&InitializeResult> { + Some(&self.info) + } + + async fn get_moim(&self, _session_id: &str) -> Option { + self.cleanup_completed_tasks().await; + + let running = self.background_tasks.lock().await; + let completed = self.completed_tasks.lock().await; + + if running.is_empty() && completed.is_empty() { + return None; + } + + let mut lines = vec!["Background tasks:".to_string()]; + let now = current_epoch_millis(); + + let mut shortest_elapsed_secs: Option = None; + + let mut sorted_running: Vec<_> = running.values().collect(); + sorted_running.sort_by_key(|t| &t.id); + + for task in sorted_running { + let elapsed = task.started_at.elapsed(); + let idle_ms = now.saturating_sub(task.last_activity.load(Ordering::Relaxed)); + + let elapsed_secs = elapsed.as_secs(); + shortest_elapsed_secs = + Some(shortest_elapsed_secs.map_or(elapsed_secs, |s| s.min(elapsed_secs))); + + lines.push(format!( + "โ€ข {}: \"{}\" - running {}, {} turns, idle {}", + task.id, + task.description, + round_duration(elapsed), + task.turns.load(Ordering::Relaxed), + round_duration(Duration::from_millis(idle_ms)), + )); + } + + let mut sorted_completed: Vec<_> = completed.values().collect(); + sorted_completed.sort_by_key(|t| &t.id); + + for task in sorted_completed { + let status = if task.result.is_ok() { + "completed" + } else { + "failed" + }; + lines.push(format!( + "โ€ข {}: \"{}\" - {} in {} ({} turns) - use load(\"{}\") to get result", + task.id, + task.description, + status, + round_duration(task.duration), + task.turns_taken, + task.id + )); + } + + if let Some(shortest) = 
shortest_elapsed_secs { + let sleep_secs = 300u64.saturating_sub(shortest).max(10); + lines.push(format!( + "\nโ†’ sleep {} to wait, or load(source: \"id\", cancel: true) to stop", + sleep_secs + )); + } + + Some(lines.join("\n")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::sync::Arc; + use tempfile::TempDir; + + fn create_test_context() -> PlatformExtensionContext { + PlatformExtensionContext { + extension_manager: None, + session_manager: Arc::new(crate::session::SessionManager::instance()), + } + } + + #[test] + fn test_frontmatter_parsing() { + let skill = r#"--- +name: test-skill +description: A test skill +--- +Skill body here."#; + let source = parse_skill_content(skill, PathBuf::new()).unwrap(); + assert_eq!(source.name, "test-skill"); + assert_eq!(source.kind, SourceKind::Skill); + assert!(source.content.contains("Skill body")); + + let agent = r#"--- +name: reviewer +model: sonnet +--- +You review code."#; + let source = parse_agent_content(agent, PathBuf::new()).unwrap(); + assert_eq!(source.name, "reviewer"); + assert!(source.description.contains("sonnet")); + + assert!(parse_skill_content("no frontmatter", PathBuf::new()).is_none()); + assert!(parse_skill_content("---\nunclosed", PathBuf::new()).is_none()); + } + + #[tokio::test] + async fn test_source_discovery_and_priority() { + let temp_dir = TempDir::new().unwrap(); + + let goose_skill = temp_dir.path().join(".goose/skills/my-skill"); + fs::create_dir_all(&goose_skill).unwrap(); + fs::write( + goose_skill.join("SKILL.md"), + "---\nname: my-skill\ndescription: goose version\n---\nContent", + ) + .unwrap(); + + let claude_skill = temp_dir.path().join(".claude/skills/my-skill"); + fs::create_dir_all(&claude_skill).unwrap(); + fs::write( + claude_skill.join("SKILL.md"), + "---\nname: my-skill\ndescription: claude version\n---\nContent", + ) + .unwrap(); + + let recipes = temp_dir.path().join(".goose/recipes"); + fs::create_dir_all(&recipes).unwrap(); + fs::write( + 
recipes.join("test.yaml"), + "title: Test\ndescription: A recipe\ninstructions: Do it", + ) + .unwrap(); + + let client = SummonClient::new(create_test_context()).unwrap(); + let sources = client.discover_filesystem_sources(temp_dir.path()); + + let skill = sources.iter().find(|s| s.name == "my-skill").unwrap(); + assert_eq!(skill.description, "goose version"); + + assert!(sources + .iter() + .any(|s| s.name == "test" && s.kind == SourceKind::Recipe)); + + assert!(sources.iter().any(|s| s.kind == SourceKind::BuiltinSkill)); + } + + #[tokio::test] + async fn test_client_tools_and_unknown_tool() { + let client = SummonClient::new(create_test_context()).unwrap(); + + let result = client + .list_tools("test", None, CancellationToken::new()) + .await + .unwrap(); + let names: Vec<_> = result.tools.iter().map(|t| t.name.as_ref()).collect(); + assert!(names.contains(&"load") && names.contains(&"delegate")); + + let result = client + .call_tool("test", "unknown", None, None, CancellationToken::new()) + .await + .unwrap(); + assert!(result.is_error.unwrap_or(false)); + } + + #[test] + fn test_duration_rounding_for_moim() { + assert_eq!(round_duration(Duration::from_secs(5)), "0s"); + assert_eq!(round_duration(Duration::from_secs(15)), "10s"); + assert_eq!(round_duration(Duration::from_secs(59)), "50s"); + + assert_eq!(round_duration(Duration::from_secs(60)), "1m"); + assert_eq!(round_duration(Duration::from_secs(90)), "1m"); + assert_eq!(round_duration(Duration::from_secs(120)), "2m"); + } + + #[test] + fn test_task_description_formatting() { + let make_params = |source: Option<&str>, instructions: Option<&str>| DelegateParams { + source: source.map(String::from), + instructions: instructions.map(String::from), + parameters: None, + extensions: None, + provider: None, + model: None, + temperature: None, + r#async: false, + }; + + assert_eq!( + SummonClient::get_task_description(&make_params(Some("recipe"), None)), + "recipe" + ); + assert_eq!( + 
SummonClient::get_task_description(&make_params(None, Some("do stuff"))), + "do stuff" + ); + assert_eq!( + SummonClient::get_task_description(&make_params(Some("r"), Some("task"))), + "r: task" + ); + + let long = "x".repeat(100); + let desc = SummonClient::get_task_description(&make_params(None, Some(&long))); + assert!(desc.len() <= 43 && desc.ends_with("...")); + } + + fn extract_text(content: &Content) -> &str { + use rmcp::model::RawContent; + match &content.raw { + RawContent::Text(t) => t.text.as_str(), + _ => panic!("Expected text content"), + } + } + + #[test] + fn test_is_session_id() { + assert!(is_session_id("20260204_1")); + assert!(is_session_id("20260204_42")); + assert!(is_session_id("20260204_999")); + assert!(!is_session_id("task_12345_0001")); + assert!(!is_session_id("my-recipe")); + assert!(!is_session_id("2026020_1")); + assert!(!is_session_id("20260204")); + } + + #[tokio::test] + async fn test_async_task_result_lifecycle() { + let client = SummonClient::new(create_test_context()).unwrap(); + let temp_dir = TempDir::new().unwrap(); + + let result = client.handle_load_task_result("20260204_999", false).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("not found")); + + { + let mut running = client.background_tasks.lock().await; + running.insert( + "20260204_1".to_string(), + BackgroundTask { + id: "20260204_1".to_string(), + description: "Running task".to_string(), + started_at: Instant::now(), + turns: Arc::new(AtomicU32::new(2)), + last_activity: Arc::new(AtomicU64::new(current_epoch_millis())), + handle: tokio::spawn(async { + tokio::time::sleep(Duration::from_secs(1000)).await; + Ok("done".to_string()) + }), + cancellation_token: CancellationToken::new(), + }, + ); + } + let result = client.handle_load_task_result("20260204_1", false).await; + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("still running")); + assert!(err.contains("cancel: true")); + + let moim = 
client.get_moim("test").await.unwrap(); + assert!(moim.contains("20260204_1")); + assert!(moim.contains("running")); + assert!(moim.contains("sleep")); + assert!(moim.contains("cancel: true")); + + { + let mut completed = client.completed_tasks.lock().await; + completed.insert( + "20260204_2".to_string(), + CompletedTask { + id: "20260204_2".to_string(), + description: "Successful task".to_string(), + result: Ok("Task completed successfully with output".to_string()), + turns_taken: 5, + duration: Duration::from_secs(60), + }, + ); + completed.insert( + "20260204_3".to_string(), + CompletedTask { + id: "20260204_3".to_string(), + description: "Failed task".to_string(), + result: Err("Something went wrong".to_string()), + turns_taken: 3, + duration: Duration::from_secs(30), + }, + ); + } + + let moim = client.get_moim("test").await.unwrap(); + assert!(moim.contains("20260204_2")); + assert!(moim.contains("20260204_3")); + assert!(moim.contains(r#"use load("20260204_2") to get result"#)); + assert!(moim.contains(r#"use load("20260204_3") to get result"#)); + + let discovery = client + .handle_load_discovery("test", temp_dir.path()) + .await + .unwrap(); + let discovery_text = extract_text(&discovery[0]); + assert!(discovery_text.contains("Completed Tasks (awaiting retrieval)")); + assert!(discovery_text.contains("20260204_2")); + assert!(discovery_text.contains("20260204_3")); + + let result = client + .handle_load_task_result("20260204_2", false) + .await + .unwrap(); + let text = extract_text(&result[0]); + assert!(text.contains("20260204_2")); + assert!(text.contains("Successful task")); + assert!(text.contains("โœ“ Completed")); + assert!(text.contains("1m")); + assert!(text.contains("5 turns")); + assert!(text.contains("Task completed successfully with output")); + + assert!(!client + .completed_tasks + .lock() + .await + .contains_key("20260204_2")); + + let result = client + .handle_load_task_result("20260204_3", false) + .await + .unwrap(); + let text = 
extract_text(&result[0]); + assert!(text.contains("โœ— Failed")); + assert!(text.contains("Error: Something went wrong")); + + let result = client.handle_load_task_result("20260204_3", false).await; + assert!(result.is_err()); + assert!(result.unwrap_err().contains("not found")); + + let moim = client.get_moim("test").await.unwrap(); + assert!(moim.contains("20260204_1")); + assert!(moim.contains("sleep")); + assert!(!moim.contains("20260204_2")); + assert!(!moim.contains("20260204_3")); + } + + #[tokio::test] + async fn test_cancel_running_task() { + let client = SummonClient::new(create_test_context()).unwrap(); + let token = CancellationToken::new(); + + { + let mut running = client.background_tasks.lock().await; + running.insert( + "20260204_1".to_string(), + BackgroundTask { + id: "20260204_1".to_string(), + description: "Cancellable task".to_string(), + started_at: Instant::now(), + turns: Arc::new(AtomicU32::new(3)), + last_activity: Arc::new(AtomicU64::new(current_epoch_millis())), + handle: tokio::spawn(async { + tokio::time::sleep(Duration::from_secs(1000)).await; + Ok("should not see this".to_string()) + }), + cancellation_token: token.clone(), + }, + ); + } + + let result = client + .handle_load_task_result("20260204_1", true) + .await + .unwrap(); + let text = extract_text(&result[0]); + assert!(text.contains("Cancelled")); + assert!(text.contains("20260204_1")); + assert!(text.contains("Cancellable task")); + assert!(token.is_cancelled()); + assert!(!client + .background_tasks + .lock() + .await + .contains_key("20260204_1")); + } +} diff --git a/crates/goose/src/providers/testprovider.rs b/crates/goose/src/providers/testprovider.rs index 942c0a2b5bd0..32a017eacc94 100644 --- a/crates/goose/src/providers/testprovider.rs +++ b/crates/goose/src/providers/testprovider.rs @@ -72,9 +72,29 @@ impl TestProvider { } fn hash_input(messages: &[Message]) -> String { + use crate::conversation::message::MessageContent; + + // Strip internal metadata (e.g. 
tool_meta/_meta) from content before hashing. + // This metadata is used for internal routing (like goose_extension ownership) + // and isn't part of the semantic input the LLM sees, so it shouldn't affect + // replay matching. let stable_messages: Vec<_> = messages .iter() - .map(|msg| (msg.role.clone(), msg.content.clone())) + .map(|msg| { + let cleaned_content: Vec<_> = msg + .content + .iter() + .map(|c| match c { + MessageContent::ToolRequest(req) => { + let mut req = req.clone(); + req.tool_meta = None; + MessageContent::ToolRequest(req) + } + other => other.clone(), + }) + .collect(); + (msg.role.clone(), cleaned_content) + }) .collect(); let serialized = serde_json::to_string(&stable_messages).unwrap_or_default(); let mut hasher = Sha256::new(); diff --git a/crates/goose/src/recipe/mod.rs b/crates/goose/src/recipe/mod.rs index 1dd3cb12fba1..b47192a99e1d 100644 --- a/crates/goose/src/recipe/mod.rs +++ b/crates/goose/src/recipe/mod.rs @@ -226,6 +226,24 @@ pub struct RecipeBuilder { } impl Recipe { + fn ensure_summon_for_subrecipes(&mut self) { + if self.sub_recipes.is_none() { + return; + } + let summon = ExtensionConfig::Platform { + name: "summon".to_string(), + description: String::new(), + display_name: None, + bundled: None, + available_tools: vec![], + }; + match &mut self.extensions { + Some(exts) if !exts.iter().any(|e| e.name() == "summon") => exts.push(summon), + None => self.extensions = Some(vec![summon]), + _ => {} + } + } + /// Returns true if harmful content is detected in instructions, prompt, or activities fields pub fn check_for_security_warnings(&self) -> bool { if [self.instructions.as_deref(), self.prompt.as_deref()] @@ -277,7 +295,7 @@ impl Recipe { } pub fn from_content(content: &str) -> Result { - let recipe: Recipe = match serde_yaml::from_str::(content) { + let mut recipe: Recipe = match serde_yaml::from_str::(content) { Ok(yaml_value) => { if let Some(nested_recipe) = yaml_value.get("recipe") { 
serde_yaml::from_value(nested_recipe.clone()) @@ -291,6 +309,7 @@ impl Recipe { .map_err(|e| anyhow::anyhow!("{}", strip_error_location(&e.to_string())))?, }; + recipe.ensure_summon_for_subrecipes(); Ok(recipe) } } @@ -450,8 +469,10 @@ mod tests { assert_eq!(recipe.prompt, Some("Test prompt".to_string())); assert!(recipe.extensions.is_some()); - let extensions = recipe.extensions.unwrap(); - assert_eq!(extensions.len(), 1); + let extensions = recipe.extensions.as_ref().unwrap(); + assert_eq!(extensions.len(), 2); + assert!(extensions.iter().any(|e| e.name() == "test_extension")); + assert!(extensions.iter().any(|e| e.name() == "summon")); assert!(recipe.parameters.is_some()); let parameters = recipe.parameters.unwrap(); @@ -533,8 +554,10 @@ sub_recipes: assert_eq!(recipe.prompt, Some("Test prompt".to_string())); assert!(recipe.extensions.is_some()); - let extensions = recipe.extensions.unwrap(); - assert_eq!(extensions.len(), 1); + let extensions = recipe.extensions.as_ref().unwrap(); + assert_eq!(extensions.len(), 2); + assert!(extensions.iter().any(|e| e.name() == "test_extension")); + assert!(extensions.iter().any(|e| e.name() == "summon")); assert!(recipe.parameters.is_some()); let parameters = recipe.parameters.unwrap(); diff --git a/crates/goose/src/session/session_manager.rs b/crates/goose/src/session/session_manager.rs index 13fbe82b70f3..89955c6d546e 100644 --- a/crates/goose/src/session/session_manager.rs +++ b/crates/goose/src/session/session_manager.rs @@ -491,7 +491,7 @@ impl SessionStorage { let options = SqliteConnectOptions::new() .filename(path) .create_if_missing(true) - .busy_timeout(std::time::Duration::from_secs(5)) + .busy_timeout(std::time::Duration::from_secs(30)) .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal); SqlitePoolOptions::new().connect_lazy_with(options) diff --git a/crates/goose/tests/subagent_tool_tests.rs b/crates/goose/tests/subagent_tool_tests.rs deleted file mode 100644 index 2350d31598bd..000000000000 --- 
a/crates/goose/tests/subagent_tool_tests.rs +++ /dev/null @@ -1,104 +0,0 @@ -use goose::agents::subagent_tool::{create_subagent_tool, SUBAGENT_TOOL_NAME}; -use goose::recipe::{Recipe, SubRecipe}; -use std::collections::HashMap; -use tempfile::TempDir; - -const RECIPE_TWO_PARAMS: &str = r#" -version: "1.0.0" -title: "Test Task" -description: "A test task" -instructions: "Process {{ first }} and {{ second }}" -parameters: - - key: first - input_type: string - requirement: required - description: "First param" - - key: second - input_type: string - requirement: required - description: "Second param" -"#; - -fn write_recipe(temp_dir: &TempDir, name: &str, content: &str) -> String { - let path = temp_dir.path().join(format!("{}.yaml", name)); - std::fs::write(&path, content).unwrap(); - path.to_string_lossy().to_string() -} - -fn make_subrecipe(path: String, name: &str, values: Option>) -> SubRecipe { - SubRecipe { - name: name.to_string(), - path, - values, - sequential_when_repeated: false, - description: Some(format!("{} description", name)), - } -} - -#[test] -fn test_tool_description_includes_subrecipe_params_and_filters_presets() { - let temp_dir = TempDir::new().unwrap(); - let path = write_recipe(&temp_dir, "mytask", RECIPE_TWO_PARAMS); - - let no_presets = make_subrecipe(path.clone(), "mytask", None); - let tool = create_subagent_tool(&[no_presets]); - let desc = tool.description.as_ref().unwrap(); - assert!(desc.contains("mytask")); - assert!(desc.contains("first [required]")); - assert!(desc.contains("second [required]")); - - let mut preset = HashMap::new(); - preset.insert("second".to_string(), "preset_value".to_string()); - let with_presets = make_subrecipe(path, "deploy", Some(preset)); - let tool = create_subagent_tool(&[with_presets]); - let params_section = tool - .description - .as_ref() - .unwrap() - .split("(params:") - .nth(1) - .unwrap_or(""); - assert!(params_section.contains("first")); - assert!(!params_section.contains("second")); -} - -#[test] 
-fn test_adhoc_recipe_builder_and_security_check() { - let recipe = Recipe::builder() - .version("1.0.0") - .title("Adhoc Task") - .description("An ad-hoc task") - .instructions("Do the thing") - .build() - .unwrap(); - - assert_eq!(recipe.title, "Adhoc Task"); - assert_eq!(recipe.instructions.as_ref().unwrap(), "Do the thing"); - assert!(!recipe.check_for_security_warnings()); -} - -#[test] -fn test_adhoc_tool_schema_properties() { - let tool = create_subagent_tool(&[]); - - assert_eq!(tool.name, SUBAGENT_TOOL_NAME); - assert!(tool.description.as_ref().unwrap().contains("Ad-hoc")); - assert!(!tool - .description - .as_ref() - .unwrap() - .contains("Available subrecipes")); - - let props = tool - .input_schema - .get("properties") - .unwrap() - .as_object() - .unwrap(); - assert!(props.contains_key("instructions")); - assert!(props.contains_key("subrecipe")); - assert!(props.contains_key("parameters")); - assert!(props.contains_key("extensions")); - assert!(props.contains_key("settings")); - assert!(props.contains_key("summary")); -} diff --git a/goose-self-test.yaml b/goose-self-test.yaml index af9634a6ac35..46c7fa32fa62 100644 --- a/goose-self-test.yaml +++ b/goose-self-test.yaml @@ -3,17 +3,16 @@ title: Goose Self-Testing Integration Suite description: A comprehensive meta-testing recipe where goose tests its own capabilities using its own tools - true first-person integration testing author: contact: goose-self-test - + activities: - Initialize test workspace and logging infrastructure - Test file operations (create, read, update, delete, undo) - Validate shell command execution and error handling - Analyze code structure and parsing capabilities - Test extension discovery and management - - Create and orchestrate subagents for meta-testing - - Generate and execute test recipes - - Test error boundaries and security controls - - Measure performance and resource usage + - Test load tool for knowledge injection and discovery + - Test delegate tool for task 
delegation (sync and async) + - Test error boundaries including nested delegation prevention - Generate comprehensive test report parameters: @@ -21,26 +20,26 @@ parameters: input_type: string requirement: optional default: "all" - description: "Which test phases to run: all, basic, extensions, subagents, recipes, advanced" - + description: "Which test phases to run: all, basic, extensions, delegation, advanced" + - key: test_depth input_type: string requirement: optional default: "standard" description: "Testing depth: quick (smoke tests), standard (normal coverage), deep (exhaustive)" - + - key: workspace_dir input_type: string requirement: optional default: "./gooseselftest" description: "Directory for test artifacts and results" - + - key: parallel_tests input_type: string requirement: optional default: "true" description: "Run independent tests in parallel where possible" - + - key: cleanup_after input_type: string requirement: optional @@ -50,44 +49,44 @@ parameters: instructions: | You are testing yourself - a running goose instance validating its own capabilities through meta-testing. This is true first-person integration testing where you use your own tools to test your own functionality. - + ## Understanding First-Person Integration Testing This is a crucial distinction - as a running goose instance, you are testing yourself using your own capabilities. - This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools - to validate your own functionality from within your active session. You can only test what you can observe and + This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools + to validate your own functionality from within your active session. You can only test what you can observe and control from inside your running instance - your tools, your behaviors, your error handling, your consistency. 
- + ## Core Testing Philosophy - You ARE the system under test AND the tester - Use your tools to create test scenarios, then validate the results - Test both success and failure paths - Document everything meticulously - Handle errors gracefully - a test failure shouldn't stop the suite - + ## Test Execution Framework - + ### Phase 1: Environment Setup & Basic Tool Validation Create a structured test workspace and validate core developer tools: - File operations (CRUD + undo) - Shell command execution - Code analysis capabilities - Error handling and recovery - + ### Phase 2: Extension System Testing Test dynamic extension management: - Discover available extensions - Enable/disable extensions - Test extension interactions - Verify isolation between extensions - - ### Phase 3: Subagent Testing (Meta-Recursion) - Create subagents to test yourself recursively: - - Basic subagent creation and execution - - Parallel subagent execution (multiple subagent calls at once) - - Sequential subagent chains - - Recursive depth testing (subagent creating subagent) - - Test summary mode (default behavior for concise results) - + + ### Phase 3: Delegate & Load Testing + Test the unified delegation and knowledge-loading tools: + - Load tool for discovery and knowledge injection + - Delegate tool for synchronous task delegation + - Delegate tool for asynchronous background tasks + - Parallel delegate execution + - Nested delegation prevention (critical security test) + ### Phase 4: Advanced Self-Testing Push boundaries and test limits: - Intentionally trigger errors @@ -95,14 +94,14 @@ instructions: | - Validate security controls - Measure performance metrics - Test resource constraints - + ### Phase 5: Report Generation Compile comprehensive test results: - Aggregate all test outcomes - Calculate success metrics - Document failures and issues - Generate recommendations - + ## Success Criteria - Phase success: โ‰ฅ80% tests pass - Suite success: All phases complete, critical 
features work @@ -115,22 +114,22 @@ extensions: timeout: 600 bundled: true description: Core tool for file operations, shell commands, and code analysis - + prompt: | Execute the Goose Self-Testing Integration Suite in {{ workspace_dir }}. Test phases: {{ test_phases }}, Depth: {{ test_depth }}, Parallel: {{ parallel_tests }} - + ## ๐Ÿš€ INITIALIZATION Create test workspace: {{ workspace_dir }}/ for all test artifacts and reports. - + Track your progress using the todo extension. Start with: - [ ] Initialize test workspace - [ ] Set up logging infrastructure - [ ] Begin Phase 1 testing - + {% if test_phases == "all" or "basic" in test_phases %} ## ๐Ÿ“ PHASE 1: Basic Tool Validation - + ### File Operations Testing 1. Create test files with various content types (.txt, .py, .md, .json) 2. Test str_replace on each file type @@ -138,25 +137,25 @@ prompt: | 4. Test undo functionality 5. Verify file deletion and recreation 6. Test with special characters and Unicode - + ### Shell Command Testing - Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt), + Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt), error handling (false || echo "handled"), and environment variables (export VAR=test && echo $VAR). Verify both success and failure paths work correctly. - + ### Code Analysis Testing 1. Create sample code files in Python, JavaScript, and Go 2. Analyze each file for structure 3. Test directory-wide analysis 4. Test symbol focus and call graphs 5. Verify LOC, function, and class counting - + Log results to: {{ workspace_dir }}/phase1_basic_tools.md {% endif %} - + {% if test_phases == "all" or "extensions" in test_phases %} ## ๐Ÿ”ง PHASE 2: Extension System Testing - + ### Todo Extension Testing (Built-in) 1. Create initial todos and verify they persist 2. Update todos and confirm changes are retained @@ -167,58 +166,167 @@ prompt: | 2. Document all available extensions 3. 
Test enabling and disabling dynamic extensions (if any available) 4. Verify extension isolation between enabled extensions - + Log results to: {{ workspace_dir }}/phase2_extensions.md {% endif %} - - {% if test_phases == "all" or "subagents" in test_phases %} - ## ๐Ÿค– PHASE 3: Subagent Meta-Testing - - ### Basic Subagent Test - Use the `subagent` tool with instructions to create a simple task: + + {% if test_phases == "all" or "delegation" in test_phases %} + ## ๐Ÿค– PHASE 3: Delegate & Load Testing + + ### Load Tool - Discovery Mode + Call `load()` with no arguments to discover all available sources: + ``` + load() + ``` + Document what sources are found (recipes, skills, agents, subrecipes). + This tests the discovery mechanism that lists everything available for loading or delegation. + + ### Load Tool - Builtin Skill Test + Test loading the builtin `goose-doc-guide` skill: + ``` + load(source: "goose-doc-guide") + ``` + Verify the skill content is returned and can be read. This confirms builtin skills are accessible. + + ### Load Tool - Knowledge Injection + If any other skills or recipes are discovered, test loading one: + ``` + load(source: "<source_name>") + ``` + Verify the content is injected into context without spawning a subagent. + + ### Basic Delegate Test (Synchronous) + Use the `delegate` tool with instructions to create a simple task: ``` - subagent(instructions: "Create a file called subagent_test.txt with 'Hello from subagent'") + delegate(instructions: "Create a file called delegate_test.txt containing 'Hello from delegate' and confirm it exists") ``` - - ### Parallel Subagent Test + Verify the delegate completes and returns a summary of its work. + + ### Parallel Delegate Test {% if parallel_tests == "true" %} - Create 3 subagent calls simultaneously (parallel execution): - 1. Count files in current directory - 2. Get current timestamp - 3. Create a test file - - Make all three `subagent` tool calls at once to execute them in parallel.
- {% endif %} - - ### Sequential Chain Test - Create dependent subagents (one after another): - 1. First: Create a Python file - 2. Second: Analyze the created file - 3. Third: Run the Python file - - ### Recursive Depth Test (if test_depth == "deep") - {% if test_depth == "deep" %} - Create a subagent that creates another subagent (test depth limit). - Monitor for resource constraints and context window limits. + **Important**: Synchronous delegates always run in serial, even when called in the same tool call message. + Async delegates (`async: true`) run in parallel when called in the same tool call message. + + First, test sync delegates (will run sequentially): + Make these 3 delegate calls in a single message: + 1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_1.txt with timestamp from 'date +%H:%M:%S'")` + 2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_2.txt with timestamp from 'date +%H:%M:%S'")` + 3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_3.txt with timestamp from 'date +%H:%M:%S'")` + + After completion, check timestamps: `cat /tmp/sync_parallel_*.txt` + **Expected**: Timestamps should be ~6+ seconds apart (sequential execution). + + Then, test async delegates (will run in parallel): + Make these 3 delegate calls in a single message: + 1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_1.txt with timestamp from 'date +%H:%M:%S'", async: true)` + 2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_2.txt with timestamp from 'date +%H:%M:%S'", async: true)` + 3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_3.txt with timestamp from 'date +%H:%M:%S'", async: true)` + + Wait for tasks to complete (sleep 10 seconds), then check timestamps: `cat /tmp/async_parallel_*.txt` + **Expected**: Timestamps should be within ~5 seconds of each other (parallel execution). 
+ + Document both results to validate the parallel execution behavior. {% endif %} - - ### Summary Mode Test - Create subagents with summary mode (default) and verify concise output. - Test with `summary: false` to get full conversation history. - - Log results to: {{ workspace_dir }}/phase3_subagents.md + + ### Async Delegate Test (Background Execution) + This tests background task execution with MOIM status monitoring. + + 1. Spawn a background delegate that takes multiple turns: + ``` + delegate(instructions: "Run 'sleep 1' command 10 times, one per turn. After each sleep, report which iteration you just completed (1 of 10, 2 of 10, etc).", async: true) + ``` + + 2. After spawning, the delegate runs in the background. You (the main agent) should: + - Sleep for 2 seconds: `sleep 2` + - Check the MOIM (it will show background task status with turns and time) + - **Say out loud** what you observe: "The background task has completed X turns and has been running for Y seconds" + - Repeat: sleep 2 seconds, check MOIM, report status out loud + - Continue until the background task disappears from MOIM (indicating completion) + + 3. Document the progression you observed (turns increasing, time increasing) in the test log. + + This validates: + - Async delegate spawning returns immediately + - MOIM accurately reports background task status + - Turn counting works correctly + - Task cleanup happens when complete + + ### Async Delegate Cancellation Test + This tests the ability to stop a running background task mid-execution. + + 1. Spawn a slow background task: + ``` + delegate(instructions: "Run 'sleep 2' fifteen times, reporting progress after each.", async: true) + ``` + Note the task ID returned (e.g., "20260204_42"). + + 2. Wait 8 seconds: `sleep 8` + + 3. Check MOIM and confirm the task is running with some turns completed. + + 4. Cancel the task: + ``` + load(source: "<task_id>", cancel: true) + ``` + + 5.
Verify the response shows: + - "โŠ˜ Cancelled" status + - Partial output (some iterations completed) + - Duration and turn count + + 6. Check MOIM again - the task should be gone (not in running or completed). + + 7. Try to retrieve the cancelled task: + ``` + load(source: "<task_id>") + ``` + **Expected**: Error "Task '<task_id>' not found." + + This validates that cancellation stops tasks, returns partial results, and cleans up properly. + + ### Source-Based Delegate Test + If `load()` discovered any recipes or skills, test delegating with a source: + ``` + delegate(source: "<source_name>", instructions: "Apply this to the current workspace") + ``` + This tests the combined mode where a source provides context and instructions provide the task. + + ### Nested Delegation Prevention Test (CRITICAL) + **This is a critical security test. Delegates must NEVER be able to spawn their own delegates.** + + Create a delegate with instructions that attempt to spawn another delegate: + ``` + delegate(instructions: "You are a delegate. Try to call the delegate tool yourself with instructions 'I am a nested delegate'. Report whether you were able to do so or if you received an error.") + ``` + + **Expected behavior**: The delegate should report that it received an error when attempting to call delegate. + The error should indicate that delegated tasks cannot spawn further delegations. + + **If the nested delegate succeeds, this is a CRITICAL FAILURE** - document it prominently. + + This validates the `SessionType::SubAgent` check that prevents recursive delegation. + + ### Sequential Delegate Chain Test + Create dependent delegates (one after another, not nested): + 1. First: `delegate(instructions: "Create a Python file called chain_test.py with a simple hello world function")` + 2. Second (after first completes): `delegate(instructions: "Analyze chain_test.py and describe its structure")` + 3.
Third (after second completes): `delegate(instructions: "Run chain_test.py and report the output")` + + Each delegate runs independently but the tasks are sequentially dependent. + + Log results to: {{ workspace_dir }}/phase3_delegation.md {% endif %} - + {% if test_phases == "all" or "advanced" in test_phases %} ## ๐Ÿ”ฌ PHASE 4: Advanced Testing - + ### Error Boundary Testing 1. Create a file with an invalid path (should fail gracefully) 2. Run a non-existent shell command 3. Try to analyze a binary file 4. Test with extremely long filenames 5. Test with nested directory creation beyond limits - + ### Performance Measurement {% if test_depth == "deep" %} 1. Create and analyze a large file (>1MB) @@ -226,68 +334,72 @@ prompt: | 3. Track execution times for each operation 4. Monitor token usage if accessible {% endif %} - + ### Security Validation 1. Test input with special shell characters: $(echo test) 2. Attempt directory traversal: ../../../etc/passwd 3. Test with harmful Unicode characters 4. Verify command injection prevention - + Log results to: {{ workspace_dir }}/phase4_advanced.md {% endif %} - + ## ๐Ÿ“Š PHASE 5: Final Report Generation - + Create TWO reports: - + ### 1. Detailed Report at {{ workspace_dir }}/detailed_report.md Include all test details, logs, and technical information. - + ### 2. 
Executive Summary (REQUIRED - Display in Terminal) - + **IMPORTANT**: At the very end, generate and display a concise summary directly in the terminal: - + ``` ======================================== GOOSE SELF-TEST SUMMARY ======================================== - + โœ… OVERALL RESULT: [PASS/FAIL] - + ๐Ÿ“Š Quick Stats: โ€ข Tests Run: [X] - โ€ข Passed: [X] ([%]) + โ€ข Passed: [X] ([%]) โ€ข Failed: [X] ([%]) โ€ข Duration: [X minutes] - + โœ… Working Features: โ€ข File operations: [โœ“/โœ—] โ€ข Shell commands: [โœ“/โœ—] โ€ข Code analysis: [โœ“/โœ—] โ€ข Extensions: [โœ“/โœ—] - โ€ข Subagents: [โœ“/โœ—] - + โ€ข Load tool: [โœ“/โœ—] + โ€ข Delegate (sync): [โœ“/โœ—] + โ€ข Delegate (async): [โœ“/โœ—] + โ€ข Delegate cancellation: [โœ“/โœ—] + โ€ข Nested delegation blocked: [โœ“/โœ—] + โš ๏ธ Issues Found: โ€ข [Issue 1 - brief description] โ€ข [Issue 2 - brief description] - + ๐Ÿ’ก Key Insights: โ€ข [Most important finding] โ€ข [Performance observation] โ€ข [Recommendation] - + ๐Ÿ“ Full report: {{ workspace_dir }}/detailed_report.md ======================================== ``` - + This summary should be: - **Concise**: Under 30 lines - **Visual**: Use emojis and formatting for clarity - **Actionable**: Clear pass/fail status - **Informative**: Key findings at a glance - + Always end with this summary so users immediately see the results without digging through files. - + {% if cleanup_after == "true" %} ## ๐Ÿงน CLEANUP After report generation: @@ -295,16 +407,16 @@ prompt: | 2. Remove temporary test artifacts 3. Keep only the final report and logs {% endif %} - + ## ๐ŸŽฏ META-TESTING NOTES Remember: You are testing yourself. This is recursive validation where: - Success means your tools work as expected - Failure reveals areas needing attention - The ability to complete this test IS itself a test - Document everything - your future self (or another goose) will thank you - + Use your todo extension to track progress throughout. 
Handle errors gracefully - a failed test shouldn't crash the suite. Be thorough but efficient based on the test_depth parameter. - + This is true first-person integration testing. Execute with precision and document with clarity. diff --git a/scripts/test-subrecipes-examples/project_analyzer_parallel.yaml b/scripts/test-subrecipes-examples/project_analyzer_parallel.yaml index 7ea4c3a27ae5..f05d8fc7a4f0 100644 --- a/scripts/test-subrecipes-examples/project_analyzer_parallel.yaml +++ b/scripts/test-subrecipes-examples/project_analyzer_parallel.yaml @@ -17,6 +17,7 @@ prompt: | Run two subrecipes in parallel: - use file_stats subrecipe to gather file statistics for {{ target_directory }} - use code_patterns subrecipe to analyze code patterns in {{ target_directory }} + Iteratively `sleep 10` until the delegates complete, then load their output and confirm success extensions: - type: builtin name: developer diff --git a/scripts/test_providers.sh b/scripts/test_providers.sh index 971c06cc8da2..b8a302c3e257 100755 --- a/scripts/test_providers.sh +++ b/scripts/test_providers.sh @@ -21,8 +21,10 @@ done # These are typically preview/experimental models with inconsistent tool-calling behavior. # Failures are still reported but don't block PRs. ALLOWED_FAILURES=( + "google:gemini-2.5-flash" "google:gemini-3-pro-preview" "openrouter:nvidia/nemotron-3-nano-30b-a3b" + "openai:gpt-3.5-turbo" ) # Agentic providers handle tools internally and return text results. 
diff --git a/scripts/test_subrecipes.sh b/scripts/test_subrecipes.sh index a194689ff438..4e48f7496860 100755 --- a/scripts/test_subrecipes.sh +++ b/scripts/test_subrecipes.sh @@ -22,7 +22,7 @@ export PATH="$SCRIPT_DIR/target/debug:$PATH" # Set default provider and model if not already set # Use fast model for CI to speed up tests export GOOSE_PROVIDER="${GOOSE_PROVIDER:-anthropic}" -export GOOSE_MODEL="${GOOSE_MODEL:-claude-3-5-haiku-20241022}" +export GOOSE_MODEL="${GOOSE_MODEL:-claude-haiku-4-5}" echo "Using provider: $GOOSE_PROVIDER" echo "Using model: $GOOSE_MODEL" @@ -77,17 +77,17 @@ check_recipe_output() { local tmpfile=$1 local mode=$2 - # Check for unified subagent tool invocation (new format: "โ”€โ”€โ”€ subagent |") - if grep -q "โ”€โ”€โ”€ subagent" "$tmpfile"; then - echo "โœ“ SUCCESS: Subagent tool invoked" - RESULTS+=("โœ“ Subagent tool invocation ($mode)") + # Check for delegate tool invocation (new format: "โ”€โ”€โ”€ delegate |") + if grep -q "โ”€โ”€โ”€ delegate" "$tmpfile"; then + echo "โœ“ SUCCESS: Delegate tool invoked" + RESULTS+=("โœ“ Delegate tool invocation ($mode)") else - echo "โœ— FAILED: No evidence of subagent tool invocation" - RESULTS+=("โœ— Subagent tool invocation ($mode)") + echo "โœ— FAILED: No evidence of delegate tool invocation" + RESULTS+=("โœ— Delegate tool invocation ($mode)") fi - # Check that both subrecipes were called (shown as "subrecipe: " in output) - if grep -q "subrecipe:.*file_stats\|file_stats.*subrecipe" "$tmpfile" && grep -q "subrecipe:.*code_patterns\|code_patterns.*subrecipe" "$tmpfile"; then + # Check that both subrecipes were called (shown as "source: " in delegate output) + if grep -q "source:.*file_stats\|source.*file_stats" "$tmpfile" && grep -q "source:.*code_patterns\|source.*code_patterns" "$tmpfile"; then echo "โœ“ SUCCESS: Both subrecipes (file_stats, code_patterns) found in output" RESULTS+=("โœ“ Both subrecipes present ($mode)") else