From e66ec16febe5feb17281972327bac40460430d83 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 18 Nov 2025 11:39:53 +1100 Subject: [PATCH 1/2] remove autopilot experimental feature --- crates/goose/src/agents/agent.rs | 16 - crates/goose/src/agents/mod.rs | 1 - .../goose/src/agents/model_selector/README.md | 28 - .../src/agents/model_selector/autopilot.rs | 1366 ----------------- crates/goose/src/agents/model_selector/mod.rs | 1 - .../agents/model_selector/premade_roles.yaml | 181 --- .../docs/guides/multi-model/autopilot.md | 127 -- .../docs/guides/multi-model/creating-plans.md | 8 +- documentation/docs/tutorials/lead-worker.md | 6 +- 9 files changed, 4 insertions(+), 1730 deletions(-) delete mode 100644 crates/goose/src/agents/model_selector/README.md delete mode 100644 crates/goose/src/agents/model_selector/autopilot.rs delete mode 100644 crates/goose/src/agents/model_selector/mod.rs delete mode 100644 crates/goose/src/agents/model_selector/premade_roles.yaml delete mode 100644 documentation/docs/guides/multi-model/autopilot.md diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index d0551990b501..ed28ea8f1463 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -56,7 +56,6 @@ use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, instrument, warn}; use super::final_output_tool::FinalOutputTool; -use super::model_selector::autopilot::AutoPilot; use super::platform_tools; use super::tool_execution::{ToolCallResult, CHAT_MODE_TOOL_SKIPPED_RESPONSE, DECLINED_RESPONSE}; use crate::agents::subagent_task_config::TaskConfig; @@ -105,7 +104,6 @@ pub struct Agent { pub(super) scheduler_service: Mutex>>, pub(super) retry_manager: RetryManager, pub(super) tool_inspection_manager: ToolInspectionManager, - pub(super) autopilot: Mutex, } #[derive(Clone, Debug)] @@ -180,7 +178,6 @@ impl Agent { scheduler_service: Mutex::new(None), retry_manager: RetryManager::new(), tool_inspection_manager: Self::create_default_tool_inspection_manager(), - autopilot: Mutex::new(AutoPilot::new()), } } @@ -933,19 +930,6 @@ impl Agent { break; } - { - let mut autopilot = self.autopilot.lock().await; - if let Some((new_provider, role, model)) = autopilot.check_for_switch(&conversation, self.provider().await?).await? { - debug!("AutoPilot switching to {} role with model {}", role, model); - self.update_provider(new_provider).await?; - - yield AgentEvent::ModelChange { - model: model.clone(), - mode: format!("autopilot:{}", role), - }; - } - } - let conversation_with_moim = super::moim::inject_moim( conversation.clone(), &self.extension_manager, diff --git a/crates/goose/src/agents/mod.rs b/crates/goose/src/agents/mod.rs index 9c80f1d64bf0..0a17c4b853ac 100644 --- a/crates/goose/src/agents/mod.rs +++ b/crates/goose/src/agents/mod.rs @@ -7,7 +7,6 @@ pub mod extension_manager_extension; pub mod final_output_tool; mod large_response_handler; pub mod mcp_client; -pub mod model_selector; pub mod moim; pub mod platform_tools; pub mod prompt_manager; diff --git a/crates/goose/src/agents/model_selector/README.md b/crates/goose/src/agents/model_selector/README.md deleted file mode 100644 index ac44ce70f615..000000000000 --- a/crates/goose/src/agents/model_selector/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# Autopilot model selector - -This is an advanced feature (config of which may change, use with caution for now) -which lets goose automatically rotate through many providers and models based on rules that trigger as part of its work. - -Models can change at any time, and can help (similar to lead/worker) solve persistent issues, get an advanced plan, a second opinion or more. - -`premade_roles.yaml` are the out of the box configurations, which can be used in the `~/.config/goose/config.yaml` like so: - - -```yaml -x-advanced-models: -- provider: databricks - model: goose-gpt-5 - role: reviewer -- provider: anthropic - model: claude-opus-4-1-20250805 - role: deep-thinker -``` - -in this case, when there is some complex activity or planning or thining required, it will automatically switch to opus for a while, likewise when code changes have been made, it will use the reviewer model. - -## Use cases - -You can do a lead/worker like combo, or you can default to a low cost model and only in some cases use a frontier model. -You could default to a local model, and only intermittently switch when needed. - -use `--debug` flag if you want to see it logging when it changes. \ No newline at end of file diff --git a/crates/goose/src/agents/model_selector/autopilot.rs b/crates/goose/src/agents/model_selector/autopilot.rs deleted file mode 100644 index 253ddffec638..000000000000 --- a/crates/goose/src/agents/model_selector/autopilot.rs +++ /dev/null @@ -1,1366 +0,0 @@ -use anyhow::Result; -use once_cell::sync::Lazy; -use regex::Regex; -use serde::Deserialize; -use std::collections::HashMap; -use std::sync::Arc; -use tracing::{debug, warn}; - -use crate::config::Config; -use crate::conversation::message::MessageContent; -use crate::conversation::Conversation; -use crate::providers; - -// Embedded YAML content for pre-made roles -const PREMADE_ROLES_YAML: &str = include_str!("premade_roles.yaml"); - -#[derive(Debug, Clone, Default, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum MatchType { - #[default] - Any, - All, -} - -#[derive(Debug, Clone, Default, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum TriggerSource { - Human, // Only trigger on human messages - Machine, // Only trigger on machine-generated events - #[default] - Any, // Trigger on either -} - -#[derive(Debug, Clone, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ComplexityLevel { - Low, - Medium, - High, -} - -#[derive(Debug, Clone, Deserialize)] -pub struct TriggerRules { - /// Keywords to match in user messages - #[serde(default)] - pub keywords: Vec, - - /// How to match keywords - "any" or "all" - #[serde(default)] - pub match_type: MatchType, - - /// Trigger after a tool execution failure - #[serde(default)] - pub on_failure: bool, - - /// Trigger after any tool usage - #[serde(default)] - pub after_tool_use: bool, - - /// Trigger after N consecutive tool uses - #[serde(default)] - pub consecutive_tools: Option, - - /// Trigger after N consecutive failures - #[serde(default)] - pub consecutive_failures: Option, - - /// Trigger after N consecutive machine messages (no human input) - #[serde(default)] - pub machine_messages_without_human: Option, - - /// Trigger after N total tool calls since last human message - #[serde(default)] - pub tools_since_human: Option, - - /// Trigger after N messages since last human input - #[serde(default)] - pub messages_since_human: Option, - - /// Complexity analysis threshold - #[serde(default)] - pub complexity_threshold: Option, - - /// Trigger on the first turn of a conversation - #[serde(default)] - pub first_turn: bool, - - /// Source of trigger (human, machine, or any) - #[serde(default)] - pub source: TriggerSource, -} - -#[derive(Debug, Clone, Deserialize)] -pub struct Rules { - pub triggers: TriggerRules, - - /// Number of turns this model stays active once triggered - #[serde(default = "default_active_turns")] - pub active_turns: usize, - - /// Priority when multiple models match (higher = more important) - #[serde(default)] - pub priority: i32, -} - -fn default_active_turns() -> usize { - 5 -} - -#[derive(Debug, Clone, Deserialize)] -pub struct ModelConfig { - pub provider: String, - pub model: String, - pub role: String, - #[serde(default)] - pub rules: Option, // Optional - can inherit from premade -} - -#[derive(Debug, Clone, Deserialize)] -struct PremadeRole { - pub role: String, - pub rules: Rules, -} - -#[derive(Debug, Clone, Deserialize)] -struct PremadeRoles { - roles: Vec, -} - -// Complete model config with rules (after merging) -#[derive(Debug, Clone)] -struct CompleteModelConfig { - pub provider: String, - pub model: String, - pub role: String, - pub rules: Rules, -} - -/// Tracks the state of a specific model's usage -#[derive(Debug, Clone, Default)] -struct ModelState { - last_invoked_turn: Option, - invocation_count: usize, -} - -/// AutoPilot manages automatic model switching based on conversation context -pub struct AutoPilot { - model_configs: Vec, - model_states: HashMap, - original_provider: Option>, - switch_active: bool, - current_role: Option, -} - -impl AutoPilot { - /// Load pre-made role rules from embedded YAML - fn load_premade_rules() -> HashMap { - match serde_yaml::from_str::(PREMADE_ROLES_YAML) { - Ok(premade) => { - debug!("Loaded {} pre-made role rules", premade.roles.len()); - premade - .roles - .into_iter() - .map(|r| (r.role, r.rules)) - .collect() - } - Err(e) => { - warn!("Failed to load pre-made roles: {}", e); - HashMap::new() - } - } - } - - /// Merge user configs with pre-made rules - /// User must provide provider and model, but rules are optional (inherit from premade) - fn merge_configs( - premade_rules: HashMap, - user_configs: Vec, - ) -> Vec { - let mut complete_configs = Vec::new(); - - for user_config in user_configs { - // Get the rules - either from user config or premade - let rules = if let Some(user_rules) = user_config.rules { - // User provided custom rules for this role - user_rules - } else if let Some(premade_rules) = premade_rules.get(&user_config.role) { - // Use premade rules for this role - premade_rules.clone() - } else { - // No premade rules and no user rules - skip this config - warn!( - "No rules found for role '{}' - neither in user config nor premade. Skipping.", - user_config.role - ); - continue; - }; - - complete_configs.push(CompleteModelConfig { - provider: user_config.provider, - model: user_config.model, - role: user_config.role, - rules, - }); - } - - complete_configs - } - - /// Create a new AutoPilot instance, loading model configurations from config - pub fn new() -> Self { - let config = Config::global(); - - // Load pre-made role rules - let premade_rules = Self::load_premade_rules(); - - // Try to load user models configuration from config.yaml - let user_models: Vec = config - .get_param("x-advanced-models") - .unwrap_or_else(|_| Vec::new()); - - // Merge configs - user provides provider/model, rules come from premade or user override - let models = Self::merge_configs(premade_rules, user_models); - - let mut model_states = HashMap::new(); - for model in &models { - model_states.insert(model.role.clone(), ModelState::default()); - } - - if !models.is_empty() { - debug!( - "AutoPilot initialized with {} model configurations", - models.len() - ); - for model in &models { - debug!( - "Role '{}': {}/{} (priority: {})", - model.role, model.provider, model.model, model.rules.priority - ); - } - } else { - debug!("AutoPilot: No model configurations found in config"); - } - - Self { - model_configs: models, - model_states, - original_provider: None, - switch_active: false, - current_role: None, - } - } -} - -impl Default for AutoPilot { - fn default() -> Self { - Self::new() - } -} - -impl AutoPilot { - /// Count the current turn number (number of user messages) - fn count_turns(&self, conversation: &Conversation) -> usize { - conversation - .messages() - .iter() - .filter(|msg| msg.role == rmcp::model::Role::User) - .count() - } - - /// Check if keywords match based on match_type - fn check_keywords(text: &str, keywords: &[String], match_type: &MatchType) -> bool { - if keywords.is_empty() { - return false; - } - - let text_lower = text.to_lowercase(); - match match_type { - MatchType::Any => keywords - .iter() - .any(|kw| text_lower.contains(&kw.to_lowercase())), - MatchType::All => keywords - .iter() - .all(|kw| text_lower.contains(&kw.to_lowercase())), - } - } - - /// Score the complexity of a paragraph/sentence as Low / Medium / High. - /// This uses a variety of simple (but known) fast algorithms. - /// Looks like generated code, only partly is, mic did work over it. - /// It appears complex, but the idea is to have a fast way to know if some body of text is hard to read or complex in any way. - /// - /// Algorithms included: - /// - **Flesch Reading Ease (FRE)** → higher = simpler - /// - **Flesch–Kincaid Grade Level (FKGL)** → higher = harder - /// - **Gunning Fog Index (FOG)** → higher = harder - /// - **Coleman–Liau Index (CLI)** → higher = harder - /// - **Automated Readability Index (ARI)** → higher = harder - /// - **LIX (Läsbarhetsindex)** → higher = harder - /// - /// some features layered on top of the formulas: - /// - **Long-word ratio** (>6 letters): jargon proxy → penalizes if high - /// - **Clause density** (commas, semicolons, parentheses per sentence): proxy for syntactic load → penalizes if high - /// - **Instructional boost**: if sentences are short, long-word ratio is low, and clauses are few, give a small positive bump (to better classify "simple instruction" style text) - /// - /// The formulas are normalized into a 0–100 "simplicity" scale, then blended with weights. - /// Heuristic penalties/bonuses are applied, and the final result is bucketed in to the following - /// >70 = Low (simple), 40–70 = Medium, <40 = High (complex). - pub fn analyze_complexity(text: &str) -> ComplexityLevel { - // --- tokenization --- - static RE_WORD: Lazy = - Lazy::new(|| Regex::new(r"[A-Za-z]+(?:'[A-Za-z]+)?").unwrap()); - static RE_SENT: Lazy = Lazy::new(|| Regex::new(r"[.!?]+").unwrap()); - static RE_CLAUSE: Lazy = Lazy::new(|| Regex::new(r"[,:;()—-]").unwrap()); - - let words: Vec<&str> = RE_WORD.find_iter(text).map(|m| m.as_str()).collect(); - let w = words.len().max(1); - - // Automatically classify anything less than 4 words as Low complexity - if w < 4 { - return ComplexityLevel::Low; - } - let s = RE_SENT.find_iter(text).count().max(1); - - let letters = text.chars().filter(|c| c.is_alphabetic()).count(); - let chars_no_space = text.chars().filter(|c| !c.is_whitespace()).count(); - let clauses = RE_CLAUSE.find_iter(text).count(); - - // syllable, long-word, polysyllable counts - let mut syl = 0usize; - let mut polys = 0usize; - let mut longw = 0usize; - for &wd in &words { - let sy = Self::syllables(wd); - syl += sy; - if sy >= 3 { - polys += 1; - } - if wd.len() > 6 { - longw += 1; - } - } - - // --- readability formulas --- - let avg_wps = w as f32 / s as f32; // words per sentence - let avg_syl = syl as f32 / w as f32; - - // 1. Flesch Reading Ease (FRE) - let fre = 206.835 - 1.015 * avg_wps - 84.6 * avg_syl; - - // 2. Flesch–Kincaid Grade Level (FKGL) - let fkgl = 0.39 * avg_wps + 11.8 * avg_syl - 15.59; - - // 3. Gunning Fog Index - let fog = 0.4 * (avg_wps + 100.0 * (polys as f32 / w as f32)); - - // 4. Coleman–Liau Index (CLI) - let cli = { - let l = 100.0 * (letters as f32 / w as f32); - let s100 = 100.0 * (s as f32 / w as f32); - 0.0588 * l - 0.296 * s100 - 15.8 - }; - - // 5. Automated Readability Index (ARI) - let ari = 4.71 * (chars_no_space as f32 / w as f32) + 0.5 * avg_wps - 21.43; - - // 6. LIX (Läsbarhetsindex) - let lix = avg_wps + 100.0 * (longw as f32 / w as f32); - - // --- normalize into 0..100 simplicity --- - let clamp01 = |x: f32| x.clamp(0.0, 1.0); - let inv_grade = |g: f32| 100.0 * (1.0 - clamp01(g / 18.0)); // 0 grade→100 simple, 18+→0 - let f_fre = 100.0 * clamp01(fre / 100.0); - let f_fkgl = inv_grade(fkgl); - let f_fog = inv_grade(fog); - let f_cli = inv_grade(cli); - let f_ari = inv_grade(ari); - let f_lix = 100.0 * (1.0 - clamp01((lix - 20.0) / 40.0)); // LIX 20..60 → 100..0 - - // Weighted blend of formulas (tuned weights, sum < 1.0) - let mut simplicity = 0.30 * f_fre - + 0.16 * f_fkgl - + 0.12 * f_fog - + 0.10 * f_cli - + 0.07 * f_ari - + 0.08 * f_lix; - - // --- heuristic adjustments --- - let long_ratio = longw as f32 / w as f32; - let clause_density = clauses as f32 / s as f32; - - // Penalty for jargon-ish long words (up to -20) - simplicity -= (long_ratio * 20.0).min(20.0); - - // Penalty for heavy clause punctuation (up to -15 when clauses/sentence ≳ 3) - simplicity -= ((clause_density / 3.0) * 15.0).min(15.0); - - // Boost if text looks like simple instructions: - // short sentences, few long words, low clause punctuation - if avg_wps < 14.0 && long_ratio < 0.12 && clause_density < 0.8 { - simplicity += 5.0; - } - - // --- final bucketing --- - let score = simplicity.clamp(0.0, 100.0); - if score > 70.0 { - ComplexityLevel::Low - } else if score >= 40.0 { - ComplexityLevel::Medium - } else { - ComplexityLevel::High - } - } - - /// Tiny syllable guesser (used by FRE, FKGL, Fog) - fn syllables(word: &str) -> usize { - let w = word.to_lowercase(); - let mut count = 0usize; - let mut prev_v = false; - for c in w.chars() { - let v = matches!(c, 'a' | 'e' | 'i' | 'o' | 'u' | 'y'); - if v && !prev_v { - count += 1; - } - prev_v = v; - } - if w.ends_with('e') && count > 1 { - count -= 1; - } - count.max(1) - } - - /// Check if the trigger source matches the last message - fn check_source(&self, conversation: &Conversation, source: &TriggerSource) -> bool { - let last_msg = conversation.messages().last(); - - match source { - TriggerSource::Human => { - // Check if the last message is from a human - last_msg.is_some_and(|msg| msg.role == rmcp::model::Role::User) - } - TriggerSource::Machine => { - // Check if the last message is from the assistant - last_msg.is_some_and(|msg| msg.role == rmcp::model::Role::Assistant) - } - TriggerSource::Any => true, - } - } - - /// Count consecutive tool uses at the end of the conversation - fn count_consecutive_tools(&self, conversation: &Conversation) -> usize { - let messages = conversation.messages(); - let mut count = 0; - - // Work backwards through assistant messages - for msg in messages.iter().rev() { - if msg.role != rmcp::model::Role::Assistant { - continue; - } - - let has_tool = msg - .content - .iter() - .any(|content| matches!(content, MessageContent::ToolRequest(_))); - - if has_tool { - count += 1; - } else { - break; // Stop at first non-tool message - } - } - - count - } - - /// Count consecutive tool failures - fn count_consecutive_failures(&self, conversation: &Conversation) -> usize { - let messages = conversation.messages(); - let mut count = 0; - - // Work backwards looking for tool responses - for msg in messages.iter().rev() { - let has_failure = msg.content.iter().any(|content| { - if let MessageContent::ToolResponse(response) = content { - response.tool_result.is_err() - } else { - false - } - }); - - if has_failure { - count += 1; - } else if msg - .content - .iter() - .any(|c| matches!(c, MessageContent::ToolResponse(_))) - { - // Found a successful tool response, stop counting - break; - } - } - - count - } - - /// Count messages since last human input - fn count_messages_since_human(&self, conversation: &Conversation) -> usize { - let messages = conversation.messages(); - let mut count = 0; - - // Work backwards counting messages until we find a User message - for msg in messages.iter().rev() { - if msg.role == rmcp::model::Role::User { - break; - } - count += 1; - } - - count - } - - /// Count tool calls since last human message - fn count_tools_since_human(&self, conversation: &Conversation) -> usize { - let messages = conversation.messages(); - let mut tool_count = 0; - - // Work backwards counting tool requests until we find a User message - for msg in messages.iter().rev() { - if msg.role == rmcp::model::Role::User { - break; - } - - // Count tool requests in this message - tool_count += msg - .content - .iter() - .filter(|content| matches!(content, MessageContent::ToolRequest(_))) - .count(); - } - - tool_count - } - - /// Count consecutive machine messages (assistant messages without human interruption) - fn count_machine_messages_without_human(&self, conversation: &Conversation) -> usize { - let messages = conversation.messages(); - let mut count = 0; - - // Work backwards counting assistant messages until we find a user message - for msg in messages.iter().rev() { - match msg.role { - rmcp::model::Role::User => break, - rmcp::model::Role::Assistant => count += 1, - } - } - - count - } - - /// Check if there was a recent tool failure - fn check_recent_failure(&self, conversation: &Conversation) -> bool { - // Look for actual tool failures in recent messages - conversation - .messages() - .iter() - .rev() - .take(3) // Check last 3 messages - .any(|msg| { - msg.content.iter().any(|content| { - if let MessageContent::ToolResponse(response) = content { - response.tool_result.is_err() - } else { - false - } - }) - }) - } - - /// Evaluate if a model's rules are satisfied - fn evaluate_rules( - &self, - model: &CompleteModelConfig, - conversation: &Conversation, - current_turn: usize, - ) -> bool { - if !self.check_source(conversation, &model.rules.triggers.source) { - return false; - } - - let triggers = &model.rules.triggers; - let mut triggered = false; - - if triggers.first_turn && current_turn == 1 { - debug!("AutoPilot: '{}' role triggering on first turn", model.role); - triggered = true; - } - - if !triggers.keywords.is_empty() { - if let Some(text) = conversation - .messages() - .iter() - .rev() - .find(|msg| msg.role == rmcp::model::Role::User) - .and_then(|msg| msg.content.first()) - .and_then(|content| content.as_text()) - { - if Self::check_keywords(text, &triggers.keywords, &triggers.match_type) { - triggered = true; - } - } - } - - if triggers.on_failure && self.check_recent_failure(conversation) { - triggered = true; - } - - if let Some(threshold) = triggers.consecutive_failures { - if self.count_consecutive_failures(conversation) >= threshold { - triggered = true; - } - } - - if triggers.after_tool_use { - let has_recent_tool = conversation - .messages() - .iter() - .rev() - .find(|msg| msg.role == rmcp::model::Role::Assistant) - .map(|msg| { - msg.content - .iter() - .any(|content| matches!(content, MessageContent::ToolRequest(_))) - }) - .unwrap_or(false); - - if has_recent_tool { - triggered = true; - } - } - - if let Some(threshold) = triggers.consecutive_tools { - if self.count_consecutive_tools(conversation) >= threshold { - triggered = true; - } - } - - if let Some(threshold) = triggers.machine_messages_without_human { - if self.count_machine_messages_without_human(conversation) >= threshold { - triggered = true; - } - } - - if let Some(threshold) = triggers.tools_since_human { - if self.count_tools_since_human(conversation) >= threshold { - triggered = true; - } - } - - if let Some(threshold) = triggers.messages_since_human { - if self.count_messages_since_human(conversation) >= threshold { - triggered = true; - } - } - - if let Some(ref threshold) = triggers.complexity_threshold { - if let Some(text) = conversation - .messages() - .iter() - .rev() - .find(|msg| msg.role == rmcp::model::Role::User) - .and_then(|msg| msg.content.first()) - .and_then(|content| content.as_text()) - { - let complexity = Self::analyze_complexity(text); - - matches!( - (threshold, complexity), - (ComplexityLevel::Low, ComplexityLevel::Medium) - | (ComplexityLevel::Low, ComplexityLevel::High) - | (ComplexityLevel::Medium, ComplexityLevel::Medium) - | (ComplexityLevel::Medium, ComplexityLevel::High) - | (ComplexityLevel::High, ComplexityLevel::High) - ); - } - } - - triggered - } - - /// Check if a model switch should occur based on the conversation - /// Returns Some((provider, role, model)) if a switch should happen, None otherwise - pub async fn check_for_switch( - &mut self, - conversation: &Conversation, - current_provider: Arc, - ) -> Result, String, String)>> { - debug!("AutoPilot: Checking conversation for model switch"); - - let current_turn = self.count_turns(conversation); - - // If we already switched, evaluate if we should switch to a different model - // (including potentially switching back to original eg when turns are done) - if self.switch_active { - debug!( - "AutoPilot: Currently switched to '{}', evaluating alternatives", - self.current_role.as_deref().unwrap_or("unknown") - ); - - let should_switch = self.should_switch_from_current(conversation, current_turn); - - if let Some((new_provider, new_role, new_model)) = should_switch? { - debug!( - "AutoPilot: Switching from '{}' to '{}'", - self.current_role.as_deref().unwrap_or("unknown"), - new_role - ); - - if new_role == "original" { - self.switch_active = false; - self.current_role = None; - self.original_provider = None; - } else { - self.current_role = Some(new_role.clone()); - } - - return Ok(Some((new_provider, new_role, new_model))); - } - return Ok(None); - } - - // Evaluate all models to use based on the rules - // Get candidates and find the best match, if any, to switch to - let mut candidates: Vec<(&CompleteModelConfig, i32)> = Vec::new(); - - for model in &self.model_configs { - if self.evaluate_rules(model, conversation, current_turn) { - candidates.push((model, model.rules.priority)); - } - } - - candidates.sort_by_key(|(_, priority)| -priority); - - if let Some((best_model, priority)) = candidates.first() { - debug!( - "AutoPilot: Switching to '{}' role with {} model {} (priority: {})", - best_model.role, best_model.provider, best_model.model, priority - ); - - let state = self.model_states.get_mut(&best_model.role).unwrap(); - state.last_invoked_turn = Some(current_turn); - state.invocation_count += 1; - - self.original_provider = Some(current_provider); - self.switch_active = true; - self.current_role = Some(best_model.role.clone()); - - let model = crate::model::ModelConfig::new_or_fail(&best_model.model); - let new_provider = providers::create(&best_model.provider, model).await?; - - return Ok(Some(( - new_provider, - best_model.role.clone(), - best_model.model.clone(), - ))); - } - - Ok(None) - } - - /// Determine if we should switch from the current model to another (including back to original) - #[allow(clippy::type_complexity)] - fn should_switch_from_current( - &self, - _conversation: &Conversation, - current_turn: usize, - ) -> Result, String, String)>> { - // Strategy: Stay in the current role until its cooldown period has elapsed - // This ensures the specialized model gets to complete its work - - let current_role = self.current_role.as_ref().unwrap(); - let current_model = self.model_configs.iter().find(|m| &m.role == current_role); - let current_state = &self.model_states[current_role]; - - if let (Some(current_model), Some(last_invoked_turn)) = - (current_model, current_state.last_invoked_turn) - { - let turns_since_invoked = current_turn.saturating_sub(last_invoked_turn); - - debug!("AutoPilot: Current model '{}' invoked at turn {}, current turn {}, turns since: {}, active_turns: {}", - current_role, last_invoked_turn, current_turn, turns_since_invoked, current_model.rules.active_turns); - - // If we're still within the active period, stay with current model - if turns_since_invoked < current_model.rules.active_turns { - debug!( - "AutoPilot: Still within active period for '{}', staying", - current_role - ); - return Ok(None); - } - - // Active period has elapsed, switch back to original - debug!( - "AutoPilot: Active period elapsed for '{}', switching back to original", - current_role - ); - if let Some(original) = &self.original_provider { - let original_model = original.get_active_model_name(); - return Ok(Some(( - Arc::clone(original), - "original".to_string(), - original_model, - ))); - } - } - - // Fallback: if we can't determine the state, switch back to original - debug!("AutoPilot: Unable to determine current model state, switching back to original"); - if let Some(original) = &self.original_provider { - let original_model = original.get_active_model_name(); - return Ok(Some(( - Arc::clone(original), - "original".to_string(), - original_model, - ))); - } - - Ok(None) - } - - /// Check if autopilot is currently in a switched state - #[allow(dead_code)] - pub fn is_switched(&self) -> bool { - self.switch_active - } - - /// Get the current role if switched - #[allow(dead_code)] - pub fn current_role(&self) -> Option<&str> { - self.current_role.as_deref() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::conversation::message::Message; - use rmcp::model::{Content, ErrorCode}; - use rmcp::ErrorData; - use std::borrow::Cow; - - fn create_test_configs() -> Vec { - vec![ - CompleteModelConfig { - provider: "openai".to_string(), - model: "o1-preview".to_string(), - role: "thinker".to_string(), - rules: Rules { - triggers: TriggerRules { - keywords: vec!["think".to_string(), "analyze".to_string()], - match_type: MatchType::Any, - on_failure: false, - after_tool_use: false, - consecutive_tools: None, - consecutive_failures: None, - complexity_threshold: None, - source: TriggerSource::Human, - machine_messages_without_human: None, - tools_since_human: None, - messages_since_human: None, - first_turn: false, - }, - active_turns: 0, - priority: 10, - }, - }, - CompleteModelConfig { - provider: "anthropic".to_string(), - model: "claude-sonnet-4-20250514".to_string(), - role: "helper".to_string(), - rules: Rules { - triggers: TriggerRules { - keywords: vec!["help".to_string()], - match_type: MatchType::Any, - on_failure: true, - after_tool_use: false, - consecutive_tools: None, - consecutive_failures: None, - complexity_threshold: None, - source: TriggerSource::Any, - machine_messages_without_human: None, - tools_since_human: None, - messages_since_human: None, - first_turn: false, - }, - active_turns: 5, - priority: 5, - }, - }, - CompleteModelConfig { - provider: "openai".to_string(), - model: "gpt-4o".to_string(), - role: "recovery".to_string(), - rules: Rules { - triggers: TriggerRules { - keywords: vec![], - match_type: MatchType::Any, - on_failure: false, - after_tool_use: false, - consecutive_tools: None, - consecutive_failures: Some(2), - complexity_threshold: None, - source: TriggerSource::Machine, - machine_messages_without_human: None, - tools_since_human: None, - messages_since_human: None, - first_turn: false, - }, - active_turns: 10, - priority: 20, - }, - }, - ] - } - - #[test] - fn test_keyword_matching_any() { - let keywords = vec!["think".to_string(), "analyze".to_string()]; - assert!(AutoPilot::check_keywords( - "I need to think about this", - &keywords, - &MatchType::Any - )); - assert!(AutoPilot::check_keywords( - "Please analyze the data", - &keywords, - &MatchType::Any - )); - assert!(!AutoPilot::check_keywords( - "Just do it", - &keywords, - &MatchType::Any - )); - } - - #[test] - fn test_complexity() { - // Test <4 words rule - assert!(matches!( - AutoPilot::analyze_complexity("Hello"), - ComplexityLevel::Low - )); - - // Test complex text - let complex_text = "I need help understanding this extremely complex distributed system architecture. \ - How does the authentication and authorization flow work across multiple microservices? \ - What are the security implications of our current design? Can you explain the database schema in detail? \ - Also, I'm seeing various errors in the production logs and need to debug the API endpoints systematically. \ - The performance seems significantly degraded and I'm wondering if we need to optimize the database queries. \ - Additionally, there are concerns about scalability and high availability. \ - Can you review the caching strategy and suggest improvements? \ - We also need to consider the disaster recovery plan and backup procedures. \ - What monitoring and alerting mechanisms should we implement? \ - How can we ensure data consistency across services? \ - Please provide detailed recommendations for each area."; - - assert!(matches!( - AutoPilot::analyze_complexity(complex_text), - ComplexityLevel::High - )); - } - - #[test] - fn test_keyword_matching_all() { - let keywords = vec!["think".to_string(), "analyze".to_string()]; - assert!(AutoPilot::check_keywords( - "Think about and analyze this problem", - &keywords, - &MatchType::All - )); - assert!(!AutoPilot::check_keywords( - "Just think about it", - &keywords, - &MatchType::All - )); - } - - #[test] - fn test_complexity_analysis() { - assert!(matches!( - AutoPilot::analyze_complexity("Hello"), - ComplexityLevel::Low - )); - assert!(matches!( - AutoPilot::analyze_complexity("Yes please"), - ComplexityLevel::Low - )); - assert!(matches!( - AutoPilot::analyze_complexity("No thank you"), - ComplexityLevel::Low - )); - - // Medium complexity - 50+ words with questions - let medium_text = "Can you help me understand how this complex system works? \ - I need detailed information about the implementation. \ - There are several components that interact with each other. \ - What are the main design patterns used? \ - How does the data flow through the system? \ - Can you also explain the error handling approach?"; - assert!(matches!( - AutoPilot::analyze_complexity(medium_text), - ComplexityLevel::Medium - )); - - // High complexity - Very long text with multiple questions - let complex_text = "I need help understanding this extremely complex distributed system architecture. \ - How does the authentication and authorization flow work across multiple microservices? \ - What are the security implications of our current design? Can you explain the database schema in detail? \ - Also, I'm seeing various errors in the production logs and need to debug the API endpoints systematically. \ - The performance seems significantly degraded and I'm wondering if we need to optimize the database queries. \ - Additionally, there are concerns about scalability and high availability. \ - Can you review the caching strategy and suggest improvements? \ - We also need to consider the disaster recovery plan and backup procedures. \ - What monitoring and alerting mechanisms should we implement? \ - How can we ensure data consistency across services? \ - Please provide detailed recommendations for each area."; - // This should definitely be high complexity with 100+ words and many questions - let complexity = AutoPilot::analyze_complexity(complex_text); - assert!(matches!( - complexity, - ComplexityLevel::High | ComplexityLevel::Medium - )); - } - - #[test] - fn test_source_filtering() { - let mut autopilot = AutoPilot { - model_configs: create_test_configs(), - model_states: HashMap::new(), - original_provider: None, - switch_active: false, - current_role: None, - }; - - // Initialize states - for model in &autopilot.model_configs { - autopilot - .model_states - .insert(model.role.clone(), ModelState::default()); - } - - // Test human source - should trigger "thinker" - let user_msg = Message::user().with_text("I need to think about this"); - let conversation = Conversation::new(vec![user_msg]).unwrap(); - - let thinker_model = &autopilot.model_configs[0]; - assert!(autopilot.evaluate_rules(thinker_model, &conversation, 1)); - - // Test machine source filtering - // Human message as last - should NOT match Machine source filter - let human_conversation = - Conversation::new(vec![Message::user().with_text("test")]).unwrap(); - assert!(!autopilot.check_source(&human_conversation, &TriggerSource::Machine)); - - // Assistant message as last - should match Machine source filter - // Use new_unvalidated since a conversation ending with assistant is technically invalid - let machine_conversation = Conversation::new_unvalidated(vec![ - Message::user().with_text("test"), - Message::assistant().with_text("response"), - ]); - assert!(autopilot.check_source(&machine_conversation, &TriggerSource::Machine)); - } - - #[test] - fn test_active_turns_mechanism() { - let mut autopilot = AutoPilot { - model_configs: create_test_configs(), - model_states: HashMap::new(), - original_provider: None, - switch_active: false, - current_role: None, - }; - - // Initialize states - for model in &autopilot.model_configs { - autopilot - .model_states - .insert(model.role.clone(), ModelState::default()); - } - - // Create a conversation with "help" keyword - let message = Message::user().with_text("I need help"); - let conversation = Conversation::new(vec![message]).unwrap(); - - // The helper model should trigger based on keyword matching - let model = &autopilot.model_configs[1]; // helper model - assert!(autopilot.evaluate_rules(model, &conversation, 6)); - - // Test the active turns logic directly in should_switch_from_current - autopilot.switch_active = true; - autopilot.current_role = Some("helper".to_string()); - autopilot - .model_states - .get_mut("helper") - .unwrap() - .last_invoked_turn = Some(5); - - // At turn 6 (within active period of 5 turns), should stay - // Since we don't have an original provider, it should return None (stay) - let result = autopilot.should_switch_from_current(&conversation, 6); - assert!(result.unwrap().is_none()); // Should stay with current model - - // At turn 11 (active period elapsed), should try to switch back but fail without provider - let result = autopilot.should_switch_from_current(&conversation, 11); - assert!(result.unwrap().is_none()); // No original provider, so can't switch back - } - - #[test] - fn test_consecutive_failures_trigger() { - let autopilot = AutoPilot { - model_configs: create_test_configs(), - model_states: HashMap::new(), - original_provider: None, - switch_active: false, - current_role: None, - }; - - // Create messages with consecutive failures - // Simulate a pattern where we have tool responses that failed - // The count_consecutive_failures function looks at tool responses in messages - - // Mock data - can't actually test this properly without real tool responses in the conversation - // Since tool responses are part of the message content, not separate messages - // This test would need a different approach or mock conversation - - // For now, just test the counting logic works with empty conversation - let messages = vec![ - Message::user().with_text("do something"), - Message::assistant().with_text("I'll try"), - ]; - - let conversation = Conversation::new_unvalidated(messages); - - // Should detect 0 failures in this simple conversation - assert_eq!(autopilot.count_consecutive_failures(&conversation), 0); - } - - #[test] - fn test_premade_rules_loading() { - // This tests that pre-made role rules can be loaded - let premade = AutoPilot::load_premade_rules(); - assert!(!premade.is_empty()); - - // Check that specific roles exist - assert!(premade.contains_key("deep-thinker")); - assert!(premade.contains_key("debugger")); - assert!(premade.contains_key("coder")); - assert!(premade.contains_key("second-opinion")); - } - - #[test] - fn test_config_merging() { - let mut premade_rules = HashMap::new(); - premade_rules.insert( - "helper".to_string(), - Rules { - triggers: TriggerRules::default(), - active_turns: 5, - priority: 5, - }, - ); - - // User config with custom rules - let user_with_rules = vec![ModelConfig { - provider: "anthropic".to_string(), - model: "claude".to_string(), - role: "helper".to_string(), - rules: Some(Rules { - triggers: TriggerRules::default(), - active_turns: 3, - priority: 10, - }), - }]; - - let merged = AutoPilot::merge_configs(premade_rules.clone(), user_with_rules); - assert_eq!(merged.len(), 1); - assert_eq!(merged[0].provider, "anthropic"); - assert_eq!(merged[0].rules.priority, 10); // User rules override - - // User config without rules (inherit from premade) - let user_without_rules = vec![ModelConfig { - provider: "openai".to_string(), - model: "gpt-4".to_string(), - role: "helper".to_string(), - rules: None, // No rules, should inherit from premade - }]; - - let merged = AutoPilot::merge_configs(premade_rules, user_without_rules); - assert_eq!(merged.len(), 1); - assert_eq!(merged[0].provider, "openai"); - assert_eq!(merged[0].rules.priority, 5); // Inherited from premade - } - - #[test] - fn test_first_turn_trigger() { - let mut autopilot = AutoPilot { - model_configs: vec![ - CompleteModelConfig { - provider: "openai".to_string(), - model: "o1-preview".to_string(), - role: "lead".to_string(), - rules: Rules { - triggers: TriggerRules { - keywords: vec![], - match_type: MatchType::Any, - on_failure: false, - after_tool_use: false, - consecutive_tools: None, - consecutive_failures: Some(2), - complexity_threshold: None, - first_turn: true, // This should trigger on first turn - source: TriggerSource::Any, - machine_messages_without_human: None, - tools_since_human: None, - messages_since_human: None, - }, - active_turns: 3, - priority: 30, - }, - }, - CompleteModelConfig { - provider: "anthropic".to_string(), - model: "claude-sonnet-4-20250514".to_string(), - role: "helper".to_string(), - rules: Rules { - triggers: TriggerRules { - keywords: vec!["help".to_string()], - match_type: MatchType::Any, - on_failure: false, - after_tool_use: false, - consecutive_tools: None, - consecutive_failures: None, - complexity_threshold: None, - first_turn: false, // This should NOT trigger on first turn - source: TriggerSource::Any, - machine_messages_without_human: None, - tools_since_human: None, - messages_since_human: None, - }, - active_turns: 5, - priority: 5, - }, - }, - ], - model_states: HashMap::new(), - original_provider: None, - switch_active: false, - current_role: None, - }; - - // Initialize states - for model in &autopilot.model_configs { - autopilot - .model_states - .insert(model.role.clone(), ModelState::default()); - } - - // Test first turn - only "lead" role should trigger - let first_message = Message::user().with_text("Hello, this is the first message"); - let conversation = Conversation::new(vec![first_message]).unwrap(); - - let lead_model = &autopilot.model_configs[0]; // lead model - let helper_model = &autopilot.model_configs[1]; // helper model - - // Lead model should trigger on first turn (current_turn = 1) - assert!(autopilot.evaluate_rules(lead_model, &conversation, 1)); - - // Helper model should NOT trigger on first turn (no first_turn: true and no "help" keyword) - assert!(!autopilot.evaluate_rules(helper_model, &conversation, 1)); - - // Test second turn - lead should NOT trigger on first_turn anymore - let second_message = Message::user().with_text("This is the second message"); - let conversation_turn2 = Conversation::new(vec![ - Message::user().with_text("Hello, this is the first message"), - Message::assistant().with_text("Hello! How can I help you?"), - second_message, - ]) - .unwrap(); - - // Lead model should NOT trigger on second turn (current_turn = 2, first_turn only works on turn 1) - assert!(!autopilot.evaluate_rules(lead_model, &conversation_turn2, 2)); - - // Test that helper model can still trigger on keyword even on first turn - let help_message = Message::user().with_text("I need help with something"); - let help_conversation = Conversation::new(vec![help_message]).unwrap(); - - // Helper model should trigger on "help" keyword, even on first turn - assert!(autopilot.evaluate_rules(helper_model, &help_conversation, 1)); - } - - #[test] - fn test_tool_failure_detection() { - let autopilot = AutoPilot { - model_configs: create_test_configs(), - model_states: HashMap::new(), - original_provider: None, - switch_active: false, - current_role: None, - }; - - // Create a conversation with a tool failure - let messages = vec![ - Message::user().with_text("test"), - Message::user().with_tool_response( - "test_tool", - Err(ErrorData { - code: ErrorCode(-32000), - message: Cow::Borrowed("Tool execution failed"), - data: None, - }), - ), - Message::assistant().with_text("The tool failed"), - ]; - - let conversation = Conversation::new_unvalidated(messages); - assert!(autopilot.check_recent_failure(&conversation)); - - // Test with successful tool response - let success_messages = vec![ - Message::user().with_text("test"), - Message::user().with_tool_response("test_tool", Ok(vec![Content::text("Success!")])), - Message::assistant().with_text("The tool succeeded"), - ]; - - let success_conversation = Conversation::new_unvalidated(success_messages); - assert!(!autopilot.check_recent_failure(&success_conversation)); - - // Create a conversation without tool failures - let messages = vec![ - Message::user().with_text("test"), - Message::assistant().with_text("Let me help"), - ]; - - let conversation = Conversation::new_unvalidated(messages); - // Should not detect any failures - assert!(!autopilot.check_recent_failure(&conversation)); - } - - impl TriggerRules { - fn default() -> Self { - Self { - keywords: vec![], - match_type: MatchType::Any, - on_failure: false, - after_tool_use: false, - consecutive_tools: None, - consecutive_failures: None, - machine_messages_without_human: None, - tools_since_human: None, - messages_since_human: None, - complexity_threshold: None, - first_turn: false, - source: TriggerSource::Any, - } - } - } -} diff --git a/crates/goose/src/agents/model_selector/mod.rs b/crates/goose/src/agents/model_selector/mod.rs deleted file mode 100644 index 97e6ef30e490..000000000000 --- a/crates/goose/src/agents/model_selector/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod autopilot; diff --git a/crates/goose/src/agents/model_selector/premade_roles.yaml b/crates/goose/src/agents/model_selector/premade_roles.yaml deleted file mode 100644 index 22a953ea327d..000000000000 --- a/crates/goose/src/agents/model_selector/premade_roles.yaml +++ /dev/null @@ -1,181 +0,0 @@ -# Pre-made AutoPilot roles with default rules -# These define the default behaviors for common roles -# Users must specify the provider and model in their config.yaml - -roles: - # Lead model - high-capability model for initial turns and failure recovery - - role: "lead" - rules: - triggers: - # Triggers at conversation start AND on consecutive failures - first_turn: true # Trigger on first turn - consecutive_failures: 2 # Same as GOOSE_LEAD_FAILURE_THRESHOLD default - source: "any" # Can trigger on both human (start) and machine (failures) - active_turns: 3 # Same as GOOSE_LEAD_TURNS default (initial) and GOOSE_LEAD_FALLBACK_TURNS for failures - priority: 30 # Highest priority to ensure it always triggers first - - - role: "second-opinion" - rules: - triggers: - keywords: ["help"] - match_type: "any" - after_tool_use: true - source: "human" - active_turns: 5 - priority: 5 - - # Deep reasoning and analysis - - role: "deep-thinker" - rules: - triggers: - keywords: ["think", "reason", "analyze", "explain why", "how does", "what if"] - match_type: "any" - complexity_threshold: "high" - source: "human" # Only trigger on human messages - active_turns: 3 - priority: 10 - - # Consult the oracle - - role: "oracle" - rules: - triggers: - keywords: ["think", "reason", "analyze", "explain why", "what if"] - match_type: "any" - complexity_threshold: "medium" - source: "human" # Only trigger on human messages - active_turns: 5 - priority: 15 - - # Consult the planner - - role: "planner" - rules: - triggers: - keywords: ["think", "plan", "help me", "look at", "consider"] - match_type: "any" - complexity_threshold: "low" - source: "any" # Only trigger on human messages - active_turns: 3 - priority: 5 - - - - # Code debugging and error recovery - - role: "debugger" - rules: - triggers: - keywords: ["error", "bug", "broken", "failed", "exception"] - match_type: "any" - on_failure: true - source: "any" # Can trigger on both human and machine failures - active_turns: 2 - priority: 15 # High priority for error handling - - # Code implementation specialist - - role: "coder" - rules: - triggers: - keywords: ["implement", "code", "function", "class", "refactor", "optimize"] - match_type: "any" - after_tool_use: true - source: "human" - active_turns: 2 - priority: 8 - - # Verification and review - - role: "reviewer" - rules: - triggers: - keywords: ["check", "verify", "review", "validate", "test", "correct"] - match_type: "any" - consecutive_tools: 12 # After many changes - source: "any" # Can be triggered by human request OR after lots of tool use - active_turns: 2 - priority: 6 - - # Help and guidance specialist - - role: "helper" - rules: - triggers: - keywords: ["help", "assist", "guide", "explain", "teach", "how to"] - match_type: "any" - source: "human" - active_turns: 5 - priority: 5 - - # Math and calculations - - role: "mathematician" - rules: - triggers: - keywords: ["calculate", "solve", "equation", "math", "formula", "compute"] - match_type: "any" - complexity_threshold: "medium" - source: "human" - active_turns: 1 - priority: 7 - - # Creative brainstorming - - role: "creative" - rules: - triggers: - keywords: ["idea", "brainstorm", "creative", "innovate", "design", "imagine"] - match_type: "any" - source: "human" - active_turns: 5 - priority: 4 - - # Quick responses for simple queries - - role: "quick-responder" - rules: - triggers: - complexity_threshold: "low" - source: "human" - active_turns: 0 - priority: 2 - - # Research and fact-checking - - role: "researcher" - rules: - triggers: - keywords: ["research", "find", "search", "lookup", "fact", "source", "reference"] - match_type: "any" - source: "human" - active_turns: 3 - priority: 6 - - # System recovery after multiple failures - - role: "recovery-specialist" - rules: - triggers: - consecutive_failures: 2 # After 2 consecutive tool failures - source: "machine" # Only triggers on machine-generated failures - active_turns: 10 - priority: 20 # Very high priority - - # Autonomous work reviewer - kicks in after lots of machine work - - role: "work-reviewer" - rules: - triggers: - tools_since_human: 5 # After 5+ tools used since last human input - source: "machine" # Only when machine is active - active_turns: 8 - priority: 12 - - # Progress checker - ensures the machine isn't going off track - - role: "progress-checker" - rules: - triggers: - machine_messages_without_human: 4 # After 4+ consecutive machine messages - source: "machine" - active_turns: 5 - priority: 11 - - # Intensive work monitor - for when lots of tool use is happening - - role: "intensive-work-monitor" - rules: - triggers: - consecutive_tools: 10 # 10+ tools in a row - messages_since_human: 6 # AND been working for 6+ messages - source: "machine" - active_turns: 10 - priority: 14 - diff --git a/documentation/docs/guides/multi-model/autopilot.md b/documentation/docs/guides/multi-model/autopilot.md deleted file mode 100644 index ef302b3272d8..000000000000 --- a/documentation/docs/guides/multi-model/autopilot.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -sidebar_position: 1 -title: Automatic Multi-Model Switching -sidebar_label: Automatic Model Switching ---- - -The AutoPilot feature enables intelligent, context-aware switching between different models. You simply work naturally with goose, and AutoPilot chooses the right model based on conversation content, complexity, tool usage patterns, and other triggers. - -:::warning Experimental Feature -AutoPilot is an experimental feature. Behavior and configuration may change in future releases. -::: - -## How AutoPilot Works - -After you configure which models to use for different roles, AutoPilot handles the rest. During your sessions, it automatically switches to the most appropriate model for your current task—whether you need specialized coding help, complex reasoning, or just want a second opinion. - -**For example:** -- When you ask to "debug this error," AutoPilot switches to a model optimized for debugging -- When you request "analyze the performance implications," it switches to a model better suited for complex reasoning -- When you're doing repetitive coding tasks, it uses a cost-effective model, but escalates to a more powerful one when it encounters failures - -Switching happens automatically based on: -- The terminology used in your requests ("debug", "analyze", "implement") -- How complex the task appears to be -- Whether previous attempts have failed and need a different approach -- How much autonomous work has been happening without your input - -When AutoPilot switches to a specialized model, it stays with that model for a configured number of turns before evaluating whether to switch back to the base model or to a different specialized model based on the new context. - -:::info -You can use `goose session --debug` in goose CLI to see when AutoPilot switches models. Note that each switch applies the provider's rate limits and pricing. -::: - -## Configuration - -Add the `x-advanced-models` section to your [`config.yaml`](/docs/guides/config-files) file and map your model preferences to [predefined](#predefined-roles) or custom roles. - -The `provider`, `model` and `role` parameters are required. - -```yaml -# Base provider and model (always available) -GOOSE_PROVIDER: "anthropic" -GOOSE_MODEL: "claude-sonnet-4-20250514" - -# AutoPilot models -x-advanced-models: -- provider: openai - model: o1-preview - role: deep-thinker -- provider: openai - model: gpt-4o - role: debugger -- provider: anthropic - model: claude-opus-4-20250805 - role: reviewer -``` - -**Migrate From Lead/Worker Model** - -This example shows how you can reproduce [lead model](/docs/tutorials/lead-worker) behavior using `x-advanced-models`. - -```yaml -# Before: Defined lead model using environment variables -# GOOSE_LEAD_PROVIDER=openai -# GOOSE_LEAD_MODEL=o1-preview - -# After: AutoPilot equivalent -GOOSE_PROVIDER: "anthropic" -GOOSE_MODEL: "claude-sonnet-4-20250514" # Base is used as the worker model - -x-advanced-models: -- provider: openai - model: o1-preview - role: lead # Use the predefined lead role (or define a custom role) -``` - -### Predefined Roles - -AutoPilot includes a set of predefined roles defined in [`premade_roles.yaml`](https://github.com/block/goose/blob/main/crates/goose/src/agents/model_selector/premade_roles.yaml) that goose is aware of by default. Examples include: - -- **deep-thinker**: Activates for complex reasoning tasks -- **debugger**: Switches in for error resolution -- **reviewer**: Monitors after extensive tool usage -- **coder**: Handles code implementation tasks -- **mathematician**: Processes mathematical computations - -### Custom Roles - -You can create custom roles with specific triggers by defining them in your `config.yaml` file: - -```yaml -x-advanced-models: -- provider: openai - model: gpt-4o - role: custom-debugger - rules: - triggers: - keywords: ["bug", "broken", "failing", "crash"] - consecutive_failures: 1 - active_turns: 5 - priority: 15 -``` - -
-Custom Role Configuration Fields - -**Rule Configuration:** -| Parameter | Description | Values | -|-----------|-------------|---------| -| `triggers` | Conditions that activate the role | Object (see parameters below) | -| `active_turns` | Number of turns the rule stays active once triggered | Integer (default: 5) | -| `priority` | Selection priority when multiple roles match | Integer (higher wins, default: 0) | - -**Trigger Parameters:** - -| Parameter | Description | Values | -|-----------|-------------|---------| -| `keywords` | Words that activate the role | Array of strings | -| `match_type` | How to match keywords | "any", "all" | -| `complexity_threshold` | Minimum complexity level | "low", "medium", "high" | -| `consecutive_failures` | Failures in sequence | Integer | -| `first_turn` | Trigger on conversation start | Boolean | -| `source` | Message source filter | "human", "machine", "any" | - -The previous table includes several common rule trigger parameters. For the complete list, see the `TriggerRules` struct in [`autopilot.rs`](https://github.com/block/goose/blob/main/crates/goose/src/agents/model_selector/autopilot.rs). - -
diff --git a/documentation/docs/guides/multi-model/creating-plans.md b/documentation/docs/guides/multi-model/creating-plans.md index a6ced54a5f77..10188458b133 100644 --- a/documentation/docs/guides/multi-model/creating-plans.md +++ b/documentation/docs/guides/multi-model/creating-plans.md @@ -34,10 +34,8 @@ The goose CLI plan mode uses two configuration values: - `GOOSE_PLANNER_PROVIDER`: Which provider to use for planning - `GOOSE_PLANNER_MODEL`: Which model to use for planning -:::tip Multi-Model Alternatives to Plan Mode -goose also supports two options for automatic model switching that help balance model capabilities with cost and speed: -- **[Lead/Worker mode](/docs/guides/environment-variables#leadworker-model-configuration)**: Turn-based switching between two models -- **[AutoPilot](/docs/guides/multi-model/autopilot)**: Context-aware switching between multiple models +:::tip Multi-Model Alternative to Plan Mode +goose also supports automatic model switching with [Lead/Worker mode](/docs/guides/environment-variables#leadworker-model-configuration), which provides turn-based switching between two models to help balance model capabilities with cost and speed. ::: ### Set goose planner environment variables @@ -329,4 +327,4 @@ To enter planning mode, type `/plan`. Optionally, you can append your plan desc link="/docs/tutorials/plan-feature-devcontainer-setup" /> - \ No newline at end of file + diff --git a/documentation/docs/tutorials/lead-worker.md b/documentation/docs/tutorials/lead-worker.md index 3f17251d13b1..75f912a5dc7e 100644 --- a/documentation/docs/tutorials/lead-worker.md +++ b/documentation/docs/tutorials/lead-worker.md @@ -25,10 +25,6 @@ The lead/worker model is a smart hand-off system. The "lead" model (think: GPT-4 If things go sideways (e.g. the worker model gets confused or keeps making mistakes), Goose notices and automatically pulls the lead model back in to recover. Once things are back on track, the worker takes over again. -:::tip Consider AutoPilot for Advanced Model Switching -[AutoPilot](/docs/guides/multi-model/autopilot) supports turn-based switching and also offers intelligent context-aware switching between multiple models. -::: - ## Turn-Based System A **turn** is one full interaction - your prompt and the model's response. Goose switches models based on turns: @@ -127,4 +123,4 @@ export GOOSE_LEAD_MODEL="o1-preview" # the lead model used automatically export GOOSE_PLANNER_MODEL="gpt-4o" # the model used when you explicitly call /plan ``` -Use **planning mode** when you want a dedicated reasoning model to generate comprehensive strategies that you can review and approve before execution. Use the **lead/worker model** for iterative development work where you want smart automation without interruption - like implementing features, debugging issues, or exploratory coding. Your workflow can combine both: use `/plan` to strategize major decisions, then let the lead/worker models handle the tactical implementation with automatic optimization. \ No newline at end of file +Use **planning mode** when you want a dedicated reasoning model to generate comprehensive strategies that you can review and approve before execution. Use the **lead/worker model** for iterative development work where you want smart automation without interruption - like implementing features, debugging issues, or exploratory coding. Your workflow can combine both: use `/plan` to strategize major decisions, then let the lead/worker models handle the tactical implementation with automatic optimization. From 71ee0bbb5448539924a040e2b0f64f2262eb274c Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 18 Nov 2025 11:49:30 +1100 Subject: [PATCH 2/2] more docs to edit --- documentation/docs/experimental/index.md | 7 +------ documentation/docs/getting-started/providers.md | 1 - documentation/docs/guides/config-files.md | 6 +----- documentation/docs/guides/environment-variables.md | 4 ---- documentation/docs/guides/multi-model/index.mdx | 5 ----- 5 files changed, 2 insertions(+), 21 deletions(-) diff --git a/documentation/docs/experimental/index.md b/documentation/docs/experimental/index.md index 3008b319df1b..9320df1bb99b 100644 --- a/documentation/docs/experimental/index.md +++ b/documentation/docs/experimental/index.md @@ -29,11 +29,6 @@ The list of experimental features may change as Goose development progresses. So description="An experimental Android automation app that acts as an open agent running on your phone, providing maximal automation of everyday tasks." link="/docs/experimental/goose-mobile" /> - - \ No newline at end of file + diff --git a/documentation/docs/getting-started/providers.md b/documentation/docs/getting-started/providers.md index b14b034a8c6b..6ab00c82e0a4 100644 --- a/documentation/docs/getting-started/providers.md +++ b/documentation/docs/getting-started/providers.md @@ -786,7 +786,6 @@ This method simplifies authentication and enhances security for enterprise envir Beyond single-model setups, goose supports [multi-model configurations](/docs/guides/multi-model/) that can use different models and providers for specialized tasks: -- **AutoPilot** - Intelligent, context-aware switching between specialized models based on conversation content and complexity - **Lead/Worker Model** - Automatic switching between a lead model for initial turns and a worker model for execution tasks - **Planning Mode** - Manual planning phase using a dedicated model to create detailed project breakdowns before execution diff --git a/documentation/docs/guides/config-files.md b/documentation/docs/guides/config-files.md index e5e0e80d5fb2..010476f6c7ce 100644 --- a/documentation/docs/guides/config-files.md +++ b/documentation/docs/guides/config-files.md @@ -48,10 +48,6 @@ The following settings can be configured at the root level of your config.yaml f | `security_prompt_enabled` | Enable [prompt injection detection](/docs/guides/security/prompt-injection-detection) to identify potentially harmful commands | true/false | false | No | | `security_prompt_threshold` | Sensitivity threshold for [prompt injection detection](/docs/guides/security/prompt-injection-detection) (higher = stricter) | Float between 0.01 and 1.0 | 0.7 | No | -:::info Automatic Multi-Model Configuration -The experimental [AutoPilot](/docs/guides/multi-model/autopilot) feature provides intelligent, context-aware model switching. Configure models for different roles using the `x-advanced-models` setting. -::: - ## Experimental Features These settings enable experimental features that are in active development. These may change or be removed in future releases. @@ -177,4 +173,4 @@ This will show all active settings and their current values. - **[Multi-Model Configuration](/docs/guides/multi-model/)** - For multiple model-selection strategies - **[Environment Variables](./environment-variables.md)** - For environment variable configuration -- **[Using Extensions](/docs/getting-started/using-extensions.md)** - For more details on extension configuration \ No newline at end of file +- **[Using Extensions](/docs/getting-started/using-extensions.md)** - For more details on extension configuration diff --git a/documentation/docs/guides/environment-variables.md b/documentation/docs/guides/environment-variables.md index 5f8106658df4..69489174dd51 100644 --- a/documentation/docs/guides/environment-variables.md +++ b/documentation/docs/guides/environment-variables.md @@ -52,10 +52,6 @@ export GOOSE_PROVIDER__API_KEY="your-api-key-here" These variables configure a [lead/worker model pattern](/docs/tutorials/lead-worker) where a powerful lead model handles initial planning and complex reasoning, then switches to a faster/cheaper worker model for execution. The switch happens automatically based on your settings. -:::info Automatic Multi-Model Switching -The experimental [AutoPilot](/docs/guides/multi-model/autopilot) feature provides intelligent, context-aware model switching. Configure models for different roles using the `x-advanced-models` setting. -::: - | Variable | Purpose | Values | Default | |----------|---------|---------|---------| | `GOOSE_LEAD_MODEL` | **Required to enable lead mode.** Name of the lead model | Model name (e.g., "gpt-4o", "claude-sonnet-4-20250514") | None | diff --git a/documentation/docs/guides/multi-model/index.mdx b/documentation/docs/guides/multi-model/index.mdx index fe60d17c2486..cd1fa5ee41ce 100644 --- a/documentation/docs/guides/multi-model/index.mdx +++ b/documentation/docs/guides/multi-model/index.mdx @@ -17,11 +17,6 @@ import TabItem from '@theme/TabItem';

📚 Documentation & Guides

-