diff --git a/crates/goose-cli/src/session/input.rs b/crates/goose-cli/src/session/input.rs index 987415d1251f..081be75f9862 100644 --- a/crates/goose-cli/src/session/input.rs +++ b/crates/goose-cli/src/session/input.rs @@ -20,7 +20,7 @@ pub enum InputResult { EndPlan, Clear, Recipe(Option), - Summarize, + Compact, } #[derive(Debug)] @@ -120,7 +120,8 @@ fn handle_slash_command(input: &str) -> Option { const CMD_ENDPLAN: &str = "/endplan"; const CMD_CLEAR: &str = "/clear"; const CMD_RECIPE: &str = "/recipe"; - const CMD_SUMMARIZE: &str = "/summarize"; + const CMD_COMPACT: &str = "/compact"; + const CMD_SUMMARIZE_DEPRECATED: &str = "/summarize"; match input { "/exit" | "/quit" => Some(InputResult::Exit), @@ -180,7 +181,11 @@ fn handle_slash_command(input: &str) -> Option { s if s == CMD_ENDPLAN => Some(InputResult::EndPlan), s if s == CMD_CLEAR => Some(InputResult::Clear), s if s.starts_with(CMD_RECIPE) => parse_recipe_command(s), - s if s == CMD_SUMMARIZE => Some(InputResult::Summarize), + s if s == CMD_COMPACT => Some(InputResult::Compact), + s if s == CMD_SUMMARIZE_DEPRECATED => { + println!("{}", console::style("⚠️ Note: /summarize has been renamed to /compact and will be removed in a future release.").yellow()); + Some(InputResult::Compact) + } _ => None, } } @@ -305,7 +310,7 @@ fn print_help() { /endplan - Exit plan mode and return to 'normal' goose mode. /recipe [filepath] - Generate a recipe from the current conversation and save it to the specified filepath (must end with .yaml). If no filepath is provided, it will be saved to ./recipe.yaml. -/summarize - Summarize the current conversation to reduce context length while preserving key information. +/compact - Compact the current conversation to reduce context length while preserving key information. /? or /help - Display this help message /clear - Clears the current chat history @@ -541,17 +546,6 @@ mod tests { assert!(matches!(result, Some(InputResult::Retry))); } - #[test] - fn test_summarize_command() { - // Test the summarize command - let result = handle_slash_command("/summarize"); - assert!(matches!(result, Some(InputResult::Summarize))); - - // Test with whitespace - let result = handle_slash_command(" /summarize "); - assert!(matches!(result, Some(InputResult::Summarize))); - } - #[test] fn test_get_input_prompt_string() { let prompt = get_input_prompt_string(); diff --git a/crates/goose-cli/src/session/mod.rs b/crates/goose-cli/src/session/mod.rs index 529fff3a1fa7..bed33e87fa9c 100644 --- a/crates/goose-cli/src/session/mod.rs +++ b/crates/goose-cli/src/session/mod.rs @@ -28,7 +28,7 @@ use anyhow::{Context, Result}; use completion::GooseCompleter; use goose::agents::extension::{Envs, ExtensionConfig}; use goose::agents::types::RetryConfig; -use goose::agents::{Agent, SessionConfig}; +use goose::agents::{Agent, SessionConfig, MANUAL_COMPACT_TRIGGER}; use goose::config::{Config, GooseMode}; use goose::providers::pricing::initialize_pricing_cache; use goose::session::SessionManager; @@ -641,16 +641,16 @@ impl CliSession { continue; } - InputResult::Summarize => { + InputResult::Compact => { save_history(&mut editor); - let prompt = "Are you sure you want to summarize this conversation? This will condense the message history."; + let prompt = "Are you sure you want to compact this conversation? This will condense the message history."; let should_summarize = match cliclack::confirm(prompt).initial_value(true).interact() { Ok(choice) => choice, Err(e) => { if e.kind() == std::io::ErrorKind::Interrupted { - false // If interrupted, set should_summarize to false + false } else { return Err(e.into()); } @@ -658,81 +658,13 @@ impl CliSession { }; if should_summarize { - println!("{}", console::style("Summarizing conversation...").yellow()); + self.push_message(Message::user().with_text(MANUAL_COMPACT_TRIGGER)); output::show_thinking(); - - let (summarized_messages, _token_counts, summarization_usage) = - goose::context_mgmt::compact_messages( - &self.agent, - &self.messages, - false, - ) + self.process_agent_response(true, CancellationToken::default()) .await?; - - // Update the session messages with the summarized ones - self.messages = summarized_messages.clone(); - - // Persist the summarized messages and update session metadata - if let Some(session_id) = &self.session_id { - // Replace all messages with the summarized version - SessionManager::replace_conversation(session_id, &summarized_messages) - .await?; - - // Update session metadata with the new token counts from summarization - if let Some(usage) = summarization_usage { - let session = - SessionManager::get_session(session_id, false).await?; - - // Update token counts with the summarization usage - let summary_tokens = usage.usage.output_tokens.unwrap_or(0); - - // Update accumulated tokens (add the summarization cost) - let accumulate = |a: Option, b: Option| -> Option { - match (a, b) { - (Some(x), Some(y)) => Some(x + y), - _ => a.or(b), - } - }; - - let accumulated_total = accumulate( - session.accumulated_total_tokens, - usage.usage.total_tokens, - ); - let accumulated_input = accumulate( - session.accumulated_input_tokens, - usage.usage.input_tokens, - ); - let accumulated_output = accumulate( - session.accumulated_output_tokens, - usage.usage.output_tokens, - ); - - SessionManager::update_session(session_id) - .total_tokens(Some(summary_tokens)) - .input_tokens(None) - .output_tokens(Some(summary_tokens)) - .accumulated_total_tokens(accumulated_total) - .accumulated_input_tokens(accumulated_input) - .accumulated_output_tokens(accumulated_output) - .apply() - .await?; - } - } - output::hide_thinking(); - println!( - "{}", - console::style("Conversation has been summarized.").green() - ); - println!( - "{}", - console::style( - "Key information has been preserved while reducing context length." - ) - .green() - ); } else { - println!("{}", console::style("Summarization cancelled.").yellow()); + println!("{}", console::style("Compaction cancelled.").yellow()); } continue; } diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index 444c7666f13a..f958397046c3 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -65,7 +65,7 @@ use crate::session::SessionManager; const DEFAULT_MAX_TURNS: u32 = 1000; const COMPACTION_THINKING_TEXT: &str = "goose is compacting the conversation..."; -const MANUAL_COMPACT_TRIGGER: &str = "Please compact this conversation"; +pub const MANUAL_COMPACT_TRIGGER: &str = "Please compact this conversation"; /// Context needed for the reply function pub struct ReplyContext { @@ -803,9 +803,10 @@ impl Agent { ); match crate::context_mgmt::compact_messages(self, &conversation_to_compact, false).await { - Ok((compacted_conversation, _token_counts, _summarization_usage)) => { + Ok((compacted_conversation, summarization_usage)) => { if let Some(session_to_store) = &session { SessionManager::replace_conversation(&session_to_store.id, &compacted_conversation).await?; + Self::update_session_metrics(session_to_store, &summarization_usage, true).await?; } yield AgentEvent::HistoryReplaced(compacted_conversation.clone()); @@ -991,7 +992,7 @@ impl Agent { // Record usage for the session if let Some(ref session_config) = &session { if let Some(ref usage) = usage { - Self::update_session_metrics(session_config, usage).await?; + Self::update_session_metrics(session_config, usage, false).await?; } } @@ -1166,9 +1167,10 @@ impl Agent { ); match crate::context_mgmt::compact_messages(self, &conversation, true).await { - Ok((compacted_conversation, _token_counts, _usage)) => { + Ok((compacted_conversation, usage)) => { if let Some(session_to_store) = &session { - SessionManager::replace_conversation(&session_to_store.id, &compacted_conversation).await? + SessionManager::replace_conversation(&session_to_store.id, &compacted_conversation).await?; + Self::update_session_metrics(session_to_store, &usage, true).await?; } conversation = compacted_conversation; diff --git a/crates/goose/src/agents/mod.rs b/crates/goose/src/agents/mod.rs index 5fa9f0ce109c..0f633307c747 100644 --- a/crates/goose/src/agents/mod.rs +++ b/crates/goose/src/agents/mod.rs @@ -26,7 +26,7 @@ mod tool_route_manager; mod tool_router_index_manager; pub mod types; -pub use agent::{Agent, AgentEvent}; +pub use agent::{Agent, AgentEvent, MANUAL_COMPACT_TRIGGER}; pub use extension::ExtensionConfig; pub use extension_manager::ExtensionManager; pub use prompt_manager::PromptManager; diff --git a/crates/goose/src/agents/reply_parts.rs b/crates/goose/src/agents/reply_parts.rs index 1d8fdaea06b4..86cb8095c472 100644 --- a/crates/goose/src/agents/reply_parts.rs +++ b/crates/goose/src/agents/reply_parts.rs @@ -255,6 +255,7 @@ impl Agent { pub(crate) async fn update_session_metrics( session_config: &crate::agents::types::SessionConfig, usage: &ProviderUsage, + is_compaction_usage: bool, ) -> Result<()> { let session_id = session_config.id.as_str(); let session = SessionManager::get_session(session_id, false).await?; @@ -273,11 +274,23 @@ impl Agent { let accumulated_output = accumulate(session.accumulated_output_tokens, usage.usage.output_tokens); + let (current_total, current_input, current_output) = if is_compaction_usage { + // After compaction: summary output becomes new input context + let new_input = usage.usage.output_tokens; + (new_input, new_input, None) + } else { + ( + usage.usage.total_tokens, + usage.usage.input_tokens, + usage.usage.output_tokens, + ) + }; + SessionManager::update_session(session_id) .schedule_id(session_config.schedule_id.clone()) - .total_tokens(usage.usage.total_tokens) - .input_tokens(usage.usage.input_tokens) - .output_tokens(usage.usage.output_tokens) + .total_tokens(current_total) + .input_tokens(current_input) + .output_tokens(current_output) .accumulated_total_tokens(accumulated_total) .accumulated_input_tokens(accumulated_input) .accumulated_output_tokens(accumulated_output) diff --git a/crates/goose/src/context_mgmt/mod.rs b/crates/goose/src/context_mgmt/mod.rs index 03b2871b017e..57e43a706a75 100644 --- a/crates/goose/src/context_mgmt/mod.rs +++ b/crates/goose/src/context_mgmt/mod.rs @@ -31,13 +31,12 @@ struct SummarizeContext { /// # Returns /// * A tuple containing: /// - `Conversation`: The compacted messages -/// - `Vec`: Token counts for each message -/// - `Option`: Provider usage from summarization +/// - `ProviderUsage`: Provider usage from summarization pub async fn compact_messages( agent: &Agent, conversation: &Conversation, preserve_last_user_message: bool, -) -> Result<(Conversation, Vec, Option)> { +) -> Result<(Conversation, ProviderUsage)> { info!("Performing message compaction"); let messages = conversation.messages(); @@ -99,16 +98,8 @@ pub async fn compact_messages( }; let provider = agent.provider().await?; - let summary = do_compact(provider.clone(), messages_to_compact).await?; - - let (summary_message, summarization_usage) = match summary { - Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)), - None => { - // No summary was generated (empty input) - tracing::warn!("Summarization failed. Returning empty messages."); - return Ok((Conversation::empty(), vec![], None)); - } - }; + let (summary_message, summarization_usage) = + do_compact(provider.clone(), messages_to_compact).await?; // Create the final message list with updated visibility metadata: // 1. Original messages become user_visible but not agent_visible @@ -116,27 +107,17 @@ pub async fn compact_messages( // 3. Assistant messages to continue the conversation remain both user_visible and agent_visible let mut final_messages = Vec::new(); - let mut final_token_counts = Vec::new(); // Add all original messages with updated visibility (preserve user_visible, set agent_visible=false) for msg in messages_to_compact.iter().cloned() { let updated_metadata = msg.metadata.with_agent_invisible(); let updated_msg = msg.with_metadata(updated_metadata); final_messages.push(updated_msg); - // Token count doesn't matter for agent_visible=false messages, but we'll use 0 - final_token_counts.push(0); } // Add the summary message (agent_visible=true, user_visible=false) let summary_msg = summary_message.with_metadata(MessageMetadata::agent_only()); - // For token counting purposes, we use the output tokens (the actual summary content) - // since that's what will be in the context going forward - let summary_tokens = summarization_usage - .as_ref() - .and_then(|usage| usage.usage.output_tokens) - .unwrap_or(0) as usize; final_messages.push(summary_msg); - final_token_counts.push(summary_tokens); // Add an assistant message to continue the conversation (agent_visible=true, user_visible=false) let assistant_message = Message::assistant() @@ -146,9 +127,7 @@ Do not mention that you read a summary or that conversation summarization occurr Just continue the conversation naturally based on the summarized context" ) .with_metadata(MessageMetadata::agent_only()); - let assistant_message_tokens: usize = 0; // Not counted since it's for agent context only final_messages.push(assistant_message); - final_token_counts.push(assistant_message_tokens); // Add back the preserved user message if it exists if let Some(user_text) = preserved_user_text { @@ -157,7 +136,6 @@ Just continue the conversation naturally based on the summarized context" Ok(( Conversation::new_unvalidated(final_messages), - final_token_counts, summarization_usage, )) } @@ -222,7 +200,7 @@ pub async fn check_if_compaction_needed( async fn do_compact( provider: Arc, messages: &[Message], -) -> Result, anyhow::Error> { +) -> Result<(Message, ProviderUsage), anyhow::Error> { let agent_visible_messages: Vec<&Message> = messages .iter() .filter(|msg| msg.is_agent_visible()) @@ -255,7 +233,7 @@ async fn do_compact( .await .map_err(|e| anyhow::anyhow!("Failed to ensure usage tokens: {}", e))?; - Ok(Some((response, provider_usage))) + Ok((response, provider_usage)) } fn format_message_for_compacting(msg: &Message) -> String {