diff --git a/crates/goose-cli/src/session/mod.rs b/crates/goose-cli/src/session/mod.rs index ea32ef089c53..133b3aef4886 100644 --- a/crates/goose-cli/src/session/mod.rs +++ b/crates/goose-cli/src/session/mod.rs @@ -655,26 +655,11 @@ impl CliSession { println!("{}", console::style("Summarizing conversation...").yellow()); output::show_thinking(); - // Get the provider for summarization - let _provider = self.agent.provider().await?; - - // Get session metadata if available - let session_metadata_for_compact = - if let Some(ref session_id) = self.session_id { - SessionManager::get_session(session_id, false).await.ok() - } else { - None - }; - - // Call the summarize_context method - let (_, summarized_messages, _token_counts, summarization_usage) = - goose::context_mgmt::check_and_compact_messages( + let (summarized_messages, _token_counts, summarization_usage) = + goose::context_mgmt::compact_messages( &self.agent, - self.messages.messages(), - true, + &self.messages, false, - None, - session_metadata_for_compact.as_ref(), ) .await?; diff --git a/crates/goose-server/src/routes/context.rs b/crates/goose-server/src/routes/context.rs index 63dd203af612..97f964280269 100644 --- a/crates/goose-server/src/routes/context.rs +++ b/crates/goose-server/src/routes/context.rs @@ -1,6 +1,6 @@ use crate::state::AppState; use axum::{extract::State, http::StatusCode, routing::post, Json, Router}; -use goose::conversation::message::Message; +use goose::conversation::{message::Message, Conversation}; use serde::{Deserialize, Serialize}; use std::sync::Arc; use utoipa::ToSchema; @@ -46,16 +46,12 @@ async fn manage_context( ) -> Result, StatusCode> { let agent = state.get_agent_for_route(request.session_id).await?; - let (_, processed_messages, token_counts, _) = goose::context_mgmt::check_and_compact_messages( - &agent, - &request.messages, - true, - false, - None, - None, - ) - .await - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + let conversation = Conversation::new_unvalidated(request.messages); + + let (processed_messages, token_counts, _) = + goose::context_mgmt::compact_messages(&agent, &conversation, false) + .await + .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; // TODO(Douwe): store into db Ok(Json(ContextManageResponse { diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs index 4a31d7826cb1..b2ade9ea89fb 100644 --- a/crates/goose/src/agents/agent.rs +++ b/crates/goose/src/agents/agent.rs @@ -33,7 +33,7 @@ use crate::agents::tool_router_index_manager::ToolRouterIndexManager; use crate::agents::types::SessionConfig; use crate::agents::types::{FrontendTool, ToolResultReceiver}; use crate::config::{get_enabled_extensions, get_extension_by_name, Config}; -use crate::context_mgmt::{check_and_compact_messages, DEFAULT_COMPACTION_THRESHOLD}; +use crate::context_mgmt::DEFAULT_COMPACTION_THRESHOLD; use crate::conversation::{debug_conversation_fix, fix_conversation, Conversation}; use crate::mcp_utils::ToolResult; use crate::permission::permission_inspector::PermissionInspector; @@ -918,22 +918,29 @@ impl Agent { None }; - let (did_compact, compacted_conversation, compaction_error) = - match check_and_compact_messages( - self, - unfixed_conversation.messages(), - false, - false, - None, - session_metadata.as_ref(), - ) - .await - { - Ok((did_compact, conversation, _removed_indices, _summarization_usage)) => { - (did_compact, conversation, None) + let check_result = crate::context_mgmt::check_if_compaction_needed( + self, + &unfixed_conversation, + None, + session_metadata.as_ref(), + ) + .await; + + let (did_compact, compacted_conversation, compaction_error) = match check_result { + // TODO(dkatz): send a notification that we are starting compaction here. + Ok(true) => { + match crate::context_mgmt::compact_messages(self, &unfixed_conversation, false) + .await + { + Ok((conversation, _token_counts, _summarization_usage)) => { + (true, conversation, None) + } + Err(e) => (false, unfixed_conversation.clone(), Some(e)), } - Err(e) => (false, unfixed_conversation.clone(), Some(e)), - }; + } + Ok(false) => (false, unfixed_conversation, None), + Err(e) => (false, unfixed_conversation.clone(), Some(e)), + }; if did_compact { // Get threshold from config to include in message @@ -970,7 +977,7 @@ impl Agent { )); })) } else { - self.reply_internal(unfixed_conversation, session, cancel_token) + self.reply_internal(compacted_conversation, session, cancel_token) .await } } @@ -1297,15 +1304,9 @@ impl Agent { Err(ProviderError::ContextLengthExceeded(_error_msg)) => { info!("Context length exceeded, attempting compaction"); - // Get session metadata if available - let session_metadata_for_compact = if let Some(ref session_config) = session { - SessionManager::get_session(&session_config.id, false).await.ok() - } else { - None - }; - - match check_and_compact_messages(self, conversation.messages(), true, true, None, session_metadata_for_compact.as_ref()).await { - Ok((_did_compact, compacted_conversation, _removed_indices, _usage)) => { + // TODO(dkatz): send a notification that we are starting compaction here. + match crate::context_mgmt::compact_messages(self, &conversation, true).await { + Ok((compacted_conversation, _token_counts, _usage)) => { conversation = compacted_conversation; did_recovery_compact_this_iteration = true; diff --git a/crates/goose/src/context_mgmt/mod.rs b/crates/goose/src/context_mgmt/mod.rs index 8b2c73c3feac..a56c87006d1f 100644 --- a/crates/goose/src/context_mgmt/mod.rs +++ b/crates/goose/src/context_mgmt/mod.rs @@ -12,135 +12,67 @@ use tracing::{debug, info}; pub const DEFAULT_COMPACTION_THRESHOLD: f64 = 0.8; -/// Result of auto-compaction check -#[derive(Debug)] -pub struct AutoCompactResult { - /// Whether compaction was performed - pub compacted: bool, - /// The messages after potential compaction - pub messages: Conversation, - /// Provider usage from summarization (if compaction occurred) - /// This contains the actual token counts after compaction - pub summarization_usage: Option, -} - -/// Result of checking if compaction is needed -#[derive(Debug)] -pub struct CompactionCheckResult { - /// Whether compaction is needed - pub needs_compaction: bool, - /// Current token count - pub current_tokens: usize, - /// Context limit being used - pub context_limit: usize, - /// Current usage ratio (0.0 to 1.0) - pub usage_ratio: f64, - /// Remaining tokens before compaction threshold - pub remaining_tokens: usize, - /// Percentage until compaction threshold (0.0 to 100.0) - pub percentage_until_compaction: f64, -} - #[derive(Serialize)] struct SummarizeContext { messages: String, } -/// Check if messages need compaction and compact them if necessary +/// Compact messages by summarizing them /// -/// This function combines checking and compaction. It first checks if compaction -/// is needed based on the threshold, and if so, performs the compaction by -/// summarizing messages and updating their visibility metadata. +/// This function performs the actual compaction by summarizing messages and updating +/// their visibility metadata. It does not check thresholds - use `check_if_compaction_needed` +/// first to determine if compaction is necessary. /// /// # Arguments /// * `agent` - The agent to use for context management -/// * `messages` - The current message history -/// * `force_compact` - If true, skip the threshold check and force compaction +/// * `conversation` - The current conversation history /// * `preserve_last_user_message` - If true and last message is not a user message, copy the most recent user message to the end -/// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config) -/// * `session_metadata` - Optional session metadata containing actual token counts /// /// # Returns /// * A tuple containing: -/// - `bool`: Whether compaction was performed -/// - `Conversation`: The potentially compacted messages -/// - `Vec`: Indices of removed messages (empty if no compaction) -/// - `Option`: Provider usage from summarization (if compaction occurred) -pub async fn check_and_compact_messages( +/// - `Conversation`: The compacted messages +/// - `Vec`: Token counts for each message +/// - `Option`: Provider usage from summarization +pub async fn compact_messages( agent: &Agent, - messages_with_user_message: &[Message], - force_compact: bool, + conversation: &Conversation, preserve_last_user_message: bool, - threshold_override: Option, - session_metadata: Option<&crate::session::Session>, -) -> std::result::Result<(bool, Conversation, Vec, Option), anyhow::Error> { - if !force_compact { - let check_result = check_compaction_needed( - agent, - messages_with_user_message, - threshold_override, - session_metadata, - ) - .await?; +) -> Result<(Conversation, Vec, Option)> { + info!("Performing message compaction"); - // If no compaction is needed, return early - if !check_result.needs_compaction { - debug!( - "No compaction needed (usage: {:.1}% <= {:.1}% threshold)", - check_result.usage_ratio * 100.0, - check_result.percentage_until_compaction - ); - return Ok(( - false, - Conversation::new_unvalidated(messages_with_user_message.to_vec()), - Vec::new(), - None, - )); - } - - info!( - "Performing message compaction (usage: {:.1}%)", - check_result.usage_ratio * 100.0 - ); - } else { - info!("Forcing message compaction due to context limit exceeded"); - } + let messages = conversation.messages(); - // Perform the actual compaction // Check if the most recent message is a user message - let (messages, preserved_user_message) = - if let Some(last_message) = messages_with_user_message.last() { - if matches!(last_message.role, rmcp::model::Role::User) { - // Remove the last user message before compaction - ( - &messages_with_user_message[..messages_with_user_message.len() - 1], - Some(last_message.clone()), - ) - } else if preserve_last_user_message { - // Last message is not a user message, but we want to preserve the most recent user message - // Find the most recent user message and copy it (don't remove from history) - let most_recent_user_message = messages_with_user_message - .iter() - .rev() - .find(|msg| matches!(msg.role, rmcp::model::Role::User)) - .cloned(); - (messages_with_user_message, most_recent_user_message) - } else { - (messages_with_user_message, None) - } + let (messages_to_compact, preserved_user_message) = if let Some(last_message) = messages.last() + { + if matches!(last_message.role, rmcp::model::Role::User) { + // Remove the last user message before compaction + (&messages[..messages.len() - 1], Some(last_message.clone())) + } else if preserve_last_user_message { + // Last message is not a user message, but we want to preserve the most recent user message + // Find the most recent user message and copy it (don't remove from history) + let most_recent_user_message = messages + .iter() + .rev() + .find(|msg| matches!(msg.role, rmcp::model::Role::User)) + .cloned(); + (messages.as_slice(), most_recent_user_message) } else { - (messages_with_user_message, None) - }; + (messages.as_slice(), None) + } + } else { + (messages.as_slice(), None) + }; let provider = agent.provider().await?; - let summary = do_compact(provider.clone(), messages).await?; + let summary = do_compact(provider.clone(), messages_to_compact).await?; let (summary_message, summarization_usage) = match summary { Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)), None => { // No summary was generated (empty input) tracing::warn!("Summarization failed. Returning empty messages."); - return Ok((false, Conversation::empty(), vec![], None)); + return Ok((Conversation::empty(), vec![], None)); } }; @@ -153,7 +85,7 @@ pub async fn check_and_compact_messages( let mut final_token_counts = Vec::new(); // Add all original messages with updated visibility (preserve user_visible, set agent_visible=false) - for msg in messages.iter().cloned() { + for msg in messages_to_compact.iter().cloned() { let updated_metadata = msg.metadata.with_agent_invisible(); let updated_msg = msg.with_metadata(updated_metadata); final_messages.push(updated_msg); @@ -198,34 +130,20 @@ Just continue the conversation naturally based on the summarized context" } Ok(( - true, Conversation::new_unvalidated(final_messages), final_token_counts, summarization_usage, )) } -/// Check if messages need compaction without performing the compaction -/// -/// This function analyzes the current token usage and returns detailed information -/// about whether compaction is needed and how close we are to the threshold. -/// It prioritizes actual token counts from session metadata when available, -/// falling back to estimated counts if needed. -/// -/// # Arguments -/// * `agent` - The agent to use for context management -/// * `messages` - The current message history -/// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config) -/// * `session_metadata` - Optional session metadata containing actual token counts -/// -/// # Returns -/// * `CompactionCheckResult` containing detailed information about compaction needs -async fn check_compaction_needed( +/// Check if messages exceed the auto-compaction threshold +pub async fn check_if_compaction_needed( agent: &Agent, - messages: &[Message], + conversation: &Conversation, threshold_override: Option, session_metadata: Option<&crate::session::Session>, -) -> Result { +) -> Result { + let messages = conversation.messages(); let config = Config::global(); // TODO(Douwe): check the default here; it seems to reset to 0.3 sometimes let threshold = threshold_override.unwrap_or_else(|| { @@ -256,15 +174,6 @@ async fn check_compaction_needed( let usage_ratio = current_tokens as f64 / context_limit as f64; - let threshold_tokens = (context_limit as f64 * threshold) as usize; - let remaining_tokens = threshold_tokens.saturating_sub(current_tokens); - - let percentage_until_compaction = if usage_ratio < threshold { - (threshold - usage_ratio) * 100.0 - } else { - 0.0 - }; - let needs_compaction = if threshold <= 0.0 || threshold >= 1.0 { usage_ratio > DEFAULT_COMPACTION_THRESHOLD } else { @@ -281,14 +190,7 @@ async fn check_compaction_needed( token_source ); - Ok(CompactionCheckResult { - needs_compaction, - current_tokens, - context_limit, - usage_ratio, - remaining_tokens, - percentage_until_compaction, - }) + Ok(needs_compaction) } async fn do_compact(