Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 3 additions & 18 deletions crates/goose-cli/src/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -655,26 +655,11 @@ impl CliSession {
println!("{}", console::style("Summarizing conversation...").yellow());
output::show_thinking();

// Get the provider for summarization
let _provider = self.agent.provider().await?;

// Get session metadata if available
let session_metadata_for_compact =
if let Some(ref session_id) = self.session_id {
SessionManager::get_session(session_id, false).await.ok()
} else {
None
};

// Call the summarize_context method
let (_, summarized_messages, _token_counts, summarization_usage) =
goose::context_mgmt::check_and_compact_messages(
let (summarized_messages, _token_counts, summarization_usage) =
goose::context_mgmt::compact_messages(
&self.agent,
self.messages.messages(),
true,
&self.messages,
false,
None,
session_metadata_for_compact.as_ref(),
)
.await?;

Expand Down
18 changes: 7 additions & 11 deletions crates/goose-server/src/routes/context.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::state::AppState;
use axum::{extract::State, http::StatusCode, routing::post, Json, Router};
use goose::conversation::message::Message;
use goose::conversation::{message::Message, Conversation};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use utoipa::ToSchema;
Expand Down Expand Up @@ -46,16 +46,12 @@ async fn manage_context(
) -> Result<Json<ContextManageResponse>, StatusCode> {
let agent = state.get_agent_for_route(request.session_id).await?;

let (_, processed_messages, token_counts, _) = goose::context_mgmt::check_and_compact_messages(
&agent,
&request.messages,
true,
false,
None,
None,
)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
let conversation = Conversation::new_unvalidated(request.messages);

let (processed_messages, token_counts, _) =
goose::context_mgmt::compact_messages(&agent, &conversation, false)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
// TODO(Douwe): store into db

Ok(Json(ContextManageResponse {
Expand Down
53 changes: 27 additions & 26 deletions crates/goose/src/agents/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use crate::agents::tool_router_index_manager::ToolRouterIndexManager;
use crate::agents::types::SessionConfig;
use crate::agents::types::{FrontendTool, ToolResultReceiver};
use crate::config::{get_enabled_extensions, get_extension_by_name, Config};
use crate::context_mgmt::{check_and_compact_messages, DEFAULT_COMPACTION_THRESHOLD};
use crate::context_mgmt::DEFAULT_COMPACTION_THRESHOLD;
use crate::conversation::{debug_conversation_fix, fix_conversation, Conversation};
use crate::mcp_utils::ToolResult;
use crate::permission::permission_inspector::PermissionInspector;
Expand Down Expand Up @@ -918,22 +918,29 @@ impl Agent {
None
};

let (did_compact, compacted_conversation, compaction_error) =
match check_and_compact_messages(
self,
unfixed_conversation.messages(),
false,
false,
None,
session_metadata.as_ref(),
)
.await
{
Ok((did_compact, conversation, _removed_indices, _summarization_usage)) => {
(did_compact, conversation, None)
let check_result = crate::context_mgmt::check_if_compaction_needed(
self,
&unfixed_conversation,
None,
session_metadata.as_ref(),
)
.await;

let (did_compact, compacted_conversation, compaction_error) = match check_result {
// TODO(dkatz): send a notification that we are starting compaction here.
Ok(check_result) if check_result.needs_compaction => {
match crate::context_mgmt::compact_messages(self, &unfixed_conversation, false)
.await
{
Ok((conversation, _token_counts, _summarization_usage)) => {
(true, conversation, None)
}
Err(e) => (false, unfixed_conversation.clone(), Some(e)),
}
Err(e) => (false, unfixed_conversation.clone(), Some(e)),
};
}
Ok(_) => (false, unfixed_conversation, None),
Err(e) => (false, unfixed_conversation.clone(), Some(e)),
};

if did_compact {
// Get threshold from config to include in message
Expand Down Expand Up @@ -970,7 +977,7 @@ impl Agent {
));
}))
} else {
self.reply_internal(unfixed_conversation, session, cancel_token)
self.reply_internal(compacted_conversation, session, cancel_token)
.await
}
}
Expand Down Expand Up @@ -1297,15 +1304,9 @@ impl Agent {
Err(ProviderError::ContextLengthExceeded(_error_msg)) => {
info!("Context length exceeded, attempting compaction");

// Get session metadata if available
let session_metadata_for_compact = if let Some(ref session_config) = session {
SessionManager::get_session(&session_config.id, false).await.ok()
} else {
None
};

match check_and_compact_messages(self, conversation.messages(), true, true, None, session_metadata_for_compact.as_ref()).await {
Ok((_did_compact, compacted_conversation, _removed_indices, _usage)) => {
// TODO(dkatz): send a notification that we are starting compaction here.
match crate::context_mgmt::compact_messages(self, &conversation, true).await {
Ok((compacted_conversation, _token_counts, _usage)) => {
conversation = compacted_conversation;
did_recovery_compact_this_iteration = true;

Expand Down
128 changes: 44 additions & 84 deletions crates/goose/src/context_mgmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,101 +46,62 @@ struct SummarizeContext {
messages: String,
}

/// Check if messages need compaction and compact them if necessary
/// Compact messages by summarizing them
///
/// This function combines checking and compaction. It first checks if compaction
/// is needed based on the threshold, and if so, performs the compaction by
/// summarizing messages and updating their visibility metadata.
/// This function performs the actual compaction by summarizing messages and updating
/// their visibility metadata. It does not check thresholds - use `check_if_compaction_needed`
/// first to determine if compaction is necessary.
///
/// # Arguments
/// * `agent` - The agent to use for context management
/// * `messages` - The current message history
/// * `force_compact` - If true, skip the threshold check and force compaction
/// * `conversation` - The current conversation history
/// * `preserve_last_user_message` - If true and last message is not a user message, copy the most recent user message to the end
/// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config)
/// * `session_metadata` - Optional session metadata containing actual token counts
///
/// # Returns
/// * A tuple containing:
/// - `bool`: Whether compaction was performed
/// - `Conversation`: The potentially compacted messages
/// - `Vec<usize>`: Indices of removed messages (empty if no compaction)
/// - `Option<ProviderUsage>`: Provider usage from summarization (if compaction occurred)
pub async fn check_and_compact_messages(
/// - `Conversation`: The compacted messages
/// - `Vec<usize>`: Token counts for each message
/// - `Option<ProviderUsage>`: Provider usage from summarization
pub async fn compact_messages(
agent: &Agent,
messages_with_user_message: &[Message],
force_compact: bool,
conversation: &Conversation,
preserve_last_user_message: bool,
threshold_override: Option<f64>,
session_metadata: Option<&crate::session::Session>,
) -> std::result::Result<(bool, Conversation, Vec<usize>, Option<ProviderUsage>), anyhow::Error> {
if !force_compact {
let check_result = check_compaction_needed(
agent,
messages_with_user_message,
threshold_override,
session_metadata,
)
.await?;

// If no compaction is needed, return early
if !check_result.needs_compaction {
debug!(
"No compaction needed (usage: {:.1}% <= {:.1}% threshold)",
check_result.usage_ratio * 100.0,
check_result.percentage_until_compaction
);
return Ok((
false,
Conversation::new_unvalidated(messages_with_user_message.to_vec()),
Vec::new(),
None,
));
}
) -> Result<(Conversation, Vec<usize>, Option<ProviderUsage>)> {
info!("Performing message compaction");

info!(
"Performing message compaction (usage: {:.1}%)",
check_result.usage_ratio * 100.0
);
} else {
info!("Forcing message compaction due to context limit exceeded");
}
let messages = conversation.messages();

// Perform the actual compaction
// Check if the most recent message is a user message
let (messages, preserved_user_message) =
if let Some(last_message) = messages_with_user_message.last() {
if matches!(last_message.role, rmcp::model::Role::User) {
// Remove the last user message before compaction
(
&messages_with_user_message[..messages_with_user_message.len() - 1],
Some(last_message.clone()),
)
} else if preserve_last_user_message {
// Last message is not a user message, but we want to preserve the most recent user message
// Find the most recent user message and copy it (don't remove from history)
let most_recent_user_message = messages_with_user_message
.iter()
.rev()
.find(|msg| matches!(msg.role, rmcp::model::Role::User))
.cloned();
(messages_with_user_message, most_recent_user_message)
} else {
(messages_with_user_message, None)
}
let (messages_to_compact, preserved_user_message) = if let Some(last_message) = messages.last()
{
if matches!(last_message.role, rmcp::model::Role::User) {
// Remove the last user message before compaction
(&messages[..messages.len() - 1], Some(last_message.clone()))
} else if preserve_last_user_message {
// Last message is not a user message, but we want to preserve the most recent user message
// Find the most recent user message and copy it (don't remove from history)
let most_recent_user_message = messages
.iter()
.rev()
.find(|msg| matches!(msg.role, rmcp::model::Role::User))
.cloned();
(messages.as_slice(), most_recent_user_message)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this whole block strikes me as weird. there's two cases here, either the last message is a user message (i.e. the user said something and we decided to auto compact or the user said something, our autocompacting didn't trigger but we ran out tokens never the less, i.e. user has compact limit at 99% and posts something long) or it is not (i.e. we ran out of context while processing a tool call reply)

we set preserve_last_user_message to true if this is triggered from the tool calling loop, but in all other cases, the last message should be a user message. so I don't think we need this flag at all.

Copy link
Collaborator Author

@katzdave katzdave Oct 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first case where the last message is a user one, is from auto-compact.

The second exists for the difference of manual summarize vs tool calling case you described.

In manual compaction we don't actually want to re-trigger the agent loop (and the last message is probably an agent message from the last response), but if we hit a tool call ContextLimitExceeded, I want to find and re-process the most recent user message and continue the agent loop automatically.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes I understand. I'm just saying there's no difference.

but if we hit a tool call ContextLimitExceeded, I want to find and re-process the most recent user message and continue the agent loop automatically.

that is one way, but the other is that we ran out of context with a user mesage. so in either case you are looking for the last user message.

The other thing we're missing here I think is that tool responses are also user mesages, so you'll typically return that for the tool loop case.

} else {
(messages_with_user_message, None)
};
(messages.as_slice(), None)
}
} else {
(messages.as_slice(), None)
};

let provider = agent.provider().await?;
let summary = do_compact(provider.clone(), messages).await?;
let summary = do_compact(provider.clone(), messages_to_compact).await?;

let (summary_message, summarization_usage) = match summary {
Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)),
None => {
// No summary was generated (empty input)
tracing::warn!("Summarization failed. Returning empty messages.");
return Ok((false, Conversation::empty(), vec![], None));
return Ok((Conversation::empty(), vec![], None));
}
};

Expand All @@ -153,7 +114,7 @@ pub async fn check_and_compact_messages(
let mut final_token_counts = Vec::new();

// Add all original messages with updated visibility (preserve user_visible, set agent_visible=false)
for msg in messages.iter().cloned() {
for msg in messages_to_compact.iter().cloned() {
let updated_metadata = msg.metadata.with_agent_invisible();
let updated_msg = msg.with_metadata(updated_metadata);
final_messages.push(updated_msg);
Expand Down Expand Up @@ -198,34 +159,33 @@ Just continue the conversation naturally based on the summarized context"
}

Ok((
true,
Conversation::new_unvalidated(final_messages),
final_token_counts,
summarization_usage,
))
}

/// Check if messages need compaction without performing the compaction
/// Check if messages exceed the auto-compaction threshold
///
/// This function analyzes the current token usage and returns detailed information
/// about whether compaction is needed and how close we are to the threshold.
/// It prioritizes actual token counts from session metadata when available,
/// falling back to estimated counts if needed.
/// This function analyzes the current token usage and returns whether compaction
/// is needed based on the configured threshold. It prioritizes actual token counts
/// from session metadata when available, falling back to estimated counts if needed.
///
/// # Arguments
/// * `agent` - The agent to use for context management
/// * `messages` - The current message history
/// * `conversation` - The current conversation history
/// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config)
/// * `session_metadata` - Optional session metadata containing actual token counts
///
/// # Returns
/// * `CompactionCheckResult` containing detailed information about compaction needs
async fn check_compaction_needed(
/// * `CompactionCheckResult` containing whether compaction is needed and usage details
pub async fn check_if_compaction_needed(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can't we just return a boolean here? also delete the intro comments

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah good call, I don't think we're actually doing anything interesting with CompactionCheckResult. Mostly just want to display the GOOSE_AUTO_COMPACT_THRESHOLD that we exceeded.

agent: &Agent,
messages: &[Message],
conversation: &Conversation,
threshold_override: Option<f64>,
session_metadata: Option<&crate::session::Session>,
) -> Result<CompactionCheckResult> {
let messages = conversation.messages();
let config = Config::global();
// TODO(Douwe): check the default here; it seems to reset to 0.3 sometimes
let threshold = threshold_override.unwrap_or_else(|| {
Expand Down
Loading