Skip to content
24 changes: 9 additions & 15 deletions crates/goose-cli/src/session/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub enum InputResult {
EndPlan,
Clear,
Recipe(Option<String>),
Summarize,
Compact,
}

#[derive(Debug)]
Expand Down Expand Up @@ -120,7 +120,8 @@ fn handle_slash_command(input: &str) -> Option<InputResult> {
const CMD_ENDPLAN: &str = "/endplan";
const CMD_CLEAR: &str = "/clear";
const CMD_RECIPE: &str = "/recipe";
const CMD_SUMMARIZE: &str = "/summarize";
const CMD_COMPACT: &str = "/compact";
const CMD_SUMMARIZE_DEPRECATED: &str = "/summarize";

match input {
"/exit" | "/quit" => Some(InputResult::Exit),
Expand Down Expand Up @@ -180,7 +181,11 @@ fn handle_slash_command(input: &str) -> Option<InputResult> {
s if s == CMD_ENDPLAN => Some(InputResult::EndPlan),
s if s == CMD_CLEAR => Some(InputResult::Clear),
s if s.starts_with(CMD_RECIPE) => parse_recipe_command(s),
s if s == CMD_SUMMARIZE => Some(InputResult::Summarize),
s if s == CMD_COMPACT => Some(InputResult::Compact),
s if s == CMD_SUMMARIZE_DEPRECATED => {
println!("{}", console::style("⚠️ Note: /summarize has been renamed to /compact and will be removed in a future release.").yellow());
Copy link

Copilot AI Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The deprecation message string is very long (over 100 characters). Consider breaking it into multiple lines or storing it as a constant for better readability and maintainability.

Copilot uses AI. Check for mistakes.
Some(InputResult::Compact)
}
_ => None,
}
}
Expand Down Expand Up @@ -305,7 +310,7 @@ fn print_help() {
/endplan - Exit plan mode and return to 'normal' goose mode.
/recipe [filepath] - Generate a recipe from the current conversation and save it to the specified filepath (must end with .yaml).
If no filepath is provided, it will be saved to ./recipe.yaml.
/summarize - Summarize the current conversation to reduce context length while preserving key information.
/compact - Compact the current conversation to reduce context length while preserving key information.
/? or /help - Display this help message
/clear - Clears the current chat history

Expand Down Expand Up @@ -541,17 +546,6 @@ mod tests {
assert!(matches!(result, Some(InputResult::Retry)));
}

#[test]
fn test_summarize_command() {
    // `/summarize` was renamed to `/compact`; the old spelling is kept as a
    // deprecated alias that prints a notice and yields the same result, so
    // this test pins both spellings to `InputResult::Compact`.

    // Canonical command.
    let result = handle_slash_command("/compact");
    assert!(matches!(result, Some(InputResult::Compact)));

    // Deprecated alias must keep working until it is removed.
    let result = handle_slash_command("/summarize");
    assert!(matches!(result, Some(InputResult::Compact)));

    // Surrounding whitespace was accepted before the rename and should
    // still be accepted for both spellings.
    let result = handle_slash_command(" /compact ");
    assert!(matches!(result, Some(InputResult::Compact)));
    let result = handle_slash_command(" /summarize ");
    assert!(matches!(result, Some(InputResult::Compact)));
}

#[test]
fn test_get_input_prompt_string() {
let prompt = get_input_prompt_string();
Expand Down
82 changes: 7 additions & 75 deletions crates/goose-cli/src/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use anyhow::{Context, Result};
use completion::GooseCompleter;
use goose::agents::extension::{Envs, ExtensionConfig};
use goose::agents::types::RetryConfig;
use goose::agents::{Agent, SessionConfig};
use goose::agents::{Agent, SessionConfig, MANUAL_COMPACT_TRIGGER};
use goose::config::{Config, GooseMode};
use goose::providers::pricing::initialize_pricing_cache;
use goose::session::SessionManager;
Expand Down Expand Up @@ -641,98 +641,30 @@ impl CliSession {

continue;
}
InputResult::Summarize => {
InputResult::Compact => {
save_history(&mut editor);

let prompt = "Are you sure you want to summarize this conversation? This will condense the message history.";
let prompt = "Are you sure you want to compact this conversation? This will condense the message history.";
let should_summarize =
match cliclack::confirm(prompt).initial_value(true).interact() {
Ok(choice) => choice,
Err(e) => {
if e.kind() == std::io::ErrorKind::Interrupted {
false // If interrupted, set should_summarize to false
false
} else {
return Err(e.into());
}
}
};

if should_summarize {
println!("{}", console::style("Summarizing conversation...").yellow());
self.push_message(Message::user().with_text(MANUAL_COMPACT_TRIGGER));
output::show_thinking();

let (summarized_messages, _token_counts, summarization_usage) =
goose::context_mgmt::compact_messages(
&self.agent,
&self.messages,
false,
)
self.process_agent_response(true, CancellationToken::default())
.await?;

// Update the session messages with the summarized ones
self.messages = summarized_messages.clone();

// Persist the summarized messages and update session metadata
if let Some(session_id) = &self.session_id {
// Replace all messages with the summarized version
SessionManager::replace_conversation(session_id, &summarized_messages)
.await?;

// Update session metadata with the new token counts from summarization
if let Some(usage) = summarization_usage {
let session =
SessionManager::get_session(session_id, false).await?;

// Update token counts with the summarization usage
let summary_tokens = usage.usage.output_tokens.unwrap_or(0);

// Update accumulated tokens (add the summarization cost)
let accumulate = |a: Option<i32>, b: Option<i32>| -> Option<i32> {
match (a, b) {
(Some(x), Some(y)) => Some(x + y),
_ => a.or(b),
}
};

let accumulated_total = accumulate(
session.accumulated_total_tokens,
usage.usage.total_tokens,
);
let accumulated_input = accumulate(
session.accumulated_input_tokens,
usage.usage.input_tokens,
);
let accumulated_output = accumulate(
session.accumulated_output_tokens,
usage.usage.output_tokens,
);

SessionManager::update_session(session_id)
.total_tokens(Some(summary_tokens))
.input_tokens(None)
.output_tokens(Some(summary_tokens))
.accumulated_total_tokens(accumulated_total)
.accumulated_input_tokens(accumulated_input)
.accumulated_output_tokens(accumulated_output)
.apply()
.await?;
}
}

output::hide_thinking();
println!(
"{}",
console::style("Conversation has been summarized.").green()
);
println!(
"{}",
console::style(
"Key information has been preserved while reducing context length."
)
.green()
);
} else {
println!("{}", console::style("Summarization cancelled.").yellow());
println!("{}", console::style("Compaction cancelled.").yellow());
}
continue;
}
Expand Down
12 changes: 7 additions & 5 deletions crates/goose/src/agents/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ use crate::session::SessionManager;

const DEFAULT_MAX_TURNS: u32 = 1000;
const COMPACTION_THINKING_TEXT: &str = "goose is compacting the conversation...";
const MANUAL_COMPACT_TRIGGER: &str = "Please compact this conversation";
pub const MANUAL_COMPACT_TRIGGER: &str = "Please compact this conversation";

/// Context needed for the reply function
pub struct ReplyContext {
Expand Down Expand Up @@ -803,9 +803,10 @@ impl Agent {
);

match crate::context_mgmt::compact_messages(self, &conversation_to_compact, false).await {
Ok((compacted_conversation, _token_counts, _summarization_usage)) => {
Ok((compacted_conversation, summarization_usage)) => {
if let Some(session_to_store) = &session {
SessionManager::replace_conversation(&session_to_store.id, &compacted_conversation).await?;
Self::update_session_metrics(session_to_store, &summarization_usage, true).await?;
}

yield AgentEvent::HistoryReplaced(compacted_conversation.clone());
Expand Down Expand Up @@ -991,7 +992,7 @@ impl Agent {
// Record usage for the session
if let Some(ref session_config) = &session {
if let Some(ref usage) = usage {
Self::update_session_metrics(session_config, usage).await?;
Self::update_session_metrics(session_config, usage, false).await?;
}
}

Expand Down Expand Up @@ -1166,9 +1167,10 @@ impl Agent {
);

match crate::context_mgmt::compact_messages(self, &conversation, true).await {
Ok((compacted_conversation, _token_counts, _usage)) => {
Ok((compacted_conversation, usage)) => {
if let Some(session_to_store) = &session {
SessionManager::replace_conversation(&session_to_store.id, &compacted_conversation).await?
SessionManager::replace_conversation(&session_to_store.id, &compacted_conversation).await?;
Self::update_session_metrics(session_to_store, &usage, true).await?;
}

conversation = compacted_conversation;
Expand Down
2 changes: 1 addition & 1 deletion crates/goose/src/agents/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mod tool_route_manager;
mod tool_router_index_manager;
pub mod types;

pub use agent::{Agent, AgentEvent};
pub use agent::{Agent, AgentEvent, MANUAL_COMPACT_TRIGGER};
pub use extension::ExtensionConfig;
pub use extension_manager::ExtensionManager;
pub use prompt_manager::PromptManager;
Expand Down
19 changes: 16 additions & 3 deletions crates/goose/src/agents/reply_parts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ impl Agent {
pub(crate) async fn update_session_metrics(
session_config: &crate::agents::types::SessionConfig,
usage: &ProviderUsage,
is_compaction_usage: bool,
) -> Result<()> {
let session_id = session_config.id.as_str();
let session = SessionManager::get_session(session_id, false).await?;
Expand All @@ -273,11 +274,23 @@ impl Agent {
let accumulated_output =
accumulate(session.accumulated_output_tokens, usage.usage.output_tokens);

let (current_total, current_input, current_output) = if is_compaction_usage {
// After compaction: summary output becomes new input context
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After manual compaction, shouldn't this really just go to zero? Have we sent the manually compacted conversation to the model yet, or only to the client? That said, I could see how showing zero would be confusing to a user.

It would be good to avoid a hard-coded system prompt estimate, because the real value could be all over the place.

I suppose we could do something like use the token counts from the first message you send, but that would be complicated to do.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think this one is more a question of what we want to show the user. Let's try dropping it to the summary size. There will be a bit of whiplash for the user when the system prompt re-enters on the next message, but I don't love that constant either.

let new_input = usage.usage.output_tokens.map(|out| out);
Copy link

Copilot AI Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The identity closure map(|out| out) is redundant. Consider simplifying to just usage.usage.output_tokens directly.

Suggested change
let new_input = usage.usage.output_tokens.map(|out| out);
let new_input = usage.usage.output_tokens;

Copilot uses AI. Check for mistakes.
(new_input, new_input, None)
} else {
(
usage.usage.total_tokens,
usage.usage.input_tokens,
usage.usage.output_tokens,
)
};

SessionManager::update_session(session_id)
.schedule_id(session_config.schedule_id.clone())
.total_tokens(usage.usage.total_tokens)
.input_tokens(usage.usage.input_tokens)
.output_tokens(usage.usage.output_tokens)
.total_tokens(current_total)
.input_tokens(current_input)
.output_tokens(current_output)
.accumulated_total_tokens(accumulated_total)
.accumulated_input_tokens(accumulated_input)
.accumulated_output_tokens(accumulated_output)
Expand Down
34 changes: 6 additions & 28 deletions crates/goose/src/context_mgmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,12 @@ struct SummarizeContext {
/// # Returns
/// * A tuple containing:
/// - `Conversation`: The compacted messages
/// - `Vec<usize>`: Token counts for each message
/// - `Option<ProviderUsage>`: Provider usage from summarization
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I saw we didn't use this. There's some code on the frontend you could clean up with this too.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack, I'll take another pass on the frontend too.

/// - `ProviderUsage`: Provider usage from summarization
pub async fn compact_messages(
agent: &Agent,
conversation: &Conversation,
preserve_last_user_message: bool,
) -> Result<(Conversation, Vec<usize>, Option<ProviderUsage>)> {
) -> Result<(Conversation, ProviderUsage)> {
info!("Performing message compaction");

let messages = conversation.messages();
Expand Down Expand Up @@ -99,44 +98,26 @@ pub async fn compact_messages(
};

let provider = agent.provider().await?;
let summary = do_compact(provider.clone(), messages_to_compact).await?;

let (summary_message, summarization_usage) = match summary {
Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)),
None => {
// No summary was generated (empty input)
tracing::warn!("Summarization failed. Returning empty messages.");
return Ok((Conversation::empty(), vec![], None));
}
};
let (summary_message, summarization_usage) =
do_compact(provider.clone(), messages_to_compact).await?;

// Create the final message list with updated visibility metadata:
// 1. Original messages become user_visible but not agent_visible
// 2. Summary message becomes agent_visible but not user_visible
// 3. Assistant messages to continue the conversation remain both user_visible and agent_visible

let mut final_messages = Vec::new();
let mut final_token_counts = Vec::new();

// Add all original messages with updated visibility (preserve user_visible, set agent_visible=false)
for msg in messages_to_compact.iter().cloned() {
let updated_metadata = msg.metadata.with_agent_invisible();
let updated_msg = msg.with_metadata(updated_metadata);
final_messages.push(updated_msg);
// Token count doesn't matter for agent_visible=false messages, but we'll use 0
final_token_counts.push(0);
}

// Add the summary message (agent_visible=true, user_visible=false)
let summary_msg = summary_message.with_metadata(MessageMetadata::agent_only());
// For token counting purposes, we use the output tokens (the actual summary content)
// since that's what will be in the context going forward
let summary_tokens = summarization_usage
.as_ref()
.and_then(|usage| usage.usage.output_tokens)
.unwrap_or(0) as usize;
final_messages.push(summary_msg);
final_token_counts.push(summary_tokens);

// Add an assistant message to continue the conversation (agent_visible=true, user_visible=false)
let assistant_message = Message::assistant()
Expand All @@ -146,9 +127,7 @@ Do not mention that you read a summary or that conversation summarization occurr
Just continue the conversation naturally based on the summarized context"
)
.with_metadata(MessageMetadata::agent_only());
let assistant_message_tokens: usize = 0; // Not counted since it's for agent context only
final_messages.push(assistant_message);
final_token_counts.push(assistant_message_tokens);

// Add back the preserved user message if it exists
if let Some(user_text) = preserved_user_text {
Expand All @@ -157,7 +136,6 @@ Just continue the conversation naturally based on the summarized context"

Ok((
Conversation::new_unvalidated(final_messages),
final_token_counts,
summarization_usage,
))
}
Expand Down Expand Up @@ -222,7 +200,7 @@ pub async fn check_if_compaction_needed(
async fn do_compact(
provider: Arc<dyn Provider>,
messages: &[Message],
) -> Result<Option<(Message, ProviderUsage)>, anyhow::Error> {
) -> Result<(Message, ProviderUsage), anyhow::Error> {
let agent_visible_messages: Vec<&Message> = messages
.iter()
.filter(|msg| msg.is_agent_visible())
Expand Down Expand Up @@ -255,7 +233,7 @@ async fn do_compact(
.await
.map_err(|e| anyhow::anyhow!("Failed to ensure usage tokens: {}", e))?;

Ok(Some((response, provider_usage)))
Ok((response, provider_usage))
}

fn format_message_for_compacting(msg: &Message) -> String {
Expand Down
Loading