-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Manual compaction counting fix + cli cleanup #5480
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
c0e2825
214ac1a
1488502
29912ad
6c593dc
b323e98
a0f1265
23ca07c
5649ab8
094a4c3
bf6ea3d
1deff33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -255,6 +255,7 @@ impl Agent { | |||||
| pub(crate) async fn update_session_metrics( | ||||||
| session_config: &crate::agents::types::SessionConfig, | ||||||
| usage: &ProviderUsage, | ||||||
| is_compaction_usage: bool, | ||||||
| ) -> Result<()> { | ||||||
| let session_id = session_config.id.as_str(); | ||||||
| let session = SessionManager::get_session(session_id, false).await?; | ||||||
|
|
@@ -273,11 +274,23 @@ impl Agent { | |||||
| let accumulated_output = | ||||||
| accumulate(session.accumulated_output_tokens, usage.usage.output_tokens); | ||||||
|
|
||||||
| let (current_total, current_input, current_output) = if is_compaction_usage { | ||||||
| // After compaction: summary output becomes new input context | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

After manual compaction, should this not really just go to zero? Have we sent the manually compacted conversation to the model yet, or only to the client? Although I could see how that would be confusing to a user, it would be good to avoid a hard-coded system prompt estimate, just because that could really be all over the place. I suppose we could do something like use the token counts from the first message you send, but that would be complicated to do.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think this one is more of a question of what we want to show the user. Let's try dropping it to the summary size; there will be a bit of whiplash for the user when the system prompt re-enters on the next message, but I don't love that constant either.
||||||
| let new_input = usage.usage.output_tokens.map(|out| out); | ||||||
|
||||||
| let new_input = usage.usage.output_tokens.map(|out| out); | |
| let new_input = usage.usage.output_tokens; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,13 +31,12 @@ struct SummarizeContext { | |
| /// # Returns | ||
| /// * A tuple containing: | ||
| /// - `Conversation`: The compacted messages | ||
| /// - `Vec<usize>`: Token counts for each message | ||
| /// - `Option<ProviderUsage>`: Provider usage from summarization | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Nice, I saw we didn't use this. There's some code on the frontend you could clean up with this too.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Ack, will take another pass on the frontend too.
||
| /// - `ProviderUsage`: Provider usage from summarization | ||
| pub async fn compact_messages( | ||
| agent: &Agent, | ||
| conversation: &Conversation, | ||
| preserve_last_user_message: bool, | ||
| ) -> Result<(Conversation, Vec<usize>, Option<ProviderUsage>)> { | ||
| ) -> Result<(Conversation, ProviderUsage)> { | ||
| info!("Performing message compaction"); | ||
|
|
||
| let messages = conversation.messages(); | ||
|
|
@@ -99,44 +98,26 @@ pub async fn compact_messages( | |
| }; | ||
|
|
||
| let provider = agent.provider().await?; | ||
| let summary = do_compact(provider.clone(), messages_to_compact).await?; | ||
|
|
||
| let (summary_message, summarization_usage) = match summary { | ||
| Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)), | ||
| None => { | ||
| // No summary was generated (empty input) | ||
| tracing::warn!("Summarization failed. Returning empty messages."); | ||
| return Ok((Conversation::empty(), vec![], None)); | ||
| } | ||
| }; | ||
| let (summary_message, summarization_usage) = | ||
| do_compact(provider.clone(), messages_to_compact).await?; | ||
|
|
||
| // Create the final message list with updated visibility metadata: | ||
| // 1. Original messages become user_visible but not agent_visible | ||
| // 2. Summary message becomes agent_visible but not user_visible | ||
| // 3. Assistant messages to continue the conversation remain both user_visible and agent_visible | ||
|
|
||
| let mut final_messages = Vec::new(); | ||
| let mut final_token_counts = Vec::new(); | ||
|
|
||
| // Add all original messages with updated visibility (preserve user_visible, set agent_visible=false) | ||
| for msg in messages_to_compact.iter().cloned() { | ||
| let updated_metadata = msg.metadata.with_agent_invisible(); | ||
| let updated_msg = msg.with_metadata(updated_metadata); | ||
| final_messages.push(updated_msg); | ||
| // Token count doesn't matter for agent_visible=false messages, but we'll use 0 | ||
| final_token_counts.push(0); | ||
| } | ||
|
|
||
| // Add the summary message (agent_visible=true, user_visible=false) | ||
| let summary_msg = summary_message.with_metadata(MessageMetadata::agent_only()); | ||
| // For token counting purposes, we use the output tokens (the actual summary content) | ||
| // since that's what will be in the context going forward | ||
| let summary_tokens = summarization_usage | ||
| .as_ref() | ||
| .and_then(|usage| usage.usage.output_tokens) | ||
| .unwrap_or(0) as usize; | ||
| final_messages.push(summary_msg); | ||
| final_token_counts.push(summary_tokens); | ||
|
|
||
| // Add an assistant message to continue the conversation (agent_visible=true, user_visible=false) | ||
| let assistant_message = Message::assistant() | ||
|
|
@@ -146,9 +127,7 @@ Do not mention that you read a summary or that conversation summarization occurr | |
| Just continue the conversation naturally based on the summarized context" | ||
| ) | ||
| .with_metadata(MessageMetadata::agent_only()); | ||
| let assistant_message_tokens: usize = 0; // Not counted since it's for agent context only | ||
| final_messages.push(assistant_message); | ||
| final_token_counts.push(assistant_message_tokens); | ||
|
|
||
| // Add back the preserved user message if it exists | ||
| if let Some(user_text) = preserved_user_text { | ||
|
|
@@ -157,7 +136,6 @@ Just continue the conversation naturally based on the summarized context" | |
|
|
||
| Ok(( | ||
| Conversation::new_unvalidated(final_messages), | ||
| final_token_counts, | ||
| summarization_usage, | ||
| )) | ||
| } | ||
|
|
@@ -222,7 +200,7 @@ pub async fn check_if_compaction_needed( | |
| async fn do_compact( | ||
| provider: Arc<dyn Provider>, | ||
| messages: &[Message], | ||
| ) -> Result<Option<(Message, ProviderUsage)>, anyhow::Error> { | ||
| ) -> Result<(Message, ProviderUsage), anyhow::Error> { | ||
| let agent_visible_messages: Vec<&Message> = messages | ||
| .iter() | ||
| .filter(|msg| msg.is_agent_visible()) | ||
|
|
@@ -255,7 +233,7 @@ async fn do_compact( | |
| .await | ||
| .map_err(|e| anyhow::anyhow!("Failed to ensure usage tokens: {}", e))?; | ||
|
|
||
| Ok(Some((response, provider_usage))) | ||
| Ok((response, provider_usage)) | ||
| } | ||
|
|
||
| fn format_message_for_compacting(msg: &Message) -> String { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The deprecation message string is very long (over 100 characters). Consider breaking it into multiple lines or storing it as a constant for better readability and maintainability.