diff --git a/crates/goose-cli/src/session/mod.rs b/crates/goose-cli/src/session/mod.rs
index ea32ef089c53..133b3aef4886 100644
--- a/crates/goose-cli/src/session/mod.rs
+++ b/crates/goose-cli/src/session/mod.rs
@@ -655,26 +655,11 @@ impl CliSession {
                         println!("{}", console::style("Summarizing conversation...").yellow());
                         output::show_thinking();
 
-                        // Get the provider for summarization
-                        let _provider = self.agent.provider().await?;
-
-                        // Get session metadata if available
-                        let session_metadata_for_compact =
-                            if let Some(ref session_id) = self.session_id {
-                                SessionManager::get_session(session_id, false).await.ok()
-                            } else {
-                                None
-                            };
-
-                        // Call the summarize_context method
-                        let (_, summarized_messages, _token_counts, summarization_usage) =
-                            goose::context_mgmt::check_and_compact_messages(
+                        let (summarized_messages, _token_counts, summarization_usage) =
+                            goose::context_mgmt::compact_messages(
                                 &self.agent,
-                                self.messages.messages(),
-                                true,
+                                &self.messages,
                                 false,
-                                None,
-                                session_metadata_for_compact.as_ref(),
                             )
                             .await?;
 
diff --git a/crates/goose-server/src/routes/context.rs b/crates/goose-server/src/routes/context.rs
index 63dd203af612..97f964280269 100644
--- a/crates/goose-server/src/routes/context.rs
+++ b/crates/goose-server/src/routes/context.rs
@@ -1,6 +1,6 @@
 use crate::state::AppState;
 use axum::{extract::State, http::StatusCode, routing::post, Json, Router};
-use goose::conversation::message::Message;
+use goose::conversation::{message::Message, Conversation};
 use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 use utoipa::ToSchema;
@@ -46,16 +46,12 @@ async fn manage_context(
 ) -> Result<Json<ContextManageResponse>, StatusCode> {
     let agent = state.get_agent_for_route(request.session_id).await?;
 
-    let (_, processed_messages, token_counts, _) = goose::context_mgmt::check_and_compact_messages(
-        &agent,
-        &request.messages,
-        true,
-        false,
-        None,
-        None,
-    )
-    .await
-    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+    let conversation = Conversation::new_unvalidated(request.messages);
+
+    let (processed_messages, token_counts, _) =
+        goose::context_mgmt::compact_messages(&agent, &conversation, false)
+            .await
+            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
     // TODO(Douwe): store into db
 
     Ok(Json(ContextManageResponse {
diff --git a/crates/goose/src/agents/agent.rs b/crates/goose/src/agents/agent.rs
index 4a31d7826cb1..b2ade9ea89fb 100644
--- a/crates/goose/src/agents/agent.rs
+++ b/crates/goose/src/agents/agent.rs
@@ -33,7 +33,7 @@ use crate::agents::tool_router_index_manager::ToolRouterIndexManager;
 use crate::agents::types::SessionConfig;
 use crate::agents::types::{FrontendTool, ToolResultReceiver};
 use crate::config::{get_enabled_extensions, get_extension_by_name, Config};
-use crate::context_mgmt::{check_and_compact_messages, DEFAULT_COMPACTION_THRESHOLD};
+use crate::context_mgmt::DEFAULT_COMPACTION_THRESHOLD;
 use crate::conversation::{debug_conversation_fix, fix_conversation, Conversation};
 use crate::mcp_utils::ToolResult;
 use crate::permission::permission_inspector::PermissionInspector;
@@ -918,22 +918,29 @@ impl Agent {
             None
         };
 
-        let (did_compact, compacted_conversation, compaction_error) =
-            match check_and_compact_messages(
-                self,
-                unfixed_conversation.messages(),
-                false,
-                false,
-                None,
-                session_metadata.as_ref(),
-            )
-            .await
-            {
-                Ok((did_compact, conversation, _removed_indices, _summarization_usage)) => {
-                    (did_compact, conversation, None)
+        let check_result = crate::context_mgmt::check_if_compaction_needed(
+            self,
+            &unfixed_conversation,
+            None,
+            session_metadata.as_ref(),
+        )
+        .await;
+
+        let (did_compact, compacted_conversation, compaction_error) = match check_result {
+            // TODO(dkatz): send a notification that we are starting compaction here.
+            Ok(true) => {
+                match crate::context_mgmt::compact_messages(self, &unfixed_conversation, false)
+                    .await
+                {
+                    Ok((conversation, _token_counts, _summarization_usage)) => {
+                        (true, conversation, None)
+                    }
+                    Err(e) => (false, unfixed_conversation.clone(), Some(e)),
                 }
-                Err(e) => (false, unfixed_conversation.clone(), Some(e)),
-            };
+            }
+            Ok(false) => (false, unfixed_conversation, None),
+            Err(e) => (false, unfixed_conversation.clone(), Some(e)),
+        };
 
         if did_compact {
             // Get threshold from config to include in message
@@ -970,7 +977,7 @@ impl Agent {
                 ));
             }))
         } else {
-            self.reply_internal(unfixed_conversation, session, cancel_token)
+            self.reply_internal(compacted_conversation, session, cancel_token)
                 .await
         }
     }
@@ -1297,15 +1304,9 @@ impl Agent {
                         Err(ProviderError::ContextLengthExceeded(_error_msg)) => {
                             info!("Context length exceeded, attempting compaction");
 
-                            // Get session metadata if available
-                            let session_metadata_for_compact = if let Some(ref session_config) = session {
-                                SessionManager::get_session(&session_config.id, false).await.ok()
-                            } else {
-                                None
-                            };
-
-                            match check_and_compact_messages(self, conversation.messages(), true, true, None, session_metadata_for_compact.as_ref()).await {
-                                Ok((_did_compact, compacted_conversation, _removed_indices, _usage)) => {
+                            // TODO(dkatz): send a notification that we are starting compaction here.
+                            match crate::context_mgmt::compact_messages(self, &conversation, true).await {
+                                Ok((compacted_conversation, _token_counts, _usage)) => {
                                     conversation = compacted_conversation;
                                     did_recovery_compact_this_iteration = true;
 
diff --git a/crates/goose/src/context_mgmt/mod.rs b/crates/goose/src/context_mgmt/mod.rs
index 8b2c73c3feac..a56c87006d1f 100644
--- a/crates/goose/src/context_mgmt/mod.rs
+++ b/crates/goose/src/context_mgmt/mod.rs
@@ -12,135 +12,67 @@ use tracing::{debug, info};
 
 pub const DEFAULT_COMPACTION_THRESHOLD: f64 = 0.8;
 
-/// Result of auto-compaction check
-#[derive(Debug)]
-pub struct AutoCompactResult {
-    /// Whether compaction was performed
-    pub compacted: bool,
-    /// The messages after potential compaction
-    pub messages: Conversation,
-    /// Provider usage from summarization (if compaction occurred)
-    /// This contains the actual token counts after compaction
-    pub summarization_usage: Option<crate::providers::base::ProviderUsage>,
-}
-
-/// Result of checking if compaction is needed
-#[derive(Debug)]
-pub struct CompactionCheckResult {
-    /// Whether compaction is needed
-    pub needs_compaction: bool,
-    /// Current token count
-    pub current_tokens: usize,
-    /// Context limit being used
-    pub context_limit: usize,
-    /// Current usage ratio (0.0 to 1.0)
-    pub usage_ratio: f64,
-    /// Remaining tokens before compaction threshold
-    pub remaining_tokens: usize,
-    /// Percentage until compaction threshold (0.0 to 100.0)
-    pub percentage_until_compaction: f64,
-}
-
 #[derive(Serialize)]
 struct SummarizeContext {
     messages: String,
 }
 
-/// Check if messages need compaction and compact them if necessary
+/// Compact messages by summarizing them
 ///
-/// This function combines checking and compaction. It first checks if compaction
-/// is needed based on the threshold, and if so, performs the compaction by
-/// summarizing messages and updating their visibility metadata.
+/// This function performs the actual compaction by summarizing messages and updating
+/// their visibility metadata. It does not check thresholds - use `check_if_compaction_needed`
+/// first to determine if compaction is necessary.
 ///
 /// # Arguments
 /// * `agent` - The agent to use for context management
-/// * `messages` - The current message history
-/// * `force_compact` - If true, skip the threshold check and force compaction
+/// * `conversation` - The current conversation history
 /// * `preserve_last_user_message` - If true and last message is not a user message, copy the most recent user message to the end
-/// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config)
-/// * `session_metadata` - Optional session metadata containing actual token counts
 ///
 /// # Returns
 /// * A tuple containing:
-///   - `bool`: Whether compaction was performed
-///   - `Conversation`: The potentially compacted messages
-///   - `Vec<usize>`: Indices of removed messages (empty if no compaction)
-///   - `Option<ProviderUsage>`: Provider usage from summarization (if compaction occurred)
-pub async fn check_and_compact_messages(
+///   - `Conversation`: The compacted messages
+///   - `Vec<usize>`: Token counts for each message
+///   - `Option<ProviderUsage>`: Provider usage from summarization
+pub async fn compact_messages(
     agent: &Agent,
-    messages_with_user_message: &[Message],
-    force_compact: bool,
+    conversation: &Conversation,
     preserve_last_user_message: bool,
-    threshold_override: Option<f64>,
-    session_metadata: Option<&crate::session::Session>,
-) -> std::result::Result<(bool, Conversation, Vec<usize>, Option<ProviderUsage>), anyhow::Error> {
-    if !force_compact {
-        let check_result = check_compaction_needed(
-            agent,
-            messages_with_user_message,
-            threshold_override,
-            session_metadata,
-        )
-        .await?;
+) -> Result<(Conversation, Vec<usize>, Option<ProviderUsage>)> {
+    info!("Performing message compaction");
 
-        // If no compaction is needed, return early
-        if !check_result.needs_compaction {
-            debug!(
-                "No compaction needed (usage: {:.1}% <= {:.1}% threshold)",
-                check_result.usage_ratio * 100.0,
-                check_result.percentage_until_compaction
-            );
-            return Ok((
-                false,
-                Conversation::new_unvalidated(messages_with_user_message.to_vec()),
-                Vec::new(),
-                None,
-            ));
-        }
-
-        info!(
-            "Performing message compaction (usage: {:.1}%)",
-            check_result.usage_ratio * 100.0
-        );
-    } else {
-        info!("Forcing message compaction due to context limit exceeded");
-    }
+    let messages = conversation.messages();
 
-    // Perform the actual compaction
     // Check if the most recent message is a user message
-    let (messages, preserved_user_message) =
-        if let Some(last_message) = messages_with_user_message.last() {
-            if matches!(last_message.role, rmcp::model::Role::User) {
-                // Remove the last user message before compaction
-                (
-                    &messages_with_user_message[..messages_with_user_message.len() - 1],
-                    Some(last_message.clone()),
-                )
-            } else if preserve_last_user_message {
-                // Last message is not a user message, but we want to preserve the most recent user message
-                // Find the most recent user message and copy it (don't remove from history)
-                let most_recent_user_message = messages_with_user_message
-                    .iter()
-                    .rev()
-                    .find(|msg| matches!(msg.role, rmcp::model::Role::User))
-                    .cloned();
-                (messages_with_user_message, most_recent_user_message)
-            } else {
-                (messages_with_user_message, None)
-            }
+    let (messages_to_compact, preserved_user_message) = if let Some(last_message) = messages.last()
+    {
+        if matches!(last_message.role, rmcp::model::Role::User) {
+            // Remove the last user message before compaction
+            (&messages[..messages.len() - 1], Some(last_message.clone()))
+        } else if preserve_last_user_message {
+            // Last message is not a user message, but we want to preserve the most recent user message
+            // Find the most recent user message and copy it (don't remove from history)
+            let most_recent_user_message = messages
+                .iter()
+                .rev()
+                .find(|msg| matches!(msg.role, rmcp::model::Role::User))
+                .cloned();
+            (messages.as_slice(), most_recent_user_message)
         } else {
-            (messages_with_user_message, None)
-        };
+            (messages.as_slice(), None)
+        }
+    } else {
+        (messages.as_slice(), None)
+    };
 
     let provider = agent.provider().await?;
-    let summary = do_compact(provider.clone(), messages).await?;
+    let summary = do_compact(provider.clone(), messages_to_compact).await?;
 
     let (summary_message, summarization_usage) = match summary {
         Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)),
         None => {
             // No summary was generated (empty input)
             tracing::warn!("Summarization failed. Returning empty messages.");
-            return Ok((false, Conversation::empty(), vec![], None));
+            return Ok((Conversation::empty(), vec![], None));
         }
     };
 
@@ -153,7 +85,7 @@ pub async fn check_and_compact_messages(
     let mut final_token_counts = Vec::new();
 
     // Add all original messages with updated visibility (preserve user_visible, set agent_visible=false)
-    for msg in messages.iter().cloned() {
+    for msg in messages_to_compact.iter().cloned() {
         let updated_metadata = msg.metadata.with_agent_invisible();
         let updated_msg = msg.with_metadata(updated_metadata);
         final_messages.push(updated_msg);
@@ -198,34 +130,20 @@ Just continue the conversation naturally based on the summarized context"
     }
 
     Ok((
-        true,
         Conversation::new_unvalidated(final_messages),
         final_token_counts,
         summarization_usage,
     ))
 }
 
-/// Check if messages need compaction without performing the compaction
-///
-/// This function analyzes the current token usage and returns detailed information
-/// about whether compaction is needed and how close we are to the threshold.
-/// It prioritizes actual token counts from session metadata when available,
-/// falling back to estimated counts if needed.
-///
-/// # Arguments
-/// * `agent` - The agent to use for context management
-/// * `messages` - The current message history
-/// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config)
-/// * `session_metadata` - Optional session metadata containing actual token counts
-///
-/// # Returns
-/// * `CompactionCheckResult` containing detailed information about compaction needs
-async fn check_compaction_needed(
+/// Check if messages exceed the auto-compaction threshold
+pub async fn check_if_compaction_needed(
     agent: &Agent,
-    messages: &[Message],
+    conversation: &Conversation,
     threshold_override: Option<f64>,
     session_metadata: Option<&crate::session::Session>,
-) -> Result<CompactionCheckResult> {
+) -> Result<bool> {
+    let messages = conversation.messages();
     let config = Config::global();
     // TODO(Douwe): check the default here; it seems to reset to 0.3 sometimes
     let threshold = threshold_override.unwrap_or_else(|| {
@@ -256,15 +174,6 @@ async fn check_compaction_needed(
 
     let usage_ratio = current_tokens as f64 / context_limit as f64;
 
-    let threshold_tokens = (context_limit as f64 * threshold) as usize;
-    let remaining_tokens = threshold_tokens.saturating_sub(current_tokens);
-
-    let percentage_until_compaction = if usage_ratio < threshold {
-        (threshold - usage_ratio) * 100.0
-    } else {
-        0.0
-    };
-
     let needs_compaction = if threshold <= 0.0 || threshold >= 1.0 {
         usage_ratio > DEFAULT_COMPACTION_THRESHOLD
     } else {
@@ -281,14 +190,7 @@ async fn check_compaction_needed(
         token_source
     );
 
-    Ok(CompactionCheckResult {
-        needs_compaction,
-        current_tokens,
-        context_limit,
-        usage_ratio,
-        remaining_tokens,
-        percentage_until_compaction,
-    })
+    Ok(needs_compaction)
 }
 
 async fn do_compact(