Merged
187 changes: 77 additions & 110 deletions crates/goose/src/context_mgmt/auto_compact.rs
@@ -1,5 +1,5 @@
use crate::conversation::message::Message;
use crate::conversation::Conversation;
use crate::conversation::{fix_conversation, Conversation};
use crate::{
agents::Agent, config::Config, context_mgmt::get_messages_token_counts_async,
token_counter::create_async_token_counter,
@@ -135,21 +135,33 @@ pub async fn check_compaction_needed(
pub async fn perform_compaction(agent: &Agent, messages: &[Message]) -> Result<AutoCompactResult> {
info!("Performing message compaction");

// Fix the conversation to handle any issues (like trailing assistant messages with tool requests)
let conversation = Conversation::new_unvalidated(messages.to_vec());
let (fixed_conversation, pre_compact_issues) = fix_conversation(conversation);
if !pre_compact_issues.is_empty() {
debug!("Fixed issues before compaction: {:?}", pre_compact_issues);
}
Collaborator:
can't we just call the conversation fixer here to be safe?

Collaborator (Author):
Will try that; would be nice to not have to add new logic.

Collaborator:
it could possibly help with some other corner cases like if there's an empty message in there during streaming that we recently saw. maybe also change the type here to Conversation from Vec - fixes some of that too

Collaborator (Author):
Yeah, it doesn't quite have this new logic so keeping both, but threw it in too.

Collaborator:
now that you have the conversation fixer I don't think you need this check

Collaborator (Author):
It doesn't handle the visible/non-visible messages now, but added some logic for that.
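For illustration only, a rough sketch of the suggestion above to accept a Conversation rather than a slice of messages. It assumes the fix_conversation and Conversation APIs shown in this diff and an anyhow-style Result alias; the wrapper name and import paths are hypothetical, and the PR itself keeps the &[Message] signature on perform_compaction.

// Hypothetical wrapper, not part of this PR: accept a Conversation directly
// and let fix_conversation normalize the history (trailing assistant messages
// with pending tool requests, empty streaming messages, ...) before compacting.
use anyhow::Result; // assumption: same Result alias used in auto_compact.rs
use tracing::debug;

use crate::agents::Agent;
use crate::context_mgmt::auto_compact::{perform_compaction, AutoCompactResult};
use crate::conversation::{fix_conversation, Conversation};

pub async fn perform_compaction_on_conversation(
    agent: &Agent,
    conversation: Conversation,
) -> Result<AutoCompactResult> {
    let (fixed, issues) = fix_conversation(conversation);
    if !issues.is_empty() {
        debug!("Fixed issues before compaction: {:?}", issues);
    }
    // &Vec<Message> coerces to &[Message] at the call site.
    perform_compaction(agent, fixed.messages()).await
}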


let messages_to_process = fixed_conversation.messages().clone();

// Check if the most recent message is a user message
let (messages_to_compact, preserved_user_message) = if let Some(last_message) = messages.last()
{
if matches!(last_message.role, rmcp::model::Role::User) {
// Remove the last user message before compaction
(&messages[..messages.len() - 1], Some(last_message.clone()))
let (messages_to_compact, preserved_user_message) =
if let Some(last_message) = messages_to_process.last() {
if matches!(last_message.role, rmcp::model::Role::User) {
// Remove the last user message before compaction
(
&messages_to_process[..messages_to_process.len() - 1],
Some(last_message.clone()),
)
} else {
(messages_to_process.as_slice(), None)
}
} else {
(messages, None)
}
} else {
(messages, None)
};
(messages_to_process.as_slice(), None)
};

// Perform the compaction on messages excluding the preserved user message
let (mut compacted_messages, _, summarization_usage) =
let (mut compacted_messages, _token_counts, summarization_usage) =
agent.summarize_context(messages_to_compact).await?;

// Add back the preserved user message if it exists
@@ -207,35 +219,7 @@ pub async fn check_and_compact_messages(
check_result.usage_ratio * 100.0
);

// Check if the most recent message is a user message
let (messages_to_compact, preserved_user_message) = if let Some(last_message) = messages.last()
{
if matches!(last_message.role, rmcp::model::Role::User) {
// Remove the last user message before auto-compaction
(&messages[..messages.len() - 1], Some(last_message.clone()))
} else {
(messages, None)
}
} else {
(messages, None)
};

// Perform the compaction on messages excluding the preserved user message
// The summarize_context method already handles the visibility properly
let (mut summary_messages, _, summarization_usage) =
agent.summarize_context(messages_to_compact).await?;

// Add back the preserved user message if it exists
// (keeps default visibility: both true)
if let Some(user_message) = preserved_user_message {
summary_messages.push(user_message);
}

Ok(AutoCompactResult {
compacted: true,
messages: summary_messages,
summarization_usage,
})
perform_compaction(agent, messages).await
}

#[cfg(test)]
@@ -470,72 +454,10 @@ mod tests {
}

assert!(result.compacted);
assert!(result.summarization_usage.is_some());

// Verify that summarization usage contains token counts
if let Some(usage) = &result.summarization_usage {
assert!(usage.usage.total_tokens.is_some());
let after = usage.usage.total_tokens.unwrap_or(0) as usize;
assert!(
after > 0,
"Token count after compaction should be greater than 0"
);
}

// After visibility implementation, we keep all messages plus summary
// Original messages become user_visible only, summary becomes agent_visible only
assert!(result.messages.len() > messages.len());
}

#[tokio::test]
async fn test_auto_compact_respects_config() {
let mock_provider = Arc::new(MockProvider {
model_config: ModelConfig::new("test-model")
.unwrap()
.with_context_limit(Some(30_000)), // Smaller context limit to make threshold easier to hit
});

let agent = Agent::new();
let _ = agent.update_provider(mock_provider).await;

// Create enough messages to trigger compaction with low threshold
let mut messages = Vec::new();
// With 30k context limit, after overhead we have ~27k usable tokens
// 10% of 27k = 2.7k tokens, so we need messages that exceed that
for i in 0..200 {
messages.push(create_test_message(&format!(
"Message {} with enough content to ensure we exceed 10% of the context limit. \
Adding more content to increase token count substantially. This message contains \
multiple sentences to increase the token count. We need to ensure that our total \
token usage exceeds 10% of the available context limit after accounting for \
system prompt and tools overhead.",
i
)));
}
// Note: summarization_usage might be None if messages were fixed/removed before compaction

// Set config value
let config = Config::global();
config
.set_param("GOOSE_AUTO_COMPACT_THRESHOLD", serde_json::Value::from(0.1))
.unwrap();

// Should use config value when no override provided
let result = check_and_compact_messages(&agent, &messages, None, None)
.await
.unwrap();

// Debug info if not compacted
if !result.compacted {
eprintln!("Test failed - compaction not triggered");
}

// With such a low threshold (10%), it should compact
assert!(result.compacted);

// Clean up config
config
.set_param("GOOSE_AUTO_COMPACT_THRESHOLD", serde_json::Value::from(0.3))
.unwrap();
// Verify that we have messages after compaction
assert!(!result.messages.is_empty());
}

#[tokio::test]
@@ -664,14 +586,59 @@ mod tests {

// Should have triggered compaction
assert!(result.compacted);
assert!(result.summarization_usage.is_some());
// Note: summarization_usage might be None if messages were fixed/removed before compaction

// Verify the compacted messages are returned
assert!(!result.messages.is_empty());
}

#[tokio::test]
async fn test_auto_compact_removes_pending_tool_request() {
use mcp_core::tool::ToolCall;

let mock_provider = Arc::new(MockProvider {
model_config: ModelConfig::new("test-model")
.unwrap()
.with_context_limit(10_000.into()),
});

let agent = Agent::new();
let _ = agent.update_provider(mock_provider).await;

// Create messages including an assistant message with a tool request
let mut messages = vec![
create_test_message("First message"),
create_test_message("Second message"),
];

// Add an assistant message with a tool request
let tool_call = ToolCall::new("test_tool", serde_json::json!({}));

let assistant_msg = Message::assistant()
.with_tool_request("test_tool_id".to_string(), Ok(tool_call))
.with_text("I'll help you with that");
messages.push(assistant_msg);

// Create session metadata with high token count to trigger compaction
let mut session_metadata = crate::session::storage::SessionMetadata::default();
session_metadata.total_tokens = Some(9000); // High enough to trigger compaction

// Perform compaction
let result = perform_compaction(&agent, &messages).await.unwrap();

// After visibility implementation, we keep all messages plus summary
// Original messages become user_visible only, summary becomes agent_visible only
assert!(result.messages.len() > messages.len());
// The compaction should have removed the last assistant message with tool request
assert!(result.compacted);

// Check that the last assistant message with tool request is not in the compacted messages
let has_tool_request = result
.messages
.messages()
.iter()
.any(|m| matches!(m.role, rmcp::model::Role::Assistant) && m.is_tool_call());
assert!(
!has_tool_request,
"Compacted messages should not contain assistant message with tool request"
);
}

#[tokio::test]
96 changes: 91 additions & 5 deletions crates/goose/src/conversation/mod.rs
@@ -144,9 +144,31 @@ impl Default for Conversation {
/// Fix a conversation that we're about to send to an LLM. So the last and first
/// messages should always be from the user.
pub fn fix_conversation(conversation: Conversation) -> (Conversation, Vec<String>) {
let messages = conversation.messages().clone();
let (messages, issues) = fix_messages(messages);
(Conversation::new_unvalidated(messages), issues)
let all_messages = conversation.messages().clone();

// Separate agent-visible and non-visible messages
let mut agent_visible_messages = Vec::new();
let mut non_visible_messages = Vec::new();

for msg in all_messages {
if msg.metadata.agent_visible {
agent_visible_messages.push(msg);
} else {
non_visible_messages.push(msg);
}
}

// Only fix agent-visible messages
let (fixed_visible, issues) = fix_messages(agent_visible_messages);

// Combine back: non-visible messages stay unchanged, visible messages are fixed
let mut final_messages = non_visible_messages;
final_messages.extend(fixed_visible);

// Sort by timestamp to maintain order
final_messages.sort_by_key(|m| m.created);

(Conversation::new_unvalidated(final_messages), issues)
}

fn fix_messages(messages: Vec<Message>) -> (Vec<Message>, Vec<String>) {
@@ -326,7 +348,8 @@ fn fix_lead_trail(mut messages: Vec<Message>) -> (Vec<Message>, Vec<String>) {
if let Some(last) = messages.last() {
if last.role == Role::Assistant {
messages.pop();
issues.push("Removed trailing assistant message".to_string());
issues
.push("Removed trailing assistant message with pending tool requests".to_string());
}
}

@@ -389,7 +412,7 @@ mod tests {
let (fixed, issues) = fix_conversation(Conversation::new_unvalidated(messages.clone()));

// Uncomment the following line to print the debug report
// let report = debug_conversation_fix(&messages, &fixed, &issues);
// let report = debug_conversation_fix(&messages, &fixed.messages(), &issues);
// print!("\n{}", report);

let (_fixed, issues_with_fixed) = fix_conversation(fixed.clone());
@@ -550,4 +573,67 @@ mod tests {
let (_fixed, issues) = run_verify(messages);
assert_eq!(issues.len(), 0);
}

#[test]
fn test_last_assistant_message_with_pending_tool_request() {
// Test the scenario where the last assistant message has a tool request
// This should be removed to prevent orphaned tool responses after compaction
let messages = vec![
Message::user().with_text("Help me with something"),
Message::assistant()
.with_text("I'll help you with that")
.with_tool_request("tool_1", Ok(ToolCall::new("some_tool", json!({})))),
];

let (fixed, issues) = run_verify(messages);

// The entire last assistant message should be removed
assert_eq!(fixed.len(), 1);
assert_eq!(issues.len(), 2);

// Should have removed the orphaned tool request and the trailing assistant message
assert!(issues
.iter()
.any(|i| i.contains("Removed orphaned tool request 'tool_1'")));
assert!(issues
.iter()
.any(|i| i.contains("Removed trailing assistant message")));

// Only the user message should remain
assert_eq!(fixed[0].role, Role::User);
assert!(fixed[0].as_concat_text().contains("Help me with something"));
}

#[test]
fn test_last_assistant_message_with_multiple_pending_tool_requests() {
// Test with multiple tool requests in the last assistant message
let messages = vec![
Message::user().with_text("Do multiple things"),
Message::assistant()
.with_text("I'll do multiple things")
.with_tool_request("tool_1", Ok(ToolCall::new("tool_a", json!({}))))
.with_tool_request("tool_2", Ok(ToolCall::new("tool_b", json!({})))),
];

let (fixed, issues) = run_verify(messages);

// The entire last assistant message should be removed
assert_eq!(fixed.len(), 1);
assert_eq!(issues.len(), 3);

// Should have removed both orphaned tool requests and the trailing assistant message
assert!(issues
.iter()
.any(|i| i.contains("Removed orphaned tool request 'tool_1'")));
assert!(issues
.iter()
.any(|i| i.contains("Removed orphaned tool request 'tool_2'")));
assert!(issues
.iter()
.any(|i| i.contains("Removed trailing assistant message")));

// Only the user message should remain
assert_eq!(fixed[0].role, Role::User);
assert!(fixed[0].as_concat_text().contains("Do multiple things"));
}
}
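As a usage sketch (not part of the diff) of the visibility handling that the new fix_conversation introduces: messages whose metadata.agent_visible flag is false should pass through untouched while the agent-visible portion is fixed. It assumes the same imports as the surrounding tests module (Message, Conversation, fix_conversation, ToolCall, json!); the test name is hypothetical, and the direct mutation of metadata is an assumption based on how the diff reads that field.

#[test]
fn sketch_non_agent_visible_messages_are_preserved() {
    // Assumption: Message::metadata.agent_visible is writable here, mirroring
    // how fix_conversation reads it in the diff above.
    let mut hidden = Message::user().with_text("user-visible only note");
    hidden.metadata.agent_visible = false;

    let messages = vec![
        Message::user().with_text("Help me with something"),
        hidden,
        Message::assistant()
            .with_text("Working on it")
            .with_tool_request("tool_1", Ok(ToolCall::new("some_tool", json!({})))),
    ];

    let (fixed, issues) = fix_conversation(Conversation::new_unvalidated(messages));

    // The trailing assistant message with a pending tool request is dropped,
    // but the non-agent-visible message survives fixing unchanged.
    assert!(fixed.messages().iter().any(|m| !m.metadata.agent_visible));
    assert!(issues
        .iter()
        .any(|i| i.contains("Removed trailing assistant message")));
}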