Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions crates/goose-server/src/openapi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ use rmcp::model::{
use utoipa::{OpenApi, ToSchema};

use goose::conversation::message::{
ContextLengthExceeded, FrontendToolRequest, Message, MessageContent, RedactedThinkingContent,
SummarizationRequested, ThinkingContent, ToolConfirmationRequest, ToolRequest, ToolResponse,
ContextLengthExceeded, FrontendToolRequest, Message, MessageContent, MessageMetadata,
RedactedThinkingContent, SummarizationRequested, ThinkingContent, ToolConfirmationRequest,
ToolRequest, ToolResponse,
};
use utoipa::openapi::schema::{
AdditionalProperties, AnyOfBuilder, ArrayBuilder, ObjectBuilder, OneOfBuilder, Schema,
Expand Down Expand Up @@ -421,6 +422,7 @@ impl<'__s> ToSchema<'__s> for AnnotatedSchema {
super::routes::session::SessionHistoryResponse,
Message,
MessageContent,
MessageMetadata,
ContentSchema,
EmbeddedResourceSchema,
ImageContentSchema,
Expand Down
7 changes: 6 additions & 1 deletion crates/goose-server/src/routes/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,12 @@ async fn manage_context(
}

Ok(Json(ContextManageResponse {
messages: processed_messages.messages().clone(),
messages: processed_messages
.messages()
.iter()
.filter(|m| m.is_user_visible())
.cloned()
.collect(),
token_counts,
}))
}
Expand Down
6 changes: 5 additions & 1 deletion crates/goose-server/src/routes/reply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,11 @@ async fn reply_handler(
}

all_messages.push(message.clone());
stream_event(MessageEvent::Message { message }, &tx, &cancel_token).await;

// Only send message to client if it's user_visible
if message.is_user_visible() {
stream_event(MessageEvent::Message { message }, &tx, &cancel_token).await;
}
}
Ok(Some(Ok(AgentEvent::HistoryReplaced(new_messages)))) => {
// Replace the message history with the compacted messages
Expand Down
10 changes: 9 additions & 1 deletion crates/goose-server/src/routes/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,18 @@ async fn get_session_history(
}
};

// Filter messages to only include user_visible ones
let user_visible_messages: Vec<Message> = messages
.messages()
.iter()
.filter(|m| m.is_user_visible())
.cloned()
.collect();

Ok(Json(SessionHistoryResponse {
session_id,
metadata,
messages: messages.messages().clone(),
messages: user_visible_messages,
}))
}

Expand Down
82 changes: 49 additions & 33 deletions crates/goose/src/agents/context.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::Ok;

use crate::conversation::message::Message;
use crate::conversation::message::{Message, MessageMetadata};
use crate::conversation::Conversation;
use crate::token_counter::create_async_token_counter;

Expand Down Expand Up @@ -64,48 +64,64 @@ impl Agent {
let provider = self.provider().await?;
let summary_result = summarize_messages(provider.clone(), messages).await?;

let (mut new_messages, mut new_token_counts, summarization_usage) = match summary_result {
Some((summary_message, provider_usage)) => {
// For token counting purposes, we use the output tokens (the actual summary content)
// since that's what will be in the context going forward
let total_tokens = provider_usage.usage.output_tokens.unwrap_or(0) as usize;
(
vec![summary_message],
vec![total_tokens],
Some(provider_usage),
)
}
let (summary_message, summarization_usage) = match summary_result {
Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)),
None => {
// No summary was generated (empty input)
tracing::warn!("Summarization failed. Returning empty messages.");
return Ok((Conversation::empty(), vec![], None));
}
};

// Add an assistant message to the summarized messages to ensure the assistant's response is included in the context.
if new_messages.len() == 1 {
let compaction_marker = Message::assistant()
.with_summarization_requested("Conversation compacted and summarized");
let compaction_marker_tokens: usize = 8;

// Insert the marker before the summary message
new_messages.insert(0, compaction_marker);
new_token_counts.insert(0, compaction_marker_tokens);

// Add an assistant message to continue the conversation
let assistant_message = Message::assistant().with_text("
The previous message contains a summary that was prepared because a context limit was reached.
Do not mention that you read a summary or that conversation summarization occurred
Just continue the conversation naturally based on the summarized context
");
let assistant_message_tokens: usize = 41;
new_messages.push(assistant_message);
new_token_counts.push(assistant_message_tokens);
// Create the final message list with updated visibility metadata:
// 1. Original messages become user_visible but not agent_visible
// 2. Summary message becomes agent_visible but not user_visible
// 3. The assistant message that continues the conversation is agent_visible but not user_visible
// 4. The compaction marker is user_visible but not agent_visible

let mut final_messages = Vec::new();
let mut final_token_counts = Vec::new();

// Add all original messages with updated visibility (preserve user_visible, set agent_visible=false)
for msg in messages.iter().cloned() {
let updated_metadata = msg.metadata.with_agent_invisible();
let updated_msg = msg.with_metadata(updated_metadata);
final_messages.push(updated_msg);
// These messages are no longer agent_visible, so their token count is recorded as 0
final_token_counts.push(0);
}

// Add the compaction marker (user_visible=true, agent_visible=false)
let compaction_marker = Message::assistant()
.with_summarization_requested("Conversation compacted and summarized")
.with_metadata(MessageMetadata::user_only());
let compaction_marker_tokens: usize = 0; // Not counted since agent_visible=false
final_messages.push(compaction_marker);
final_token_counts.push(compaction_marker_tokens);

// Add the summary message (agent_visible=true, user_visible=false)
let summary_msg = summary_message.with_metadata(MessageMetadata::agent_only());
// For token counting purposes, we use the output tokens (the actual summary content)
// since that's what will be in the context going forward
let summary_tokens = summarization_usage
.as_ref()
.and_then(|usage| usage.usage.output_tokens)
.unwrap_or(0) as usize;
final_messages.push(summary_msg);
final_token_counts.push(summary_tokens);

// Add an assistant message to continue the conversation (agent_visible=true, user_visible=false)
let assistant_message = Message::assistant().with_text("
The previous message contains a summary that was prepared because a context limit was reached.
Do not mention that you read a summary or that conversation summarization occurred
Just continue the conversation naturally based on the summarized context
").with_metadata(MessageMetadata::agent_only());
let assistant_message_tokens: usize = 0; // Not counted since it's for agent context only
final_messages.push(assistant_message);
final_token_counts.push(assistant_message_tokens);

Ok((
Conversation::new_unvalidated(new_messages),
new_token_counts,
Conversation::new_unvalidated(final_messages),
final_token_counts,
summarization_usage,
))
}
Expand Down
13 changes: 7 additions & 6 deletions crates/goose/src/agents/reply_parts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,13 @@ impl Agent {
}
}

let filtered_message = Message {
id: response.id.clone(),
role: response.role.clone(),
created: response.created,
content: filtered_content,
};
let mut filtered_message =
Message::new(response.role.clone(), response.created, filtered_content);

// Preserve the ID if it exists
if let Some(id) = response.id.clone() {
filtered_message = filtered_message.with_id(id);
}

// Categorize tool requests
let mut frontend_requests = Vec::new();
Expand Down
41 changes: 35 additions & 6 deletions crates/goose/src/context_mgmt/auto_compact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,35 @@ pub async fn check_and_compact_messages(
check_result.usage_ratio * 100.0
);

// Use perform_compaction to do the actual work
perform_compaction(agent, messages).await
// Check if the most recent message is a user message
let (messages_to_compact, preserved_user_message) = if let Some(last_message) = messages.last()
{
if matches!(last_message.role, rmcp::model::Role::User) {
// Remove the last user message before auto-compaction
(&messages[..messages.len() - 1], Some(last_message.clone()))
} else {
(messages, None)
}
} else {
(messages, None)
};

// Perform the compaction on messages excluding the preserved user message
// The summarize_context method already handles the visibility properly
let (mut summary_messages, _, summarization_usage) =
agent.summarize_context(messages_to_compact).await?;

// Add back the preserved user message if it exists
// (keeps default visibility: both true)
if let Some(user_message) = preserved_user_message {
summary_messages.push(user_message);
}

Ok(AutoCompactResult {
compacted: true,
messages: summary_messages,
summarization_usage,
})
}

#[cfg(test)]
Expand Down Expand Up @@ -455,8 +482,9 @@ mod tests {
);
}

// Should have fewer messages (summarized)
assert!(result.messages.len() <= messages.len());
// After visibility implementation, we keep all messages plus summary
// Original messages become user_visible only, summary becomes agent_visible only
assert!(result.messages.len() > messages.len());
}

#[tokio::test]
Expand Down Expand Up @@ -641,8 +669,9 @@ mod tests {
// Verify the compacted messages are returned
assert!(!result.messages.is_empty());

// Should have fewer messages after compaction
assert!(result.messages.len() <= messages.len());
// After visibility implementation, we keep all messages plus summary
// Original messages become user_visible only, summary becomes agent_visible only
assert!(result.messages.len() > messages.len());
}

#[tokio::test]
Expand Down
Loading
Loading