-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Break compaction back into check_ and do_ compaction #5212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
15224fb
a795429
814de07
af08051
69b66f2
60aca5e
d0098b3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -46,101 +46,62 @@ struct SummarizeContext { | |
| messages: String, | ||
| } | ||
|
|
||
| /// Check if messages need compaction and compact them if necessary | ||
| /// Compact messages by summarizing them | ||
| /// | ||
| /// This function combines checking and compaction. It first checks if compaction | ||
| /// is needed based on the threshold, and if so, performs the compaction by | ||
| /// summarizing messages and updating their visibility metadata. | ||
| /// This function performs the actual compaction by summarizing messages and updating | ||
| /// their visibility metadata. It does not check thresholds - use `check_if_compaction_needed` | ||
| /// first to determine if compaction is necessary. | ||
| /// | ||
| /// # Arguments | ||
| /// * `agent` - The agent to use for context management | ||
| /// * `messages` - The current message history | ||
| /// * `force_compact` - If true, skip the threshold check and force compaction | ||
| /// * `conversation` - The current conversation history | ||
| /// * `preserve_last_user_message` - If true and last message is not a user message, copy the most recent user message to the end | ||
| /// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config) | ||
| /// * `session_metadata` - Optional session metadata containing actual token counts | ||
| /// | ||
| /// # Returns | ||
| /// * A tuple containing: | ||
| /// - `bool`: Whether compaction was performed | ||
| /// - `Conversation`: The potentially compacted messages | ||
| /// - `Vec<usize>`: Indices of removed messages (empty if no compaction) | ||
| /// - `Option<ProviderUsage>`: Provider usage from summarization (if compaction occurred) | ||
| pub async fn check_and_compact_messages( | ||
| /// - `Conversation`: The compacted messages | ||
| /// - `Vec<usize>`: Token counts for each message | ||
| /// - `Option<ProviderUsage>`: Provider usage from summarization | ||
| pub async fn compact_messages( | ||
| agent: &Agent, | ||
| messages_with_user_message: &[Message], | ||
| force_compact: bool, | ||
| conversation: &Conversation, | ||
| preserve_last_user_message: bool, | ||
| threshold_override: Option<f64>, | ||
| session_metadata: Option<&crate::session::Session>, | ||
| ) -> std::result::Result<(bool, Conversation, Vec<usize>, Option<ProviderUsage>), anyhow::Error> { | ||
| if !force_compact { | ||
| let check_result = check_compaction_needed( | ||
| agent, | ||
| messages_with_user_message, | ||
| threshold_override, | ||
| session_metadata, | ||
| ) | ||
| .await?; | ||
|
|
||
| // If no compaction is needed, return early | ||
| if !check_result.needs_compaction { | ||
| debug!( | ||
| "No compaction needed (usage: {:.1}% <= {:.1}% threshold)", | ||
| check_result.usage_ratio * 100.0, | ||
| check_result.percentage_until_compaction | ||
| ); | ||
| return Ok(( | ||
| false, | ||
| Conversation::new_unvalidated(messages_with_user_message.to_vec()), | ||
| Vec::new(), | ||
| None, | ||
| )); | ||
| } | ||
| ) -> Result<(Conversation, Vec<usize>, Option<ProviderUsage>)> { | ||
| info!("Performing message compaction"); | ||
|
|
||
| info!( | ||
| "Performing message compaction (usage: {:.1}%)", | ||
| check_result.usage_ratio * 100.0 | ||
| ); | ||
| } else { | ||
| info!("Forcing message compaction due to context limit exceeded"); | ||
| } | ||
| let messages = conversation.messages(); | ||
|
|
||
| // Perform the actual compaction | ||
| // Check if the most recent message is a user message | ||
| let (messages, preserved_user_message) = | ||
| if let Some(last_message) = messages_with_user_message.last() { | ||
| if matches!(last_message.role, rmcp::model::Role::User) { | ||
| // Remove the last user message before compaction | ||
| ( | ||
| &messages_with_user_message[..messages_with_user_message.len() - 1], | ||
| Some(last_message.clone()), | ||
| ) | ||
| } else if preserve_last_user_message { | ||
| // Last message is not a user message, but we want to preserve the most recent user message | ||
| // Find the most recent user message and copy it (don't remove from history) | ||
| let most_recent_user_message = messages_with_user_message | ||
| .iter() | ||
| .rev() | ||
| .find(|msg| matches!(msg.role, rmcp::model::Role::User)) | ||
| .cloned(); | ||
| (messages_with_user_message, most_recent_user_message) | ||
| } else { | ||
| (messages_with_user_message, None) | ||
| } | ||
| let (messages_to_compact, preserved_user_message) = if let Some(last_message) = messages.last() | ||
| { | ||
| if matches!(last_message.role, rmcp::model::Role::User) { | ||
| // Remove the last user message before compaction | ||
| (&messages[..messages.len() - 1], Some(last_message.clone())) | ||
| } else if preserve_last_user_message { | ||
| // Last message is not a user message, but we want to preserve the most recent user message | ||
| // Find the most recent user message and copy it (don't remove from history) | ||
| let most_recent_user_message = messages | ||
| .iter() | ||
| .rev() | ||
| .find(|msg| matches!(msg.role, rmcp::model::Role::User)) | ||
| .cloned(); | ||
| (messages.as_slice(), most_recent_user_message) | ||
| } else { | ||
| (messages_with_user_message, None) | ||
| }; | ||
| (messages.as_slice(), None) | ||
| } | ||
| } else { | ||
| (messages.as_slice(), None) | ||
| }; | ||
|
|
||
| let provider = agent.provider().await?; | ||
| let summary = do_compact(provider.clone(), messages).await?; | ||
| let summary = do_compact(provider.clone(), messages_to_compact).await?; | ||
|
|
||
| let (summary_message, summarization_usage) = match summary { | ||
| Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)), | ||
| None => { | ||
| // No summary was generated (empty input) | ||
| tracing::warn!("Summarization failed. Returning empty messages."); | ||
| return Ok((false, Conversation::empty(), vec![], None)); | ||
| return Ok((Conversation::empty(), vec![], None)); | ||
| } | ||
| }; | ||
|
|
||
|
|
@@ -153,7 +114,7 @@ pub async fn check_and_compact_messages( | |
| let mut final_token_counts = Vec::new(); | ||
|
|
||
| // Add all original messages with updated visibility (preserve user_visible, set agent_visible=false) | ||
| for msg in messages.iter().cloned() { | ||
| for msg in messages_to_compact.iter().cloned() { | ||
| let updated_metadata = msg.metadata.with_agent_invisible(); | ||
| let updated_msg = msg.with_metadata(updated_metadata); | ||
| final_messages.push(updated_msg); | ||
|
|
@@ -198,34 +159,33 @@ Just continue the conversation naturally based on the summarized context" | |
| } | ||
|
|
||
| Ok(( | ||
| true, | ||
| Conversation::new_unvalidated(final_messages), | ||
| final_token_counts, | ||
| summarization_usage, | ||
| )) | ||
| } | ||
|
|
||
| /// Check if messages need compaction without performing the compaction | ||
| /// Check if messages exceed the auto-compaction threshold | ||
| /// | ||
| /// This function analyzes the current token usage and returns detailed information | ||
| /// about whether compaction is needed and how close we are to the threshold. | ||
| /// It prioritizes actual token counts from session metadata when available, | ||
| /// falling back to estimated counts if needed. | ||
| /// This function analyzes the current token usage and returns whether compaction | ||
| /// is needed based on the configured threshold. It prioritizes actual token counts | ||
| /// from session metadata when available, falling back to estimated counts if needed. | ||
| /// | ||
| /// # Arguments | ||
| /// * `agent` - The agent to use for context management | ||
| /// * `messages` - The current message history | ||
| /// * `conversation` - The current conversation history | ||
| /// * `threshold_override` - Optional threshold override (defaults to GOOSE_AUTO_COMPACT_THRESHOLD config) | ||
| /// * `session_metadata` - Optional session metadata containing actual token counts | ||
| /// | ||
| /// # Returns | ||
| /// * `CompactionCheckResult` containing detailed information about compaction needs | ||
| async fn check_compaction_needed( | ||
| /// * `CompactionCheckResult` containing whether compaction is needed and usage details | ||
| pub async fn check_if_compaction_needed( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can't we just return a boolean here? also delete the intro comments
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah good call, I don't think we're actually doing anything interesting with CompactionCheckResult. Mostly just want to display the GOOSE_AUTO_COMPACT_THRESHOLD that we exceeded. |
||
| agent: &Agent, | ||
| messages: &[Message], | ||
| conversation: &Conversation, | ||
| threshold_override: Option<f64>, | ||
| session_metadata: Option<&crate::session::Session>, | ||
| ) -> Result<CompactionCheckResult> { | ||
| let messages = conversation.messages(); | ||
| let config = Config::global(); | ||
| // TODO(Douwe): check the default here; it seems to reset to 0.3 sometimes | ||
| let threshold = threshold_override.unwrap_or_else(|| { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this whole block strikes me as weird. there's two cases here, either the last message is a user message (i.e. the user said something and we decided to auto compact or the user said something, our autocompacting didn't trigger but we ran out tokens never the less, i.e. user has compact limit at 99% and posts something long) or it is not (i.e. we ran out of context while processing a tool call reply)
we set preserve_last_user_message to true if this is triggered from the tool calling loop, but in all other cases, the last message should be a user message. so I don't think we need this flag at all.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first case where the last message is a user one, is from auto-compact.
The second exists for the difference of manual summarize vs tool calling case you described.
In manual compaction we don't actually want to re-trigger the agent loop (and the last message is probably an agent message from the last response), but if we hit a tool call ContextLimitExceeded, I want to find and re-process the most recent user message and continue the agent loop automatically.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes I understand. I'm just saying there's no difference.
that is one way, but the other is that we ran out of context with a user mesage. so in either case you are looking for the last user message.
The other thing we're missing here I think is that tool responses are also user mesages, so you'll typically return that for the tool loop case.