-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Manual compaction counting fix + cli cleanup #5480
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
c0e2825
214ac1a
1488502
29912ad
6c593dc
b323e98
a0f1265
23ca07c
5649ab8
094a4c3
bf6ea3d
1deff33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -255,6 +255,7 @@ impl Agent { | |||||
| pub(crate) async fn update_session_metrics( | ||||||
| session_config: &crate::agents::types::SessionConfig, | ||||||
| usage: &ProviderUsage, | ||||||
| is_compaction_usage: bool, | ||||||
| ) -> Result<()> { | ||||||
| let session_id = session_config.id.as_str(); | ||||||
| let session = SessionManager::get_session(session_id, false).await?; | ||||||
|
|
@@ -273,11 +274,23 @@ impl Agent { | |||||
| let accumulated_output = | ||||||
| accumulate(session.accumulated_output_tokens, usage.usage.output_tokens); | ||||||
|
|
||||||
| let (current_total, current_input, current_output) = if is_compaction_usage { | ||||||
| // After compaction: summary output becomes new input context | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

After manual compaction, should this not really just go to zero? Have we sent the manually compacted conversation to the model yet, or only to the client? Although I could see how that would be confusing to a user, it would be good to avoid a hard-coded system prompt estimate, just because that could really be all over the place. I suppose we could do something like use the token counts from the first message you send, but that would be complicated to do.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think this one is more of a question of what we want to show the user. Let's try dropping it to the summary size; there will be a bit of whiplash for the user when the system prompt re-enters on the next message, but I don't love that constant either.
||||||
| let new_input = usage.usage.output_tokens.map(|out| out); | ||||||
|
||||||
| let new_input = usage.usage.output_tokens.map(|out| out); | |
| let new_input = usage.usage.output_tokens; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,13 +31,12 @@ struct SummarizeContext { | |
| /// # Returns | ||
| /// * A tuple containing: | ||
| /// - `Conversation`: The compacted messages | ||
| /// - `Vec<usize>`: Token counts for each message | ||
| /// - `Option<ProviderUsage>`: Provider usage from summarization | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Nice, I saw we didn't use this. There's some code on the frontend you could clean up with this too.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Ack, will take another pass on the frontend too.
||
| /// - `ProviderUsage`: Provider usage from summarization | ||
| pub async fn compact_messages( | ||
| agent: &Agent, | ||
| conversation: &Conversation, | ||
| preserve_last_user_message: bool, | ||
| ) -> Result<(Conversation, Vec<usize>, Option<ProviderUsage>)> { | ||
| ) -> Result<(Conversation, ProviderUsage)> { | ||
| info!("Performing message compaction"); | ||
|
|
||
| let messages = conversation.messages(); | ||
|
|
@@ -99,44 +98,26 @@ pub async fn compact_messages( | |
| }; | ||
|
|
||
| let provider = agent.provider().await?; | ||
| let summary = do_compact(provider.clone(), messages_to_compact).await?; | ||
|
|
||
| let (summary_message, summarization_usage) = match summary { | ||
| Some((summary_message, provider_usage)) => (summary_message, Some(provider_usage)), | ||
| None => { | ||
| // No summary was generated (empty input) | ||
| tracing::warn!("Summarization failed. Returning empty messages."); | ||
| return Ok((Conversation::empty(), vec![], None)); | ||
| } | ||
| }; | ||
| let (summary_message, summarization_usage) = | ||
| do_compact(provider.clone(), messages_to_compact).await?; | ||
|
|
||
| // Create the final message list with updated visibility metadata: | ||
| // 1. Original messages become user_visible but not agent_visible | ||
| // 2. Summary message becomes agent_visible but not user_visible | ||
| // 3. Assistant messages to continue the conversation remain both user_visible and agent_visible | ||
|
|
||
| let mut final_messages = Vec::new(); | ||
| let mut final_token_counts = Vec::new(); | ||
|
|
||
| // Add all original messages with updated visibility (preserve user_visible, set agent_visible=false) | ||
| for msg in messages_to_compact.iter().cloned() { | ||
| let updated_metadata = msg.metadata.with_agent_invisible(); | ||
| let updated_msg = msg.with_metadata(updated_metadata); | ||
| final_messages.push(updated_msg); | ||
| // Token count doesn't matter for agent_visible=false messages, but we'll use 0 | ||
| final_token_counts.push(0); | ||
| } | ||
|
|
||
| // Add the summary message (agent_visible=true, user_visible=false) | ||
| let summary_msg = summary_message.with_metadata(MessageMetadata::agent_only()); | ||
| // For token counting purposes, we use the output tokens (the actual summary content) | ||
| // since that's what will be in the context going forward | ||
| let summary_tokens = summarization_usage | ||
| .as_ref() | ||
| .and_then(|usage| usage.usage.output_tokens) | ||
| .unwrap_or(0) as usize; | ||
| final_messages.push(summary_msg); | ||
| final_token_counts.push(summary_tokens); | ||
|
|
||
| // Add an assistant message to continue the conversation (agent_visible=true, user_visible=false) | ||
| let assistant_message = Message::assistant() | ||
|
|
@@ -146,9 +127,7 @@ Do not mention that you read a summary or that conversation summarization occurr | |
| Just continue the conversation naturally based on the summarized context" | ||
| ) | ||
| .with_metadata(MessageMetadata::agent_only()); | ||
| let assistant_message_tokens: usize = 0; // Not counted since it's for agent context only | ||
| final_messages.push(assistant_message); | ||
| final_token_counts.push(assistant_message_tokens); | ||
|
|
||
| // Add back the preserved user message if it exists | ||
| if let Some(user_text) = preserved_user_text { | ||
|
|
@@ -157,7 +136,6 @@ Just continue the conversation naturally based on the summarized context" | |
|
|
||
| Ok(( | ||
| Conversation::new_unvalidated(final_messages), | ||
| final_token_counts, | ||
| summarization_usage, | ||
| )) | ||
| } | ||
|
|
@@ -222,7 +200,7 @@ pub async fn check_if_compaction_needed( | |
| async fn do_compact( | ||
| provider: Arc<dyn Provider>, | ||
| messages: &[Message], | ||
| ) -> Result<Option<(Message, ProviderUsage)>, anyhow::Error> { | ||
| ) -> Result<(Message, ProviderUsage), anyhow::Error> { | ||
| let agent_visible_messages: Vec<&Message> = messages | ||
| .iter() | ||
| .filter(|msg| msg.is_agent_visible()) | ||
|
|
@@ -255,7 +233,7 @@ async fn do_compact( | |
| .await | ||
| .map_err(|e| anyhow::anyhow!("Failed to ensure usage tokens: {}", e))?; | ||
|
|
||
| Ok(Some((response, provider_usage))) | ||
| Ok((response, provider_usage)) | ||
| } | ||
|
|
||
| fn format_message_for_compacting(msg: &Message) -> String { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The deprecation message string is very long (over 100 characters). Consider breaking it into multiple lines or storing it as a constant for better readability and maintainability.