diff --git a/Cargo.lock b/Cargo.lock index 9541e0b7dbf9..e44955dcb11f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3597,6 +3597,7 @@ dependencies = [ "criterion", "ctor", "dotenv", + "goose", "include_dir", "indoc 1.0.9", "lazy_static", diff --git a/crates/goose-cli/src/commands/project.rs b/crates/goose-cli/src/commands/project.rs index 17e63e412baf..e049e66e323e 100644 --- a/crates/goose-cli/src/commands/project.rs +++ b/crates/goose-cli/src/commands/project.rs @@ -4,7 +4,7 @@ use cliclack::{self, intro, outro}; use std::path::Path; use crate::project_tracker::ProjectTracker; -use crate::utils::safe_truncate; +use goose::utils::safe_truncate; /// Format a DateTime for display fn format_date(date: DateTime) -> String { diff --git a/crates/goose-cli/src/commands/session.rs b/crates/goose-cli/src/commands/session.rs index fbb862482ad0..ff8f37e47ca6 100644 --- a/crates/goose-cli/src/commands/session.rs +++ b/crates/goose-cli/src/commands/session.rs @@ -1,9 +1,9 @@ use crate::session::message_to_markdown; -use crate::utils::safe_truncate; use anyhow::{Context, Result}; use cliclack::{confirm, multiselect, select}; use goose::session::info::{get_valid_sorted_sessions, SessionInfo, SortOrder}; use goose::session::{self, Identifier}; +use goose::utils::safe_truncate; use regex::Regex; use std::fs; use std::path::{Path, PathBuf}; diff --git a/crates/goose-cli/src/lib.rs b/crates/goose-cli/src/lib.rs index 055f38b9033a..68f2357f5ee0 100644 --- a/crates/goose-cli/src/lib.rs +++ b/crates/goose-cli/src/lib.rs @@ -7,7 +7,6 @@ pub mod project_tracker; pub mod recipes; pub mod session; pub mod signal; -pub mod utils; // Re-export commonly used types pub use session::Session; diff --git a/crates/goose-cli/src/session/export.rs b/crates/goose-cli/src/session/export.rs index 57b83b1efa22..90d1c9a76054 100644 --- a/crates/goose-cli/src/session/export.rs +++ b/crates/goose-cli/src/session/export.rs @@ -1,4 +1,5 @@ use goose::message::{Message, MessageContent, ToolRequest, ToolResponse}; +use goose::utils::safe_truncate; use mcp_core::content::Content as McpContent; use mcp_core::resource::ResourceContents; use mcp_core::role::Role; @@ -10,9 +11,9 @@ const REDACTED_PREFIX_LENGTH: usize = 100; // Show first 100 chars before trimmi fn value_to_simple_markdown_string(value: &Value, export_full_strings: bool) -> String { match value { Value::String(s) => { - if !export_full_strings && s.len() > MAX_STRING_LENGTH_MD_EXPORT { - let prefix = &s[..REDACTED_PREFIX_LENGTH.min(s.len())]; - let trimmed_chars = s.len() - prefix.len(); + if !export_full_strings && s.chars().count() > MAX_STRING_LENGTH_MD_EXPORT { + let prefix = safe_truncate(s, REDACTED_PREFIX_LENGTH); + let trimmed_chars = s.chars().count() - prefix.chars().count(); format!("`{}[ ... trimmed : {} chars ... ]`", prefix, trimmed_chars) } else { // Escape backticks and newlines for inline code. @@ -40,7 +41,7 @@ fn value_to_markdown(value: &Value, depth: usize, export_full_strings: bool) -> md_string.push_str(&format!("{}* **{}**: ", base_indent_str, key)); match val { Value::String(s) => { - if s.contains('\n') || s.len() > 80 { + if s.contains('\n') || s.chars().count() > 80 { // Heuristic for block md_string.push_str(&format!( "\n{} ```\n{}{}\n{} ```\n", @@ -74,7 +75,7 @@ fn value_to_markdown(value: &Value, depth: usize, export_full_strings: bool) -> md_string.push_str(&format!("{}* - ", base_indent_str)); match item { Value::String(s) => { - if s.contains('\n') || s.len() > 80 { + if s.contains('\n') || s.chars().count() > 80 { // Heuristic for block md_string.push_str(&format!( "\n{} ```\n{}{}\n{} ```\n", @@ -397,7 +398,7 @@ mod tests { assert!(result.starts_with("`")); assert!(result.contains("[ ... trimmed : ")); assert!(result.contains("4900 chars ... ]`")); - assert!(result.contains(&"a".repeat(100))); // Should contain the prefix + assert!(result.contains(&"a".repeat(97))); // Should contain the prefix (100 - 3 for "...") } #[test] diff --git a/crates/goose-cli/src/session/mod.rs b/crates/goose-cli/src/session/mod.rs index 825dd17cc0bd..35a8ae0c33b5 100644 --- a/crates/goose-cli/src/session/mod.rs +++ b/crates/goose-cli/src/session/mod.rs @@ -15,6 +15,7 @@ use goose::permission::Permission; use goose::permission::PermissionConfirmation; use goose::providers::base::Provider; pub use goose::session::Identifier; +use goose::utils::safe_truncate; use anyhow::{Context, Result}; use completion::GooseCompleter; @@ -993,11 +994,7 @@ impl Session { if min_priority > 0.1 && !self.debug { // High/Medium verbosity: show truncated response if let Some(response_content) = msg.strip_prefix("Responded: ") { - if response_content.len() > 100 { - format!("🤖 Responded: {}...", &response_content[..100]) - } else { - format!("🤖 {}", msg) - } + format!("🤖 Responded: {}", safe_truncate(response_content, 100)) } else { format!("🤖 {}", msg) } diff --git a/crates/goose-llm/Cargo.toml b/crates/goose-llm/Cargo.toml index 17723e31aac4..ef073a37f90b 100644 --- a/crates/goose-llm/Cargo.toml +++ b/crates/goose-llm/Cargo.toml @@ -15,6 +15,7 @@ crate-type = ["lib", "cdylib"] name = "goose_llm" [dependencies] +goose = { path = "../goose" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" anyhow = "1.0" diff --git a/crates/goose-llm/src/extractors/session_name.rs b/crates/goose-llm/src/extractors/session_name.rs index a01b18ab2eeb..ab0cf97df8a1 100644 --- a/crates/goose-llm/src/extractors/session_name.rs +++ b/crates/goose-llm/src/extractors/session_name.rs @@ -3,6 +3,7 @@ use crate::providers::errors::ProviderError; use crate::types::core::Role; use crate::{message::Message, types::json_value_ffi::JsonValueFfi}; use anyhow::Result; +use goose::utils::safe_truncate; use indoc::indoc; use serde_json::{json, Value}; @@ -60,11 +61,7 @@ pub async fn generate_session_name( .take(3) .map(|m| { let text = m.content.concat_text_str(); - if text.len() > 300 { - text.chars().take(300).collect() - } else { - text - } + safe_truncate(&text, 300) }) .collect(); diff --git a/crates/goose/src/agents/large_response_handler.rs b/crates/goose/src/agents/large_response_handler.rs index e4c0ab105544..0369bfa5cbb5 100644 --- a/crates/goose/src/agents/large_response_handler.rs +++ b/crates/goose/src/agents/large_response_handler.rs @@ -17,14 +17,14 @@ pub fn process_tool_response( match content { Content::Text(text_content) => { // Check if text exceeds threshold - if text_content.text.len() > LARGE_TEXT_THRESHOLD { + if text_content.text.chars().count() > LARGE_TEXT_THRESHOLD { // Write to temp file match write_large_text_to_file(&text_content.text) { Ok(file_path) => { // Create a new text content with reference to the file let message = format!( "The response returned from the tool call was larger ({} characters) and is stored in the file which you can use other tools to examine or search in: {}", - text_content.text.len(), + text_content.text.chars().count(), file_path ); processed_contents.push(Content::text(message)); diff --git a/crates/goose/src/context_mgmt/truncate.rs b/crates/goose/src/context_mgmt/truncate.rs index ba2f6490e0bb..2bc49f924386 100644 --- a/crates/goose/src/context_mgmt/truncate.rs +++ b/crates/goose/src/context_mgmt/truncate.rs @@ -1,4 +1,5 @@ use crate::message::{Message, MessageContent}; +use crate::utils::safe_truncate; use anyhow::{anyhow, Result}; use mcp_core::{Content, ResourceContents, Role}; use std::collections::HashSet; @@ -75,11 +76,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul for content in &mut new_message.content { match content { MessageContent::Text(text_content) => { - if text_content.text.len() > max_content_size { + if text_content.text.chars().count() > max_content_size { let truncated = format!( "{}\n\n[... content truncated from {} to {} characters ...]", - &text_content.text[..max_content_size.min(text_content.text.len())], - text_content.text.len(), + safe_truncate(&text_content.text, max_content_size), + text_content.text.chars().count(), max_content_size ); text_content.text = truncated; @@ -89,11 +90,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul if let Ok(ref mut result) = tool_response.tool_result { for content_item in result { if let Content::Text(ref mut text_content) = content_item { - if text_content.text.len() > max_content_size { + if text_content.text.chars().count() > max_content_size { let truncated = format!( "{}\n\n[... tool response truncated from {} to {} characters ...]", - &text_content.text[..max_content_size.min(text_content.text.len())], - text_content.text.len(), + safe_truncate(&text_content.text, max_content_size), + text_content.text.chars().count(), max_content_size ); text_content.text = truncated; @@ -104,11 +105,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul if let ResourceContents::TextResourceContents { text, .. } = &mut resource_content.resource { - if text.len() > max_content_size { + if text.chars().count() > max_content_size { let truncated = format!( "{}\n\n[... resource content truncated from {} to {} characters ...]", - &text[..max_content_size.min(text.len())], - text.len(), + safe_truncate(text, max_content_size), + text.chars().count(), max_content_size ); *text = truncated; diff --git a/crates/goose/src/lib.rs b/crates/goose/src/lib.rs index 83c4934d76fa..80e8e1abef32 100644 --- a/crates/goose/src/lib.rs +++ b/crates/goose/src/lib.rs @@ -15,6 +15,7 @@ pub mod temporal_scheduler; pub mod token_counter; pub mod tool_monitor; pub mod tracing; +pub mod utils; #[cfg(test)] mod cron_test; diff --git a/crates/goose/src/session/storage.rs b/crates/goose/src/session/storage.rs index b176511f300b..794b6748eac6 100644 --- a/crates/goose/src/session/storage.rs +++ b/crates/goose/src/session/storage.rs @@ -7,6 +7,7 @@ use crate::message::Message; use crate::providers::base::Provider; +use crate::utils::safe_truncate; use anyhow::Result; use chrono::Local; use etcetera::{choose_app_strategy, AppStrategy, AppStrategyArgs}; @@ -605,7 +606,7 @@ pub fn read_messages_with_truncation( // Log details about corrupted lines (with limited detail for security) for (num, line) in &corrupted_lines { let preview = if line.len() > 50 { - format!("{}... (truncated)", &line[..50]) + format!("{}... (truncated)", safe_truncate(line, 50)) } else { line.clone() }; @@ -678,11 +679,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us for content in &mut message.content { match content { MessageContent::Text(text_content) => { - if text_content.text.len() > max_content_size { + if text_content.text.chars().count() > max_content_size { let truncated = format!( "{}\n\n[... content truncated during session loading from {} to {} characters ...]", - &text_content.text[..max_content_size.min(text_content.text.len())], - text_content.text.len(), + safe_truncate(&text_content.text, max_content_size), + text_content.text.chars().count(), max_content_size ); text_content.text = truncated; @@ -693,11 +694,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us for content_item in result { match content_item { Content::Text(ref mut text_content) => { - if text_content.text.len() > max_content_size { + if text_content.text.chars().count() > max_content_size { let truncated = format!( "{}\n\n[... tool response truncated during session loading from {} to {} characters ...]", - &text_content.text[..max_content_size.min(text_content.text.len())], - text_content.text.len(), + safe_truncate(&text_content.text, max_content_size), + text_content.text.chars().count(), max_content_size ); text_content.text = truncated; @@ -707,11 +708,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us if let ResourceContents::TextResourceContents { text, .. } = &mut resource_content.resource { - if text.len() > max_content_size { + if text.chars().count() > max_content_size { let truncated = format!( "{}\n\n[... resource content truncated during session loading from {} to {} characters ...]", - &text[..max_content_size.min(text.len())], - text.len(), + safe_truncate(text, max_content_size), + text.chars().count(), max_content_size ); *text = truncated; @@ -751,7 +752,7 @@ fn attempt_corruption_recovery(json_str: &str, max_content_size: Option) // Strategy 4: Create a placeholder message with the raw content println!("[SESSION] All recovery strategies failed, creating placeholder message"); let preview = if json_str.len() > 200 { - format!("{}...", &json_str[..200]) + format!("{}...", safe_truncate(json_str, 200)) } else { json_str.to_string() }; @@ -968,7 +969,7 @@ fn truncate_json_string(json_str: &str, max_content_size: usize) -> String { if text_content.len() > max_content_size { let truncated_text = format!( "{}\n\n[... content truncated during JSON parsing from {} to {} characters ...]", - &text_content[..max_content_size.min(text_content.len())], + safe_truncate(text_content, max_content_size), text_content.len(), max_content_size ); @@ -1269,11 +1270,7 @@ pub async fn generate_description_with_schedule_id( .take(3) // Use up to first 3 user messages for context .map(|m| { let text = m.as_concat_text(); - if text.len() > 300 { - format!("{}...", &text[..300]) - } else { - text - } + safe_truncate(&text, 300) }) .collect(); @@ -1302,9 +1299,9 @@ pub async fn generate_description_with_schedule_id( let description = result.0.as_concat_text(); // Validate description length for security - let sanitized_description = if description.len() > 100 { + let sanitized_description = if description.chars().count() > 100 { tracing::warn!("Generated description too long, truncating"); - format!("{}...", &description[..97]) + safe_truncate(&description, 100) } else { description }; @@ -1379,9 +1376,9 @@ mod tests { println!( "[TEST] Input: {}", if corrupt_json.len() > 100 { - &corrupt_json[..100] + safe_truncate(corrupt_json, 100) } else { - corrupt_json + corrupt_json.to_string() } ); diff --git a/crates/goose-cli/src/utils.rs b/crates/goose/src/utils.rs similarity index 95% rename from crates/goose-cli/src/utils.rs rename to crates/goose/src/utils.rs index 69daddf1d2a1..60121f1bfe4d 100644 --- a/crates/goose-cli/src/utils.rs +++ b/crates/goose/src/utils.rs @@ -1,4 +1,3 @@ -/// Utility functions for safe string handling and other common operations /// Safely truncate a string at character boundaries, not byte boundaries /// /// This function ensures that multi-byte UTF-8 characters (like Japanese, emoji, etc.)