Skip to content
170 changes: 167 additions & 3 deletions crates/goose-mcp/src/developer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,46 @@ impl DeveloperRouter {
self.ignore_patterns.matched(path, false).is_ignore()
}

// shell output can be large, this will help manage that
fn process_shell_output(&self, output_str: &str) -> Result<(String, String), ToolError> {
let lines: Vec<&str> = output_str.lines().collect();
let line_count = lines.len();

let start = lines.len().saturating_sub(100);
let last_100_lines_str = lines[start..].join("\n");

let final_output = if line_count > 100 {
let tmp_file = tempfile::NamedTempFile::new().map_err(|e| {
ToolError::ExecutionError(format!("Failed to create temporary file: {}", e))
})?;

std::fs::write(tmp_file.path(), output_str).map_err(|e| {
ToolError::ExecutionError(format!("Failed to write to temporary file: {}", e))
})?;

let (_, path) = tmp_file.keep().map_err(|e| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's nothing that's garbage collecting these temporary files (apart from the OS level tempfile cleaner right?)

That seems very suboptimal...I did this IMO a better way in #2817

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How long to retain the tempfiles is an interesting topic - in #2817 notice that the tempfile is owned in Rust by the mcp server (so we get cleanup on drop) and the semantics basically are that the next shell command that outputs large data replaces it.

My thought is that in a flow of doing make/cargo/whatever build (gets redirected), then the model can do e.g. grep <tmpfile> error: and as long as that doesn't overflow 100 lines the tempfile is preserved, which should hopefully be a common case. But it might needs careful explaining to the model, I didn't really hammer on it from that angle.

Leaking the tempfiles entirely avoids that problem but...yeah I don't think we want to do that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah the os will clean them up which is preferred (and isn't the end of the world if they are gone in an old session, hopefully we got the useful information). Anything more durable and yeah it should be in the session structure itself (but not sent to the LLM)

ToolError::ExecutionError(format!("Failed to persist temporary file: {}", e))
})?;

format!(
"private note: output was {} lines and we are only showing the most recent lines, remainder of lines in {} do not show tmp file to user, that file can be searched if extra context needed to fulfill request. truncated output: \n{}",
line_count,
path.display(),
last_100_lines_str
)
} else {
output_str.to_string()
};

let user_output = if line_count > 100 {
format!("... \n{}", last_100_lines_str)
} else {
output_str.to_string()
};

Ok((final_output, user_output))
}

// Helper method to resolve a path relative to cwd with platform-specific handling
fn resolve_path(&self, path_str: &str) -> Result<PathBuf, ToolError> {
let cwd = std::env::current_dir().expect("should have a current working dir");
Expand Down Expand Up @@ -765,9 +805,11 @@ impl DeveloperRouter {
)));
}

let (final_output, user_output) = self.process_shell_output(&output_str)?;

Ok(vec![
Content::text(output_str.clone()).with_audience(vec![Role::Assistant]),
Content::text(output_str)
Content::text(final_output).with_audience(vec![Role::Assistant]),
Content::text(user_output)
.with_audience(vec![Role::User])
.with_priority(0.0),
])
Expand Down Expand Up @@ -1673,9 +1715,10 @@ impl Clone for DeveloperRouter {
#[cfg(test)]
mod tests {
use super::*;
use core::panic;
use serde_json::json;
use serial_test::serial;
use std::fs;
use std::fs::{self, read_to_string};
use tempfile::TempDir;
use tokio::sync::OnceCell;

Expand Down Expand Up @@ -3189,4 +3232,125 @@ mod tests {

temp_dir.close().unwrap();
}

#[tokio::test]
#[serial]
async fn test_bash_output_truncation() {
let temp_dir = tempfile::tempdir().unwrap();
std::env::set_current_dir(&temp_dir).unwrap();

let router = get_router().await;

// Create a command that generates > 100 lines of output
let command = if cfg!(windows) {
"for /L %i in (1,1,150) do @echo Line %i"
} else {
"for i in {1..150}; do echo \"Line $i\"; done"
};

let result = router
.call_tool("shell", json!({ "command": command }), dummy_sender())
.await
.unwrap();

// Should have two Content items
assert_eq!(result.len(), 2);

// Find the Assistant and User content
let assistant_content = result
.iter()
.find(|c| {
c.audience()
.is_some_and(|roles| roles.contains(&Role::Assistant))
})
.unwrap()
.as_text()
.unwrap();

let user_content = result
.iter()
.find(|c| {
c.audience()
.is_some_and(|roles| roles.contains(&Role::User))
})
.unwrap()
.as_text()
.unwrap();

// Assistant should get the full message with temp file info
assert!(assistant_content.text.contains("private note: output was"));

// User should only get the truncated output with prefix
assert!(user_content.text.starts_with("..."));
assert!(!user_content.text.contains("private note: output was"));

// User output should contain lines 51-150 (last 100 lines)
assert!(user_content.text.contains("Line 51"));
assert!(user_content.text.contains("Line 150"));
assert!(!user_content.text.contains("Line 50"));

let start_tag = "remainder of lines in";
let end_tag = "do not show tmp file to user";

if let (Some(start), Some(end)) = (
assistant_content.text.find(start_tag),
assistant_content.text.find(end_tag),
) {
let start_idx = start + start_tag.len();
if start_idx < end {
let path = assistant_content.text[start_idx..end].trim();
println!("Extracted path: {}", path);

let file_contents =
read_to_string(path).expect("Failed to read extracted temp file");

let lines: Vec<&str> = file_contents.lines().collect();

// Ensure we have exactly 150 lines
assert_eq!(lines.len(), 150, "Expected 150 lines in temp file");

// Ensure the first and last lines are correct
assert_eq!(lines.first(), Some(&"Line 1"), "First line mismatch");
assert_eq!(lines.last(), Some(&"Line 150"), "Last line mismatch");
} else {
panic!("No path found in bash output truncation output");
}
} else {
panic!("Failed to find start or end tag in bash output truncation output");
}

temp_dir.close().unwrap();
}

#[test]
fn test_process_shell_output_short() {
let dir = TempDir::new().unwrap();
std::env::set_current_dir(dir.path()).unwrap();

let router = DeveloperRouter::new();

// Test with short output (< 100 lines)
let short_output = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5";
let result = router.process_shell_output(short_output).unwrap();

// Both outputs should be the same for short outputs
assert_eq!(result.0, short_output);
assert_eq!(result.1, short_output);
}

#[test]
fn test_process_shell_output_empty() {
let dir = TempDir::new().unwrap();
std::env::set_current_dir(dir.path()).unwrap();

let router = DeveloperRouter::new();

// Test with empty output
let empty_output = "";
let result = router.process_shell_output(empty_output).unwrap();

// Both outputs should be empty
assert_eq!(result.0, "");
assert_eq!(result.1, "");
}
}
7 changes: 7 additions & 0 deletions crates/goose/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,11 @@ impl ModelConfig {
#[cfg(test)]
mod tests {
use super::*;
use serial_test::serial;
use temp_env::with_var;

#[test]
#[serial]
fn test_model_config_context_limits() {
let config = ModelConfig::new("claude-3-opus")
.unwrap()
Expand All @@ -263,6 +265,7 @@ mod tests {
}

#[test]
#[serial]
fn test_invalid_context_limit() {
with_var("GOOSE_CONTEXT_LIMIT", Some("abc"), || {
let result = ModelConfig::new("test-model");
Expand All @@ -285,6 +288,7 @@ mod tests {
}

#[test]
#[serial]
fn test_invalid_temperature() {
with_var("GOOSE_TEMPERATURE", Some("hot"), || {
let result = ModelConfig::new("test-model");
Expand All @@ -298,6 +302,7 @@ mod tests {
}

#[test]
#[serial]
fn test_invalid_toolshim() {
with_var("GOOSE_TOOLSHIM", Some("maybe"), || {
let result = ModelConfig::new("test-model");
Expand All @@ -311,6 +316,7 @@ mod tests {
}

#[test]
#[serial]
fn test_empty_toolshim_model() {
with_var("GOOSE_TOOLSHIM_OLLAMA_MODEL", Some(""), || {
let result = ModelConfig::new("test-model");
Expand All @@ -328,6 +334,7 @@ mod tests {
}

#[test]
#[serial]
fn test_valid_configurations() {
// Test with environment variables set
with_var("GOOSE_CONTEXT_LIMIT", Some("50000"), || {
Expand Down
1 change: 0 additions & 1 deletion ui/desktop/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -1862,7 +1862,6 @@
"description": "A message to or from an LLM",
"required": [
"role",
"created",
"content"
],
"properties": {
Expand Down
2 changes: 1 addition & 1 deletion ui/desktop/src/api/types.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ export type ListSchedulesResponse = {
*/
export type Message = {
content: Array<MessageContent>;
created: number;
created?: number;
id?: string | null;
role: Role;
};
Expand Down
2 changes: 1 addition & 1 deletion ui/desktop/src/components/context_management/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ export function convertApiMessageToFrontendMessage(
sendToLLM: sendToLLM ?? true,
id: generateId(),
role: apiMessage.role as Role,
created: apiMessage.created,
created: apiMessage.created ?? 0,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DOsinga @zanesq not sure about this one - but something had to be here as it seems to now be optional? I don't quite understand the change earlier

content: apiMessage.content
.map((apiContent) => mapApiContentToFrontendMessageContent(apiContent))
.filter((content): content is FrontendMessageContent => content !== null),
Expand Down
Loading