From 34ddd5948b85acbcd1c54282a50aee2cb330c9c8 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Fri, 1 Aug 2025 13:18:22 +1000 Subject: [PATCH 1/6] logic and test to not ingest large files --- crates/goose-mcp/src/developer/mod.rs | 230 ++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 8accfe0e8298..6c59a2512712 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -1011,6 +1011,16 @@ impl DeveloperRouter { let lines: Vec<&str> = content.lines().collect(); let total_lines = lines.len(); + // Check if file has more than 2000 lines and no view_range is provided + if view_range.is_none() && total_lines > 2000 { + return Err(ToolError::ExecutionError(format!( + "File '{}' is {} lines long, recommended to read in with view_range (or searching) to get bite size content. If you do wish to read all the file, please pass in view_range with [1, {}] to read it all at once", + path.display(), + total_lines, + total_lines + ))); + } + // Handle view_range if provided, otherwise show all lines let (start_idx, end_idx) = if let Some((start_line, end_line)) = view_range { // Convert 1-indexed line numbers to 0-indexed @@ -3233,6 +3243,226 @@ mod tests { temp_dir.close().unwrap(); } + #[tokio::test] + #[serial] + async fn test_text_editor_view_large_file_without_range() { + let router = get_router().await; + + let temp_dir = tempfile::tempdir().unwrap(); + let file_path = temp_dir.path().join("large_file.txt"); + let file_path_str = file_path.to_str().unwrap(); + std::env::set_current_dir(&temp_dir).unwrap(); + + // Create a file with more than 2000 lines + let mut content = String::new(); + for i in 1..=2001 { + content.push_str(&format!("Line {}\n", i)); + } + + router + .call_tool( + "text_editor", + json!({ + "command": "write", + "path": file_path_str, + "file_text": content + }), + dummy_sender(), + ) + .await + .unwrap(); + + // Test viewing without view_range - should trigger the error + let result = router + .call_tool( + "text_editor", + json!({ + "command": "view", + "path": file_path_str + }), + dummy_sender(), + ) + .await; + + assert!(result.is_err()); + let err = result.err().unwrap(); + assert!(matches!(err, ToolError::ExecutionError(_))); + assert!(err.to_string().contains("2001 lines long")); + assert!(err + .to_string() + .contains("recommended to read in with view_range")); + assert!(err + .to_string() + .contains("please pass in view_range with [1, 2001]")); + + // Test viewing with view_range - should work + let result = router + .call_tool( + "text_editor", + json!({ + "command": "view", + "path": file_path_str, + "view_range": [1, 100] + }), + dummy_sender(), + ) + .await; + + assert!(result.is_ok()); + let view_result = result.unwrap(); + let text = view_result + .iter() + .find(|c| { + c.audience() + .is_some_and(|roles| roles.contains(&Role::User)) + }) + .unwrap() + .as_text() + .unwrap(); + + // Should contain lines 1-100 + assert!(text.text.contains("1: Line 1")); + assert!(text.text.contains("100: Line 100")); + assert!(!text.text.contains("101: Line 101")); + + // Test viewing with explicit full range - should work + let result = router + .call_tool( + "text_editor", + json!({ + "command": "view", + "path": file_path_str, + "view_range": [1, 2001] + }), + dummy_sender(), + ) + .await; + + assert!(result.is_ok()); + + temp_dir.close().unwrap(); + } + + #[tokio::test] + #[serial] + async fn test_text_editor_view_file_with_exactly_2000_lines() { + let router = get_router().await; + + let temp_dir = tempfile::tempdir().unwrap(); + let file_path = temp_dir.path().join("file_2000.txt"); + let file_path_str = file_path.to_str().unwrap(); + std::env::set_current_dir(&temp_dir).unwrap(); + + // Create a file with exactly 2000 lines (should not trigger the check) + let mut content = String::new(); + for i in 1..=2000 { + content.push_str(&format!("Line {}\n", i)); + } + + router + .call_tool( + "text_editor", + json!({ + "command": "write", + "path": file_path_str, + "file_text": content + }), + dummy_sender(), + ) + .await + .unwrap(); + + // Test viewing without view_range - should work since it's exactly 2000 lines + let result = router + .call_tool( + "text_editor", + json!({ + "command": "view", + "path": file_path_str + }), + dummy_sender(), + ) + .await; + + assert!(result.is_ok()); + let view_result = result.unwrap(); + let text = view_result + .iter() + .find(|c| { + c.audience() + .is_some_and(|roles| roles.contains(&Role::User)) + }) + .unwrap() + .as_text() + .unwrap(); + + // Should contain all lines + assert!(text.text.contains("1: Line 1")); + assert!(text.text.contains("2000: Line 2000")); + + temp_dir.close().unwrap(); + } + + #[tokio::test] + #[serial] + async fn test_text_editor_view_small_file_without_range() { + let router = get_router().await; + + let temp_dir = tempfile::tempdir().unwrap(); + let file_path = temp_dir.path().join("small_file.txt"); + let file_path_str = file_path.to_str().unwrap(); + std::env::set_current_dir(&temp_dir).unwrap(); + + // Create a file with less than 2000 lines + let mut content = String::new(); + for i in 1..=100 { + content.push_str(&format!("Line {}\n", i)); + } + + router + .call_tool( + "text_editor", + json!({ + "command": "write", + "path": file_path_str, + "file_text": content + }), + dummy_sender(), + ) + .await + .unwrap(); + + // Test viewing without view_range - should work fine + let result = router + .call_tool( + "text_editor", + json!({ + "command": "view", + "path": file_path_str + }), + dummy_sender(), + ) + .await; + + assert!(result.is_ok()); + let view_result = result.unwrap(); + let text = view_result + .iter() + .find(|c| { + c.audience() + .is_some_and(|roles| roles.contains(&Role::User)) + }) + .unwrap() + .as_text() + .unwrap(); + + // Should contain all lines + assert!(text.text.contains("1: Line 1")); + assert!(text.text.contains("100: Line 100")); + + temp_dir.close().unwrap(); + } + #[tokio::test] #[serial] async fn test_bash_output_truncation() { From 5714a084882c2408c1c46a240945386e8b29f439 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Fri, 1 Aug 2025 14:20:35 +1000 Subject: [PATCH 2/6] Update mod.rs remove slop comment --- crates/goose-mcp/src/developer/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 6c59a2512712..42df26024332 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -1011,7 +1011,6 @@ impl DeveloperRouter { let lines: Vec<&str> = content.lines().collect(); let total_lines = lines.len(); - // Check if file has more than 2000 lines and no view_range is provided if view_range.is_none() && total_lines > 2000 { return Err(ToolError::ExecutionError(format!( "File '{}' is {} lines long, recommended to read in with view_range (or searching) to get bite size content. If you do wish to read all the file, please pass in view_range with [1, {}] to read it all at once", From 357f4a2a81579ad787339dcf82aa1bc267f4536e Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 5 Aug 2025 11:20:10 +1000 Subject: [PATCH 3/6] refactoring for new clippy rules --- .goosehints | 2 +- crates/goose-mcp/src/developer/mod.rs | 251 ++++++++++-------- .../test_lead_worker.sh | 0 test_web.sh => scripts/test_web.sh | 0 4 files changed, 138 insertions(+), 115 deletions(-) rename test_lead_worker.sh => scripts/test_lead_worker.sh (100%) rename test_web.sh => scripts/test_web.sh (100%) diff --git a/.goosehints b/.goosehints index aa85fa021d80..c29eff98dc66 100644 --- a/.goosehints +++ b/.goosehints @@ -14,5 +14,5 @@ that you can call the functionality from the server from the typescript. tips: - can look at unstaged changes for what is being worked on if starting -- always check rust compiles, cargo fmt etc and cargo clippy -- -D warnings (as well as run tests in files you are working on) +- always check rust compiles, cargo fmt etc and `./scripts/clippy-lint.sh` (as well as run tests in files you are working on) - in ui/desktop, look at how you can run lint checks and if other tests can run diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 178f15eab7ff..5200ae639ea9 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -48,6 +48,7 @@ use ignore::gitignore::{Gitignore, GitignoreBuilder}; // Embeds the prompts directory to the build static PROMPTS_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/src/developer/prompts"); +const LINE_READ_LIMIT: usize = 2000; /// Loads prompt files from the embedded PROMPTS_DIR and returns a HashMap of prompts. /// Ensures that each prompt name is unique. @@ -963,139 +964,152 @@ impl DeveloperRouter { } } - async fn text_editor_view( + // Helper method to validate and calculate view range indices + fn calculate_view_range( &self, - path: &PathBuf, view_range: Option<(usize, i64)>, - ) -> Result, ToolError> { - if path.is_file() { - // Check file size first (400KB limit) - const MAX_FILE_SIZE: u64 = 400 * 1024; // 400KB in bytes - - let f = File::open(path) - .map_err(|e| ToolError::ExecutionError(format!("Failed to open file: {}", e)))?; + total_lines: usize, + ) -> Result<(usize, usize), ToolError> { + if let Some((start_line, end_line)) = view_range { + // Convert 1-indexed line numbers to 0-indexed + let start_idx = if start_line > 0 { start_line - 1 } else { 0 }; + let end_idx = if end_line == -1 { + total_lines + } else { + std::cmp::min(end_line as usize, total_lines) + }; - let file_size = f - .metadata() - .map_err(|e| { - ToolError::ExecutionError(format!("Failed to get file metadata: {}", e)) - })? - .len(); + if start_idx >= total_lines { + return Err(ToolError::InvalidParameters(format!( + "Start line {} is beyond the end of the file (total lines: {})", + start_line, total_lines + ))); + } - if file_size > MAX_FILE_SIZE { - return Err(ToolError::ExecutionError(format!( - "File '{}' is too large ({:.2}KB). Maximum size is 400KB to prevent memory issues.", - path.display(), - file_size as f64 / 1024.0 + if start_idx >= end_idx { + return Err(ToolError::InvalidParameters(format!( + "Start line {} must be less than end line {}", + start_line, end_line ))); } - // Ensure we never read over that limit even if the file is being concurrently mutated - // (e.g. it's a log file) - let mut f = f.take(MAX_FILE_SIZE); - let uri = Url::from_file_path(path) - .map_err(|_| ToolError::ExecutionError("Invalid file path".into()))? - .to_string(); + Ok((start_idx, end_idx)) + } else { + Ok((0, total_lines)) + } + } - let mut content = String::new(); - f.read_to_string(&mut content) - .map_err(|e| ToolError::ExecutionError(format!("Failed to read file: {}", e)))?; + // Helper method to format file content with line numbers + fn format_file_content( + &self, + path: &Path, + lines: &[&str], + start_idx: usize, + end_idx: usize, + view_range: Option<(usize, i64)>, + ) -> String { + let display_content = if lines.is_empty() { + String::new() + } else { + let selected_lines: Vec = lines[start_idx..end_idx] + .iter() + .enumerate() + .map(|(i, line)| format!("{}: {}", start_idx + i + 1, line)) + .collect(); - let lines: Vec<&str> = content.lines().collect(); - let total_lines = lines.len(); + selected_lines.join("\n") + }; - if view_range.is_none() && total_lines > 2000 { - return Err(ToolError::ExecutionError(format!( - "File '{}' is {} lines long, recommended to read in with view_range (or searching) to get bite size content. If you do wish to read all the file, please pass in view_range with [1, {}] to read it all at once", - path.display(), - total_lines, - total_lines - ))); + let language = lang::get_language_identifier(path); + if view_range.is_some() { + formatdoc! {" + ### {path} (lines {start}-{end}) + ```{language} + {content} + ``` + ", + path=path.display(), + start=view_range.unwrap().0, + end=if view_range.unwrap().1 == -1 { "end".to_string() } else { view_range.unwrap().1.to_string() }, + language=language, + content=display_content, + } + } else { + formatdoc! {" + ### {path} + ```{language} + {content} + ``` + ", + path=path.display(), + language=language, + content=display_content, } + } + } - // Handle view_range if provided, otherwise show all lines - let (start_idx, end_idx) = if let Some((start_line, end_line)) = view_range { - // Convert 1-indexed line numbers to 0-indexed - let start_idx = if start_line > 0 { start_line - 1 } else { 0 }; - let end_idx = if end_line == -1 { - total_lines - } else { - std::cmp::min(end_line as usize, total_lines) - }; - - if start_idx >= total_lines { - return Err(ToolError::InvalidParameters(format!( - "Start line {} is beyond the end of the file (total lines: {})", - start_line, total_lines - ))); - } + async fn text_editor_view( + &self, + path: &PathBuf, + view_range: Option<(usize, i64)>, + ) -> Result, ToolError> { + if !path.is_file() { + return Err(ToolError::ExecutionError(format!( + "The path '{}' does not exist or is not a file.", + path.display() + ))); + } - if start_idx >= end_idx { - return Err(ToolError::InvalidParameters(format!( - "Start line {} must be less than end line {}", - start_line, end_line - ))); - } + // Check file size first (400KB limit) + const MAX_FILE_SIZE: u64 = 400 * 1024; // 400KB in bytes - (start_idx, end_idx) - } else { - (0, total_lines) - }; + let f = File::open(path) + .map_err(|e| ToolError::ExecutionError(format!("Failed to open file: {}", e)))?; - // Always format lines with line numbers for better usability - let display_content = if total_lines == 0 { - String::new() - } else { - let selected_lines: Vec = lines[start_idx..end_idx] - .iter() - .enumerate() - .map(|(i, line)| format!("{}: {}", start_idx + i + 1, line)) - .collect(); + let file_size = f + .metadata() + .map_err(|e| { + ToolError::ExecutionError(format!("Failed to get file metadata: {}", e)) + })? + .len(); - selected_lines.join("\n") - }; + if file_size > MAX_FILE_SIZE { + return Err(ToolError::ExecutionError(format!( + "File '{}' is too large ({:.2}KB). Maximum size is 400KB to prevent memory issues.", + path.display(), + file_size as f64 / 1024.0 + ))); + } - let language = lang::get_language_identifier(path); - let formatted = if view_range.is_some() { - formatdoc! {" - ### {path} (lines {start}-{end}) - ```{language} - {content} - ``` - ", - path=path.display(), - start=view_range.unwrap().0, - end=if view_range.unwrap().1 == -1 { "end".to_string() } else { view_range.unwrap().1.to_string() }, - language=language, - content=display_content, - } - } else { - formatdoc! {" - ### {path} - ```{language} - {content} - ``` - ", - path=path.display(), - language=language, - content=display_content, - } - }; + // Ensure we never read over that limit even if the file is being concurrently mutated + let mut f = f.take(MAX_FILE_SIZE); - // The LLM gets just a quick update as we expect the file to view in the status - // but we send a low priority message for the human - Ok(vec![ - Content::embedded_text(uri, content).with_audience(vec![Role::Assistant]), - Content::text(formatted) - .with_audience(vec![Role::User]) - .with_priority(0.0), - ]) - } else { - Err(ToolError::ExecutionError(format!( - "The path '{}' does not exist or is not a file.", - path.display() - ))) + let uri = Url::from_file_path(path) + .map_err(|_| ToolError::ExecutionError("Invalid file path".into()))? + .to_string(); + + let mut content = String::new(); + f.read_to_string(&mut content) + .map_err(|e| ToolError::ExecutionError(format!("Failed to read file: {}", e)))?; + + let lines: Vec<&str> = content.lines().collect(); + let total_lines = lines.len(); + + if view_range.is_none() && total_lines > LINE_READ_LIMIT { + return recommend_read_range(path, total_lines); } + + let (start_idx, end_idx) = self.calculate_view_range(view_range, total_lines)?; + let formatted = self.format_file_content(path, &lines, start_idx, end_idx, view_range); + + // The LLM gets just a quick update as we expect the file to view in the status + // but we send a low priority message for the human + Ok(vec![ + Content::embedded_text(uri, content).with_audience(vec![Role::Assistant]), + Content::text(formatted) + .with_audience(vec![Role::User]) + .with_priority(0.0), + ]) } async fn text_editor_write( @@ -1615,6 +1629,15 @@ impl DeveloperRouter { } } +fn recommend_read_range(path: &Path, total_lines: usize) -> Result, ToolError> { + Err(ToolError::ExecutionError(format!( + "File '{}' is {} lines long, recommended to read in with view_range (or searching) to get bite size content. If you do wish to read all the file, please pass in view_range with [1, {}] to read it all at once", + path.display(), + total_lines, + total_lines + ))) +} + impl Router for DeveloperRouter { fn name(&self) -> String { "developer".to_string() diff --git a/test_lead_worker.sh b/scripts/test_lead_worker.sh similarity index 100% rename from test_lead_worker.sh rename to scripts/test_lead_worker.sh diff --git a/test_web.sh b/scripts/test_web.sh similarity index 100% rename from test_web.sh rename to scripts/test_web.sh From e1078004abf7ccf3471ea48df89d822b2b621dc3 Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 5 Aug 2025 11:22:46 +1000 Subject: [PATCH 4/6] fmt --- crates/goose-mcp/src/developer/mod.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 5200ae639ea9..0a494c4fecf3 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -1068,9 +1068,7 @@ impl DeveloperRouter { let file_size = f .metadata() - .map_err(|e| { - ToolError::ExecutionError(format!("Failed to get file metadata: {}", e)) - })? + .map_err(|e| ToolError::ExecutionError(format!("Failed to get file metadata: {}", e)))? .len(); if file_size > MAX_FILE_SIZE { @@ -1094,7 +1092,7 @@ impl DeveloperRouter { let lines: Vec<&str> = content.lines().collect(); let total_lines = lines.len(); - + if view_range.is_none() && total_lines > LINE_READ_LIMIT { return recommend_read_range(path, total_lines); } @@ -1635,7 +1633,7 @@ fn recommend_read_range(path: &Path, total_lines: usize) -> Result, path.display(), total_lines, total_lines - ))) + ))) } impl Router for DeveloperRouter { From 06c25ffc51139f1fa5ed407cbc9928f4283849ce Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 5 Aug 2025 11:33:04 +1000 Subject: [PATCH 5/6] human written comment --- crates/goose-mcp/src/developer/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 0a494c4fecf3..5cc7391ec5c9 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -1093,6 +1093,8 @@ impl DeveloperRouter { let lines: Vec<&str> = content.lines().collect(); let total_lines = lines.len(); + // We will gently encourage the LLM to specify a range for large line count files + // it can of course specify exact range to read any size file if view_range.is_none() && total_lines > LINE_READ_LIMIT { return recommend_read_range(path, total_lines); } From cf87a415a9eebbe63b2109f0de07b3457b94a90d Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Tue, 5 Aug 2025 18:58:28 +1000 Subject: [PATCH 6/6] some leftover mistakes carried over, cleaning --- .../src/developer/editor_models/morphllm_editor.rs | 3 --- crates/goose-mcp/src/developer/mod.rs | 9 ++++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/goose-mcp/src/developer/editor_models/morphllm_editor.rs b/crates/goose-mcp/src/developer/editor_models/morphllm_editor.rs index 3c6266d16ff6..76834cf588ba 100644 --- a/crates/goose-mcp/src/developer/editor_models/morphllm_editor.rs +++ b/crates/goose-mcp/src/developer/editor_models/morphllm_editor.rs @@ -67,8 +67,6 @@ impl EditorModelImpl for MorphLLMEditor { _old_str: &str, update_snippet: &str, ) -> Result { - eprintln!("Calling MorphLLM Editor API"); - // Construct the full URL let provider_url = if self.host.ends_with("/chat/completions") { self.host.clone() @@ -128,7 +126,6 @@ impl EditorModelImpl for MorphLLMEditor { .and_then(|content| content.as_str()) .ok_or_else(|| "Invalid response format".to_string())?; - eprintln!("MorphLLM Editor API worked"); Ok(content.to_string()) } diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 6e158b583c6d..5864d5e8cde1 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -1062,8 +1062,7 @@ impl DeveloperRouter { ))); } - // Check file size first (400KB limit) - const MAX_FILE_SIZE: u64 = 400 * 1024; // 400KB in bytes + const MAX_FILE_SIZE: u64 = 400 * 1024; // 400KB let f = File::open(path) .map_err(|e| ToolError::ExecutionError(format!("Failed to open file: {}", e)))?; @@ -1734,7 +1733,7 @@ impl Clone for DeveloperRouter { instructions: self.instructions.clone(), file_history: Arc::clone(&self.file_history), ignore_patterns: Arc::clone(&self.ignore_patterns), - editor_model: create_editor_model(), // Recreate the editor model since it's not Clone + editor_model: create_editor_model(), } } } @@ -3270,9 +3269,9 @@ mod tests { let file_path_str = file_path.to_str().unwrap(); std::env::set_current_dir(&temp_dir).unwrap(); - // Create a file with more than 2000 lines + // Create a file with more than LINE_READ_LIMIT lines let mut content = String::new(); - for i in 1..=2001 { + for i in 1..=LINE_READ_LIMIT + 1 { content.push_str(&format!("Line {}\n", i)); }