diff --git a/Cargo.lock b/Cargo.lock index e6b9aa964bfd..8c69389d742e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2725,6 +2725,7 @@ dependencies = [ "serial_test", "shellexpand", "sysinfo 0.32.1", + "temp-env", "tempfile", "thiserror 1.0.69", "tokio", diff --git a/crates/goose-mcp/Cargo.toml b/crates/goose-mcp/Cargo.toml index dbf8000483c3..feb195ded449 100644 --- a/crates/goose-mcp/Cargo.toml +++ b/crates/goose-mcp/Cargo.toml @@ -64,6 +64,7 @@ glob = "0.3" [dev-dependencies] serial_test = "3.0.0" sysinfo = "0.32.1" +temp-env = "0.3.6" [features] utoipa = ["dep:utoipa"] diff --git a/crates/goose-mcp/src/developer/goose_hints/import_files.rs b/crates/goose-mcp/src/developer/goose_hints/import_files.rs new file mode 100644 index 000000000000..e74411c83814 --- /dev/null +++ b/crates/goose-mcp/src/developer/goose_hints/import_files.rs @@ -0,0 +1,488 @@ +use ignore::gitignore::Gitignore; +use once_cell::sync::Lazy; +use std::{ + collections::HashSet, + path::{Path, PathBuf}, +}; + +static FILE_REFERENCE_REGEX: Lazy = Lazy::new(|| { + regex::Regex::new(r"(?:^|\s)@([a-zA-Z0-9_\-./]+(?:\.[a-zA-Z0-9]+)+|[A-Z][a-zA-Z0-9_\-]*|[a-zA-Z0-9_\-./]*[./][a-zA-Z0-9_\-./]*)") + .expect("Invalid file reference regex pattern") +}); + +const MAX_DEPTH: usize = 3; + +fn sanitize_reference_path( + reference: &Path, + including_file_path: &Path, + import_boundary: &Path, +) -> Result { + if reference.is_absolute() { + return Err(std::io::Error::new( + std::io::ErrorKind::PermissionDenied, + "Absolute paths not allowed in file references", + )); + } + let resolved = including_file_path.join(reference); + let boundary_canonical = import_boundary.canonicalize().map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::NotFound, + "Import boundary directory not found", + ) + })?; + + if let Ok(canonical) = resolved.canonicalize() { + if !canonical.starts_with(&boundary_canonical) { + return Err(std::io::Error::new( + std::io::ErrorKind::PermissionDenied, + format!( + "Include: '{}' is outside the import boundary '{}'", + resolved.display(), + import_boundary.display() + ), + )); + } + Ok(canonical) + } else { + Ok(resolved) // File doesn't exist, but path structure is safe + } +} + +fn parse_file_references(content: &str) -> Vec { + // Keep size limits for ReDoS protection - .goosehints should be reasonably sized + const MAX_CONTENT_LENGTH: usize = 131_072; // 128KB limit + + if content.len() > MAX_CONTENT_LENGTH { + tracing::warn!( + "Content too large for file reference parsing: {} bytes (limit: {} bytes)", + content.len(), + MAX_CONTENT_LENGTH + ); + return Vec::new(); + } + + FILE_REFERENCE_REGEX + .captures_iter(content) + .map(|cap| PathBuf::from(&cap[1])) + .collect() +} + +fn should_process_reference( + reference: &Path, + including_file_path: &Path, + import_boundary: &Path, + visited: &HashSet, + ignore_patterns: &Gitignore, +) -> Option { + if visited.contains(reference) { + return None; + } + let safe_path = match sanitize_reference_path(reference, including_file_path, import_boundary) { + Ok(path) => path, + Err(_) => { + tracing::warn!("Skipping unsafe file reference: {:?}", reference); + return None; + } + }; + + if ignore_patterns.matched(&safe_path, false).is_ignore() { + tracing::debug!("Skipping ignored file reference: {:?}", safe_path); + return None; + } + + if !safe_path.is_file() { + return None; + } + + Some(safe_path) +} + +fn process_file_reference( + reference: &Path, + safe_path: &Path, + visited: &mut HashSet, + import_boundary: &Path, + depth: usize, + ignore_patterns: &Gitignore, +) -> Option<(String, String)> { + if depth >= MAX_DEPTH { + tracing::warn!("Maximum reference depth {} exceeded", MAX_DEPTH); + return None; + } + + visited.insert(reference.to_path_buf()); + + let expanded_content = read_referenced_files( + safe_path, + import_boundary, + visited, + depth + 1, + ignore_patterns, + ); + + let reference_pattern = format!("@{}", reference.to_string_lossy()); + let replacement = format!( + "--- Content from {} ---\n{}\n--- End of {} ---", + reference.display(), + expanded_content, + reference.display() + ); + + visited.remove(reference); + + Some((reference_pattern, replacement)) +} + +pub fn read_referenced_files( + file_path: &Path, + import_boundary: &Path, + visited: &mut HashSet, + depth: usize, + ignore_patterns: &Gitignore, +) -> String { + let content = match std::fs::read_to_string(file_path) { + Ok(content) => content, + Err(e) => { + tracing::warn!("Could not read file {:?}: {}", file_path, e); + return String::new(); + } + }; + + let including_file_path = file_path.parent().unwrap_or(file_path); + + let references = parse_file_references(&content); + let mut result = content.to_string(); + + for reference in references { + let safe_path = match should_process_reference( + &reference, + including_file_path, + import_boundary, + visited, + ignore_patterns, + ) { + Some(path) => path, + None => continue, + }; + + if let Some((pattern, replacement)) = process_file_reference( + &reference, + &safe_path, + visited, + import_boundary, + depth, + ignore_patterns, + ) { + result = result.replace(&pattern, &replacement); + } + } + + result +} + +#[cfg(test)] +mod tests { + use ignore::gitignore::GitignoreBuilder; + + use super::*; + + #[test] + fn test_parse_file_references() { + let content = r#" + Basic file references: @README.md @./docs/guide.md @../shared/config.json @/absolute/path/file.txt + Inline references: @file1.txt and @file2.py + Files with extensions: @component.tsx @file.test.js @config.local.json + Files without extensions: @Makefile @LICENSE @Dockerfile @CHANGELOG + Complex paths: @src/utils/helper.js @docs/api/endpoints.md + + Should not match: + - Email addresses: user@example.com admin@company.org + - Social handles: @username @user123 + - URLs: https://example.com/@user + "#; + + let references = parse_file_references(content); + + // Should match expected file references + let expected_files = [ + "README.md", + "./docs/guide.md", + "../shared/config.json", + "/absolute/path/file.txt", + "file1.txt", + "file2.py", + "component.tsx", + "file.test.js", + "config.local.json", + "Makefile", + "LICENSE", + "Dockerfile", + "CHANGELOG", + "src/utils/helper.js", + "docs/api/endpoints.md", + ]; + + for expected in expected_files { + assert!( + references.contains(&PathBuf::from(expected)), + "Expected to find reference: {}", + expected + ); + } + + // Should not match email addresses or social handles + assert!(!references + .iter() + .any(|p| p.to_str().unwrap().contains("example.com"))); + assert!(!references + .iter() + .any(|p| p.to_str().unwrap().contains("company.org"))); + assert!(!references.iter().any(|p| p.to_str().unwrap() == "username")); + assert!(!references.iter().any(|p| p.to_str().unwrap() == "user123")); + } + + mod read_referenced_files { + use super::*; + + fn create_ignore_patterns(import_boundary: &Path) -> Gitignore { + let builder = GitignoreBuilder::new(import_boundary); + builder.build().unwrap() + } + + fn create_file(import_boundary: &Path, file_name: &str, content: &str) -> PathBuf { + let file_path = import_boundary.join(file_name); + std::fs::write(&file_path, content).unwrap(); + file_path + } + + #[test] + fn test_direct_reference() { + let temp_dir = tempfile::tempdir().unwrap(); + let import_boundary = temp_dir.path(); + + create_file( + import_boundary, + "basic_included_file.md", + "This is basic content", + ); + + let ignore_patterns = create_ignore_patterns(import_boundary); + + let mut visited = HashSet::new(); + let main_file = create_file( + import_boundary, + "main.md", + "Main content\n@basic_included_file.md\nMore content", + ); + + let expanded = read_referenced_files( + &main_file, + import_boundary, + &mut visited, + 0, + &ignore_patterns, + ); + + assert!(expanded.contains("Main content")); + assert!(expanded.contains("--- Content from")); + assert!(expanded.contains("This is basic content")); + assert!(expanded.contains("--- End of")); + assert!(expanded.contains("More content")); + } + + #[test] + fn test_nested_reference() { + let temp_dir = tempfile::tempdir().unwrap(); + let import_boundary = temp_dir.path(); + + create_file(import_boundary, "level1.md", "Level 1 content\n@level2.md"); + create_file(import_boundary, "level2.md", "Level 2 content"); + + let mut visited = HashSet::new(); + let main_file = create_file(import_boundary, "main.md", "Main content\n@level1.md"); + + let ignore_patterns = create_ignore_patterns(import_boundary); + let expanded = read_referenced_files( + &main_file, + import_boundary, + &mut visited, + 0, + &ignore_patterns, + ); + + assert!(expanded.contains("Main content")); + assert!(expanded.contains("Level 1 content")); + assert!(expanded.contains("Level 2 content")); + } + + #[test] + fn test_circular_reference() { + let temp_dir = tempfile::tempdir().unwrap(); + let import_boundary = temp_dir.path(); + + let ignore_patterns = create_ignore_patterns(import_boundary); + create_file(import_boundary, "file1.md", "File 1\n@file2.md"); + create_file(import_boundary, "file2.md", "File 2\n@file1.md"); + let main_file = create_file(import_boundary, "main.md", "Main\n@file1.md"); + + let mut visited = HashSet::new(); + let expanded = read_referenced_files( + &main_file, + import_boundary, + &mut visited, + 0, + &ignore_patterns, + ); + + assert!(expanded.contains("File 1")); + assert!(expanded.contains("File 2")); + // Should only appear once due to circular reference protection + let file1_count = expanded.matches("File 1").count(); + assert_eq!(file1_count, 1); + } + + #[test] + fn test_max_depth_limit() { + let temp_dir = tempfile::tempdir().unwrap(); + let import_boundary = temp_dir.path(); + let ignore_patterns = create_ignore_patterns(import_boundary); + let mut visited = HashSet::new(); + for i in 1..=5 { + let content = if i < 5 { + format!("Level {} content\n@level{}.md", i, i + 1) + } else { + format!("Level {} content", i) + }; + create_file(import_boundary, &format!("level{}.md", i), &content); + } + let main_file = create_file(import_boundary, "main.md", "Main\n@level1.md"); + let expanded = read_referenced_files( + &main_file, + import_boundary, + &mut visited, + 0, + &ignore_patterns, + ); + // Should contain up to level 3 (MAX_DEPTH = 3) + assert!(expanded.contains("Level 1 content")); + assert!(expanded.contains("Level 2 content")); + assert!(expanded.contains("Level 3 content")); + // Should not contain level 4 or 5 due to depth limit + assert!(!expanded.contains("Level 4 content")); + assert!(!expanded.contains("Level 5 content")); + } + + #[test] + fn test_missing_file() { + let temp_dir = tempfile::tempdir().unwrap(); + let import_boundary = temp_dir.path(); + let ignore_patterns = create_ignore_patterns(import_boundary); + let mut visited = HashSet::new(); + let main_file = create_file( + import_boundary, + "main.md", + "Main\n@missing.md\nMore content", + ); + + let expanded = read_referenced_files( + &main_file, + import_boundary, + &mut visited, + 0, + &ignore_patterns, + ); + + assert!(expanded.contains("@missing.md")); + assert!(!expanded.contains("--- Content from")); + } + + #[test] + fn test_read_referenced_files_respects_ignore() { + let temp_dir = tempfile::tempdir().unwrap(); + let import_boundary = temp_dir.path(); + + create_file(import_boundary, "allowed.md", "Allowed content"); + create_file(import_boundary, "secret.md", "Secret content"); + + let mut builder = GitignoreBuilder::new(import_boundary); + builder.add_line(None, "secret.md").unwrap(); + let ignore_patterns = builder.build().unwrap(); + + let mut visited = HashSet::new(); + // Create main content with references + let content = "Main\n@allowed.md\n@secret.md"; + let main_file = create_file(import_boundary, "main.md", content); + let expanded = read_referenced_files( + &main_file, + import_boundary, + &mut visited, + 0, + &ignore_patterns, + ); + + // Should contain allowed content but not ignored content + assert!(expanded.contains("Allowed content")); + assert!(!expanded.contains("Secret content")); + + // The @secret.md reference should remain unchanged + assert!(expanded.contains("@secret.md")); + + temp_dir.close().unwrap(); + } + + #[test] + fn test_security_integration_with_file_expansion() { + let temp_dir = tempfile::tempdir().unwrap(); + let import_boundary = temp_dir.path(); + let ignore_patterns = create_ignore_patterns(import_boundary); + + // Create a legitimate file + create_file( + import_boundary, + "legitimate_file.md", + "This is safe content", + ); + + let absolute_path_file = create_file( + import_boundary, + "used_with_absolute_path.md", + "Absolute path content", + ); + let absolute_path_file_path = absolute_path_file + .canonicalize() + .unwrap() + .to_string_lossy() + .into_owned(); + + // Create a config file attempting path traversal + let malicious_content = format!( + r#" + Normal content here. + @../etc/passwd + @{} + @legitimate_file.md + "#, + absolute_path_file_path + ); + create_file(import_boundary, "main.md", &malicious_content); + + let mut visited = HashSet::new(); + let expanded = read_referenced_files( + &import_boundary.join("main.md"), + import_boundary, + &mut visited, + 0, + &ignore_patterns, + ); + + // Should contain the legitimate file but not the malicious attempts + assert!(expanded.contains("This is safe content")); + assert!(!expanded.contains("root:")); // Common content in /etc/passwd + assert!(!expanded.contains("Absolute path content")); + + // The malicious references should still be present (not expanded) + assert!(expanded.contains("@../etc/passwd")); + assert!(expanded.contains(absolute_path_file_path.as_str())); + } + } +} diff --git a/crates/goose-mcp/src/developer/goose_hints/load_hints.rs b/crates/goose-mcp/src/developer/goose_hints/load_hints.rs new file mode 100644 index 000000000000..b2074c318ee8 --- /dev/null +++ b/crates/goose-mcp/src/developer/goose_hints/load_hints.rs @@ -0,0 +1,536 @@ +use etcetera::{choose_app_strategy, AppStrategy}; +use ignore::gitignore::Gitignore; +use std::{ + collections::HashSet, + path::{Path, PathBuf}, +}; + +use crate::developer::goose_hints::import_files::read_referenced_files; + +pub const GOOSE_HINTS_FILENAME: &str = ".goosehints"; + +fn find_git_root(start_dir: &Path) -> Option<&Path> { + let mut check_dir = start_dir; + + loop { + if check_dir.join(".git").exists() { + return Some(check_dir); + } + if let Some(parent) = check_dir.parent() { + check_dir = parent; + } else { + break; + } + } + + None +} + +fn get_local_directories(git_root: Option<&Path>, cwd: &Path) -> Vec { + match git_root { + Some(git_root) => { + let mut directories = Vec::new(); + let mut current_dir = cwd; + + loop { + directories.push(current_dir.to_path_buf()); + if current_dir == git_root { + break; + } + if let Some(parent) = current_dir.parent() { + current_dir = parent; + } else { + break; + } + } + directories.reverse(); + directories + } + None => vec![cwd.to_path_buf()], + } +} + +pub fn load_hint_files( + cwd: &Path, + hints_filenames: &[String], + ignore_patterns: &Gitignore, +) -> String { + let mut global_hints_contents = Vec::with_capacity(hints_filenames.len()); + let mut local_hints_contents = Vec::with_capacity(hints_filenames.len()); + + for hints_filename in hints_filenames { + // Global hints + // choose_app_strategy().config_dir() + // - macOS/Linux: ~/.config/goose/ + // - Windows: ~\AppData\Roaming\Block\goose\config\ + // keep previous behavior of expanding ~/.config in case this fails + let global_hints_path = choose_app_strategy(crate::APP_STRATEGY.clone()) + .map(|strategy| strategy.in_config_dir(hints_filename)) + .unwrap_or_else(|_| { + let path_str = format!("~/.config/goose/{}", hints_filename); + PathBuf::from(shellexpand::tilde(&path_str).to_string()) + }); + + if let Some(parent) = global_hints_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + + if global_hints_path.is_file() { + let mut visited = HashSet::new(); + let hints_dir = global_hints_path.parent().unwrap(); + let expanded_content = read_referenced_files( + &global_hints_path, + hints_dir, + &mut visited, + 0, + ignore_patterns, + ); + if !expanded_content.is_empty() { + global_hints_contents.push(expanded_content); + } + } + } + let git_root = find_git_root(cwd); + let local_directories = get_local_directories(git_root, cwd); + + let import_boundary = git_root.unwrap_or(cwd); + + for directory in &local_directories { + for hints_filename in hints_filenames { + let hints_path = directory.join(hints_filename); + if hints_path.is_file() { + let mut visited = HashSet::new(); + let expanded_content = read_referenced_files( + &hints_path, + import_boundary, + &mut visited, + 0, + ignore_patterns, + ); + if !expanded_content.is_empty() { + local_hints_contents.push(expanded_content); + } + } + } + } + + let mut hints = String::new(); + if !global_hints_contents.is_empty() { + hints.push_str("\n### Global Hints\nThe developer extension includes some global hints that apply to all projects & directories.\n"); + hints.push_str(&global_hints_contents.join("\n")); + } + + if !local_hints_contents.is_empty() { + if !hints.is_empty() { + hints.push_str("\n\n"); + } + hints.push_str("### Project Hints\nThe developer extension includes some hints for working on the project in this directory.\n"); + hints.push_str(&local_hints_contents.join("\n")); + } + + hints +} + +#[cfg(test)] +mod tests { + use super::*; + use ignore::gitignore::GitignoreBuilder; + use serial_test::serial; + use std::fs::{self}; + use tempfile::TempDir; + + fn create_dummy_gitignore() -> Gitignore { + let temp_dir = tempfile::tempdir().expect("failed to create tempdir"); + let builder = GitignoreBuilder::new(temp_dir.path()); + builder.build().expect("failed to build gitignore") + } + + #[test] + #[serial] + fn test_global_goosehints() { + // if ~/.config/goose/.goosehints exists, it should be included in the instructions + // copy the existing global hints file to a .bak file + let global_hints_path = PathBuf::from( + shellexpand::tilde(format!("~/.config/goose/{}", GOOSE_HINTS_FILENAME).as_str()) + .to_string(), + ); + let global_hints_bak_path = PathBuf::from( + shellexpand::tilde(format!("~/.config/goose/{}.bak", GOOSE_HINTS_FILENAME).as_str()) + .to_string(), + ); + let mut globalhints_existed = false; + + if global_hints_path.is_file() { + globalhints_existed = true; + fs::copy(&global_hints_path, &global_hints_bak_path).unwrap(); + } + + fs::write(&global_hints_path, "These are my global goose hints.").unwrap(); + + let dir = TempDir::new().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files(dir.path(), &[GOOSE_HINTS_FILENAME.to_string()], &gitignore); + + assert!(hints.contains("### Global Hints")); + assert!(hints.contains("my global goose hints.")); + + // restore backup if globalhints previously existed + if globalhints_existed { + fs::copy(&global_hints_bak_path, &global_hints_path).unwrap(); + fs::remove_file(&global_hints_bak_path).unwrap(); + } else { + // Clean up the test file we created + let _ = fs::remove_file(&global_hints_path); + } + } + + #[test] + #[serial] + fn test_goosehints_when_present() { + let dir = TempDir::new().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + + fs::write(dir.path().join(GOOSE_HINTS_FILENAME), "Test hint content").unwrap(); + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files(dir.path(), &[GOOSE_HINTS_FILENAME.to_string()], &gitignore); + + assert!(hints.contains("Test hint content")); + } + + #[test] + #[serial] + fn test_goosehints_when_missing() { + let dir = TempDir::new().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files(dir.path(), &[GOOSE_HINTS_FILENAME.to_string()], &gitignore); + + assert!(!hints.contains("Project Hints")); + } + + #[test] + #[serial] + fn test_goosehints_multiple_filenames() { + let dir = TempDir::new().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + + fs::write( + dir.path().join("CLAUDE.md"), + "Custom hints file content from CLAUDE.md", + ) + .unwrap(); + fs::write( + dir.path().join(GOOSE_HINTS_FILENAME), + "Custom hints file content from .goosehints", + ) + .unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files( + dir.path(), + &["CLAUDE.md".to_string(), GOOSE_HINTS_FILENAME.to_string()], + &gitignore, + ); + + assert!(hints.contains("Custom hints file content from CLAUDE.md")); + assert!(hints.contains("Custom hints file content from .goosehints")); + } + + #[test] + #[serial] + fn test_goosehints_configurable_filename() { + let dir = TempDir::new().unwrap(); + std::env::set_current_dir(dir.path()).unwrap(); + + fs::write(dir.path().join("CLAUDE.md"), "Custom hints file content").unwrap(); + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files(dir.path(), &["CLAUDE.md".to_string()], &gitignore); + + assert!(hints.contains("Custom hints file content")); + assert!(!hints.contains(".goosehints")); // Make sure it's not loading the default + } + + #[test] + fn test_nested_goosehints_with_git_root() { + let temp_dir = TempDir::new().unwrap(); + let project_root = temp_dir.path(); + + fs::create_dir(project_root.join(".git")).unwrap(); + fs::write( + project_root.join(GOOSE_HINTS_FILENAME), + "Root hints content", + ) + .unwrap(); + + let subdir = project_root.join("subdir"); + fs::create_dir(&subdir).unwrap(); + fs::write(subdir.join(GOOSE_HINTS_FILENAME), "Subdir hints content").unwrap(); + let current_dir = subdir.join("current_dir"); + fs::create_dir(¤t_dir).unwrap(); + fs::write( + current_dir.join(GOOSE_HINTS_FILENAME), + "current_dir hints content", + ) + .unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files( + ¤t_dir, + &[GOOSE_HINTS_FILENAME.to_string()], + &gitignore, + ); + + assert!( + hints.contains("Root hints content\nSubdir hints content\ncurrent_dir hints content") + ); + } + + #[test] + fn test_nested_goosehints_without_git_root() { + let temp_dir = TempDir::new().unwrap(); + let base_dir = temp_dir.path(); + + fs::write(base_dir.join(GOOSE_HINTS_FILENAME), "Base hints content").unwrap(); + + let subdir = base_dir.join("subdir"); + fs::create_dir(&subdir).unwrap(); + fs::write(subdir.join(GOOSE_HINTS_FILENAME), "Subdir hints content").unwrap(); + + let current_dir = subdir.join("current_dir"); + fs::create_dir(¤t_dir).unwrap(); + fs::write( + current_dir.join(GOOSE_HINTS_FILENAME), + "Current dir hints content", + ) + .unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files( + ¤t_dir, + &[GOOSE_HINTS_FILENAME.to_string()], + &gitignore, + ); + + // Without .git, should only find hints in current directory + assert!(hints.contains("Current dir hints content")); + assert!(!hints.contains("Base hints content")); + assert!(!hints.contains("Subdir hints content")); + } + + #[test] + fn test_nested_goosehints_mixed_filenames() { + let temp_dir = TempDir::new().unwrap(); + let project_root = temp_dir.path(); + + fs::create_dir(project_root.join(".git")).unwrap(); + fs::write(project_root.join("CLAUDE.md"), "Root CLAUDE.md content").unwrap(); + + let subdir = project_root.join("subdir"); + fs::create_dir(&subdir).unwrap(); + fs::write( + subdir.join(GOOSE_HINTS_FILENAME), + "Subdir .goosehints content", + ) + .unwrap(); + + let current_dir = subdir.join("current_dir"); + fs::create_dir(¤t_dir).unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files( + ¤t_dir, + &["CLAUDE.md".to_string(), GOOSE_HINTS_FILENAME.to_string()], + &gitignore, + ); + + assert!(hints.contains("Root CLAUDE.md content")); + assert!(hints.contains("Subdir .goosehints content")); + } + + #[test] + fn test_hints_with_basic_imports() { + let temp_dir = TempDir::new().unwrap(); + let project_root = temp_dir.path(); + + fs::create_dir(project_root.join(".git")).unwrap(); + + fs::write( + project_root.join("README.md"), + "# Project README\nProject overview content", + ) + .unwrap(); + fs::write(project_root.join("config.md"), "Configuration details").unwrap(); + + let hints_content = r#"Project hints content +@README.md +@config.md +Additional instructions here."#; + fs::write(project_root.join(GOOSE_HINTS_FILENAME), hints_content).unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files( + project_root, + &[GOOSE_HINTS_FILENAME.to_string()], + &gitignore, + ); + + assert!(hints.contains("Project hints content")); + assert!(hints.contains("Additional instructions here")); + + assert!(hints.contains("--- Content from README.md ---")); + assert!(hints.contains("# Project README")); + assert!(hints.contains("Project overview content")); + assert!(hints.contains("--- End of README.md ---")); + + assert!(hints.contains("--- Content from config.md ---")); + assert!(hints.contains("Configuration details")); + assert!(hints.contains("--- End of config.md ---")); + } + + #[test] + fn test_hints_with_git_import_boundary() { + let temp_dir = TempDir::new().unwrap(); + let project_root = temp_dir.path(); + + fs::create_dir(project_root.join(".git")).unwrap(); + + fs::write(project_root.join("root_file.md"), "Root file content").unwrap(); + fs::write( + project_root.join("shared_docs.md"), + "Shared documentation content", + ) + .unwrap(); + + let docs_dir = project_root.join("docs"); + fs::create_dir_all(&docs_dir).unwrap(); + fs::write(docs_dir.join("api.md"), "API documentation content").unwrap(); + + let utils_dir = project_root.join("src").join("utils"); + fs::create_dir_all(&utils_dir).unwrap(); + fs::write( + utils_dir.join("helpers.md"), + "Helper utilities content @../../shared_docs.md", + ) + .unwrap(); + + let components_dir = project_root.join("src").join("components"); + fs::create_dir_all(&components_dir).unwrap(); + fs::write(components_dir.join("local_file.md"), "Local file content").unwrap(); + + let outside_dir = temp_dir.path().parent().unwrap(); + fs::write(outside_dir.join("forbidden.md"), "Forbidden content").unwrap(); + + let root_hints_content = r#"Project root hints +@docs/api.md +Root level instructions"#; + fs::write(project_root.join(GOOSE_HINTS_FILENAME), root_hints_content).unwrap(); + + let nested_hints_content = r#"Nested directory hints +@local_file.md +@../utils/helpers.md +@../../docs/api.md +@../../root_file.md +@../../../forbidden.md +End of nested hints"#; + fs::write( + components_dir.join(GOOSE_HINTS_FILENAME), + nested_hints_content, + ) + .unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files( + &components_dir, + &[GOOSE_HINTS_FILENAME.to_string()], + &gitignore, + ); + println!("======{}", hints); + assert!(hints.contains("Project root hints")); + assert!(hints.contains("Root level instructions")); + + assert!(hints.contains("API documentation content")); + assert!(hints.contains("--- Content from docs/api.md ---")); + + assert!(hints.contains("Nested directory hints")); + assert!(hints.contains("End of nested hints")); + + assert!(hints.contains("Local file content")); + assert!(hints.contains("--- Content from local_file.md ---")); + + assert!(hints.contains("Helper utilities content")); + assert!(hints.contains("--- Content from ../utils/helpers.md ---")); + assert!(hints.contains("Shared documentation content")); + assert!(hints.contains("--- Content from ../../shared_docs.md ---")); + + let api_content_count = hints.matches("API documentation content").count(); + assert_eq!( + api_content_count, 2, + "API content should appear twice - from root and nested hints" + ); + + assert!(hints.contains("Root file content")); + assert!(hints.contains("--- Content from ../../root_file.md ---")); + + assert!(!hints.contains("Forbidden content")); + assert!(hints.contains("@../../../forbidden.md")); + } + + #[test] + fn test_hints_without_git_import_boundary() { + let temp_dir = TempDir::new().unwrap(); + let base_dir = temp_dir.path(); + + let current_dir = base_dir.join("current"); + fs::create_dir(¤t_dir).unwrap(); + fs::write(current_dir.join("local.md"), "Local content").unwrap(); + + fs::write(base_dir.join("parent.md"), "Parent content").unwrap(); + + let hints_content = r#"Current directory hints +@local.md +@../parent.md +End of hints"#; + fs::write(current_dir.join(GOOSE_HINTS_FILENAME), hints_content).unwrap(); + + let gitignore = create_dummy_gitignore(); + let hints = load_hint_files( + ¤t_dir, + &[GOOSE_HINTS_FILENAME.to_string()], + &gitignore, + ); + + assert!(hints.contains("Local content")); + assert!(hints.contains("--- Content from local.md ---")); + + assert!(!hints.contains("Parent content")); + assert!(hints.contains("@../parent.md")); + } + + #[test] + fn test_import_boundary_respects_nested_setting() { + let temp_dir = TempDir::new().unwrap(); + let project_root = temp_dir.path(); + fs::create_dir(project_root.join(".git")).unwrap(); + fs::write(project_root.join("root_file.md"), "Root file content").unwrap(); + let subdir = project_root.join("subdir"); + fs::create_dir(&subdir).unwrap(); + fs::write(subdir.join("local_file.md"), "Local file content").unwrap(); + let hints_content = r#"Subdir hints +@local_file.md +@../root_file.md +End of hints"#; + fs::write(subdir.join(GOOSE_HINTS_FILENAME), hints_content).unwrap(); + let gitignore = create_dummy_gitignore(); + + let hints = load_hint_files(&subdir, &[GOOSE_HINTS_FILENAME.to_string()], &gitignore); + + assert!(hints.contains("Local file content")); + assert!(hints.contains("--- Content from local_file.md ---")); + + assert!(hints.contains("Root file content")); + assert!(hints.contains("--- Content from ../root_file.md ---")); + } +} diff --git a/crates/goose-mcp/src/developer/goose_hints/mod.rs b/crates/goose-mcp/src/developer/goose_hints/mod.rs new file mode 100644 index 000000000000..818255dbc628 --- /dev/null +++ b/crates/goose-mcp/src/developer/goose_hints/mod.rs @@ -0,0 +1,2 @@ +mod import_files; +pub mod load_hints; diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 9b3b6982eb17..0222eb60001b 100644 --- a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -1,5 +1,5 @@ mod editor_models; - +mod goose_hints; mod lang; mod shell; @@ -10,7 +10,7 @@ use indoc::formatdoc; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::{ - collections::{HashMap, HashSet}, + collections::HashMap, fs::File, future::Future, io::{Cursor, Read}, @@ -30,9 +30,9 @@ use mcp_core::{ handler::{require_str_parameter, PromptError, ResourceError}, protocol::ServerCapabilities, }; + use mcp_server::router::CapabilitiesBuilder; use mcp_server::Router; -use once_cell::sync::Lazy; use rmcp::model::{ Content, ErrorCode, ErrorData, JsonRpcMessage, JsonRpcNotification, JsonRpcVersion2_0, @@ -40,6 +40,8 @@ use rmcp::model::{ }; use rmcp::object; +use crate::developer::goose_hints::load_hints::{load_hint_files, GOOSE_HINTS_FILENAME}; + use self::editor_models::{create_editor_model, EditorModel}; use self::shell::{expand_path, get_shell_config, is_absolute_path, normalize_line_endings}; use indoc::indoc; @@ -110,183 +112,6 @@ pub fn load_prompt_files() -> HashMap { prompts } -/// Regex pattern to match file references (@-mentions) in text -static FILE_REFERENCE_REGEX: Lazy = Lazy::new(|| { - regex::Regex::new(r"(?:^|\s)@([a-zA-Z0-9_\-./]+(?:\.[a-zA-Z0-9]+)+|[A-Z][a-zA-Z0-9_\-]*|[a-zA-Z0-9_\-./]*[./][a-zA-Z0-9_\-./]*)") - .expect("Invalid file reference regex pattern") -}); - -/// Sanitize and resolve a file reference path safely -/// -/// This function prevents path traversal attacks by: -/// 1. Rejecting absolute paths -/// 2. Resolving the path canonically -/// 3. Ensuring the resolved path stays within the allowed base directory -fn sanitize_reference_path(reference: &Path, base_path: &Path) -> Result { - if reference.is_absolute() { - return Err(std::io::Error::new( - std::io::ErrorKind::PermissionDenied, - "Absolute paths not allowed in file references", - )); - } - - let resolved = base_path.join(reference); - let base_canonical = base_path.canonicalize().map_err(|_| { - std::io::Error::new(std::io::ErrorKind::NotFound, "Base directory not found") - })?; - - if let Ok(canonical) = resolved.canonicalize() { - if !canonical.starts_with(&base_canonical) { - return Err(std::io::Error::new( - std::io::ErrorKind::PermissionDenied, - "Path traversal attempt detected", - )); - } - Ok(canonical) - } else { - Ok(resolved) // File doesn't exist, but path structure is safe - } -} - -/// Parse file references (@-mentions) from content -fn parse_file_references(content: &str) -> Vec { - // Keep size limits for ReDoS protection - .goosehints should be reasonably sized - const MAX_CONTENT_LENGTH: usize = 131_072; // 128KB limit - - if content.len() > MAX_CONTENT_LENGTH { - tracing::warn!( - "Content too large for file reference parsing: {} bytes (limit: {} bytes)", - content.len(), - MAX_CONTENT_LENGTH - ); - return Vec::new(); - } - - FILE_REFERENCE_REGEX - .captures_iter(content) - .map(|cap| PathBuf::from(&cap[1])) - .collect() -} - -/// Read referenced files and expand their content -/// Check if a file reference should be processed -fn should_process_reference_v2( - reference: &Path, - visited: &HashSet, - base_path: &Path, - ignore_patterns: &Gitignore, -) -> Option { - // Check if we've already visited this file (circular reference protection) - if visited.contains(reference) { - return None; - } - - // Sanitize the path - let safe_path = match sanitize_reference_path(reference, base_path) { - Ok(path) => path, - Err(_) => { - tracing::warn!("Skipping unsafe file reference: {:?}", reference); - return None; - } - }; - - // Check if the file should be ignored - if ignore_patterns.matched(&safe_path, false).is_ignore() { - tracing::debug!("Skipping ignored file reference: {:?}", safe_path); - return None; - } - - // Check if file exists - if !safe_path.is_file() { - return None; - } - - Some(safe_path) -} - -/// Process a single file reference and return the replacement content -fn process_file_reference_v2( - reference: &Path, - safe_path: &Path, - visited: &mut HashSet, - base_path: &Path, - depth: usize, - ignore_patterns: &Gitignore, -) -> Option<(String, String)> { - match std::fs::read_to_string(safe_path) { - Ok(file_content) => { - // Mark this file as visited - visited.insert(reference.to_path_buf()); - - // Recursively expand any references in the included file - let expanded_content = read_referenced_files( - &file_content, - base_path, - visited, - depth + 1, - ignore_patterns, - ); - - // Create the replacement content - let reference_pattern = format!("@{}", reference.to_string_lossy()); - let replacement = format!( - "--- Content from {} ---\n{}\n--- End of {} ---", - reference.display(), - expanded_content, - reference.display() - ); - - // Remove from visited so it can be referenced again in different contexts - visited.remove(reference); - - Some((reference_pattern, replacement)) - } - Err(e) => { - tracing::warn!("Could not read referenced file {:?}: {}", safe_path, e); - None - } - } -} - -fn read_referenced_files( - content: &str, - base_path: &Path, - visited: &mut HashSet, - depth: usize, - ignore_patterns: &Gitignore, -) -> String { - const MAX_DEPTH: usize = 3; - - if depth >= MAX_DEPTH { - tracing::warn!("Maximum reference depth {} exceeded", MAX_DEPTH); - return content.to_string(); - } - - let references = parse_file_references(content); - let mut result = content.to_string(); - - for reference in references { - let safe_path = - match should_process_reference_v2(&reference, visited, base_path, ignore_patterns) { - Some(path) => path, - None => continue, - }; - - if let Some((pattern, replacement)) = process_file_reference_v2( - &reference, - &safe_path, - visited, - base_path, - depth, - ignore_patterns, - ) { - result = result.replace(&pattern, &replacement); - } - } - - result -} - pub struct DeveloperRouter { tools: Vec, prompts: Arc>, @@ -582,46 +407,6 @@ impl DeveloperRouter { }, }; - let hints_filenames: Vec = std::env::var("CONTEXT_FILE_NAMES") - .ok() - .and_then(|s| serde_json::from_str(&s).ok()) - .unwrap_or_else(|| vec!["AGENTS.md".to_string(), ".goosehints".to_string()]); - - let mut global_hints_contents = Vec::with_capacity(hints_filenames.len()); - let mut local_hints_contents = Vec::with_capacity(hints_filenames.len()); - - for hints_filename in &hints_filenames { - // Global hints - // choose_app_strategy().config_dir() - // - macOS/Linux: ~/.config/goose/ - // - Windows: ~\AppData\Roaming\Block\goose\config\ - // keep previous behavior of expanding ~/.config in case this fails - let global_hints_path = choose_app_strategy(crate::APP_STRATEGY.clone()) - .map(|strategy| strategy.in_config_dir(hints_filename)) - .unwrap_or_else(|_| { - let path_str = format!("~/.config/goose/{}", hints_filename); - PathBuf::from(shellexpand::tilde(&path_str).to_string()) - }); - - if let Some(parent) = global_hints_path.parent() { - let _ = std::fs::create_dir_all(parent); - } - - if global_hints_path.is_file() { - if let Ok(content) = std::fs::read_to_string(&global_hints_path) { - global_hints_contents.push(content); - } - } - - let local_hints_path = cwd.join(hints_filename); - if local_hints_path.is_file() { - if let Ok(content) = std::fs::read_to_string(&local_hints_path) { - local_hints_contents.push(content); - } - } - } - - // Build ignore patterns first so we can use them for file reference expansion let mut builder = GitignoreBuilder::new(cwd.clone()); let mut has_ignore_file = false; @@ -671,42 +456,11 @@ impl DeveloperRouter { let ignore_patterns = builder.build().expect("Failed to build ignore patterns"); - // Now process hints with file reference expansion - let mut hints = String::new(); - if !global_hints_contents.is_empty() { - hints.push_str("\n### Global Hints\nThe developer extension includes some global hints that apply to all projects & directories.\n"); - - // Expand file references in global hints - let mut visited = HashSet::new(); - let global_hints_text = global_hints_contents.join("\n"); - let global_config_dir = choose_app_strategy(crate::APP_STRATEGY.clone()) - .map(|strategy| strategy.config_dir()) - .unwrap_or_else(|_| { - PathBuf::from(shellexpand::tilde("~/.config/goose").to_string()) - }); - let expanded_global_hints = read_referenced_files( - &global_hints_text, - &global_config_dir, - &mut visited, - 0, - &ignore_patterns, - ); - hints.push_str(&expanded_global_hints); - } - - if !local_hints_contents.is_empty() { - if !hints.is_empty() { - hints.push_str("\n\n"); - } - hints.push_str("### Project Hints\nThe developer extension includes some hints for working on the project in this directory.\n"); - - // Expand file references in local hints - let mut visited = HashSet::new(); - let local_hints_text = local_hints_contents.join("\n"); - let expanded_local_hints = - read_referenced_files(&local_hints_text, &cwd, &mut visited, 0, &ignore_patterns); - hints.push_str(&expanded_local_hints); - } + let hints_filenames: Vec = std::env::var("CONTEXT_FILE_NAMES") + .ok() + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_else(|| vec!["AGENTS.md".to_string(), GOOSE_HINTS_FILENAME.to_string()]); + let hints = load_hint_files(&cwd, &hints_filenames, &ignore_patterns); // Return base instructions directly when no hints are found let instructions = if hints.is_empty() { @@ -1995,69 +1749,10 @@ mod tests { use core::panic; use serde_json::json; use serial_test::serial; - use std::fs::{self, read_to_string}; + use std::fs::read_to_string; use tempfile::TempDir; use tokio::sync::OnceCell; - #[test] - #[serial] - fn test_global_goosehints() { - // if ~/.config/goose/.goosehints exists, it should be included in the instructions - // copy the existing global hints file to a .bak file - let global_hints_path = - PathBuf::from(shellexpand::tilde("~/.config/goose/.goosehints").to_string()); - let global_hints_bak_path = - PathBuf::from(shellexpand::tilde("~/.config/goose/.goosehints.bak").to_string()); - let mut globalhints_existed = false; - - if global_hints_path.is_file() { - globalhints_existed = true; - fs::copy(&global_hints_path, &global_hints_bak_path).unwrap(); - } - - fs::write(&global_hints_path, "These are my global goose hints.").unwrap(); - - let dir = TempDir::new().unwrap(); - std::env::set_current_dir(dir.path()).unwrap(); - - let router = DeveloperRouter::new(); - let instructions = router.instructions(); - - assert!(instructions.contains("### Global Hints")); - assert!(instructions.contains("my global goose hints.")); - - // restore backup if globalhints previously existed - if globalhints_existed { - fs::copy(&global_hints_bak_path, &global_hints_path).unwrap(); - fs::remove_file(&global_hints_bak_path).unwrap(); - } - } - - #[test] - #[serial] - fn test_goosehints_when_present() { - let dir = TempDir::new().unwrap(); - std::env::set_current_dir(dir.path()).unwrap(); - - fs::write(".goosehints", "Test hint content").unwrap(); - let router = DeveloperRouter::new(); - let instructions = router.instructions(); - - assert!(instructions.contains("Test hint content")); - } - - #[test] - #[serial] - fn test_goosehints_when_missing() { - let dir = TempDir::new().unwrap(); - std::env::set_current_dir(dir.path()).unwrap(); - - let router = DeveloperRouter::new(); - let instructions = router.instructions(); - - assert!(!instructions.contains("Project Hints")); - } - static DEV_ROUTER: OnceCell = OnceCell::const_new(); async fn get_router() -> &'static DeveloperRouter { @@ -2086,39 +1781,6 @@ mod tests { temp_dir.close().unwrap(); } - #[test] - #[serial] - fn test_goosehints_multiple_filenames() { - let dir = TempDir::new().unwrap(); - std::env::set_current_dir(dir.path()).unwrap(); - std::env::set_var("CONTEXT_FILE_NAMES", r#"["CLAUDE.md", ".goosehints"]"#); - - fs::write("CLAUDE.md", "Custom hints file content from CLAUDE.md").unwrap(); - fs::write(".goosehints", "Custom hints file content from .goosehints").unwrap(); - let router = DeveloperRouter::new(); - let instructions = router.instructions(); - - assert!(instructions.contains("Custom hints file content from CLAUDE.md")); - assert!(instructions.contains("Custom hints file content from .goosehints")); - std::env::remove_var("CONTEXT_FILE_NAMES"); - } - - #[test] - #[serial] - fn test_goosehints_configurable_filename() { - let dir = TempDir::new().unwrap(); - std::env::set_current_dir(dir.path()).unwrap(); - std::env::set_var("CONTEXT_FILE_NAMES", r#"["CLAUDE.md"]"#); - - fs::write("CLAUDE.md", "Custom hints file content").unwrap(); - let router = DeveloperRouter::new(); - let instructions = router.instructions(); - - assert!(instructions.contains("Custom hints file content")); - assert!(!instructions.contains(".goosehints")); // Make sure it's not loading the default - std::env::remove_var("CONTEXT_FILE_NAMES"); - } - #[tokio::test] #[serial] #[cfg(windows)] @@ -3932,315 +3594,6 @@ mod tests { assert_eq!(result.1, ""); } - // Tests for @-mention file reference functionality - #[test] - fn test_parse_file_references() { - let content = r#" - Basic file references: @README.md @./docs/guide.md @../shared/config.json @/absolute/path/file.txt - Inline references: @file1.txt and @file2.py - Files with extensions: @component.tsx @file.test.js @config.local.json - Files without extensions: @Makefile @LICENSE @Dockerfile @CHANGELOG - Complex paths: @src/utils/helper.js @docs/api/endpoints.md - - Should not match: - - Email addresses: user@example.com admin@company.org - - Social handles: @username @user123 - - URLs: https://example.com/@user - "#; - - let references = parse_file_references(content); - - // Should match basic file references - assert!(references.contains(&PathBuf::from("README.md"))); - assert!(references.contains(&PathBuf::from("./docs/guide.md"))); - assert!(references.contains(&PathBuf::from("../shared/config.json"))); - assert!(references.contains(&PathBuf::from("/absolute/path/file.txt"))); - assert!(references.contains(&PathBuf::from("file1.txt"))); - assert!(references.contains(&PathBuf::from("file2.py"))); - - // Should match files with extensions (including multiple dots) - assert!(references.contains(&PathBuf::from("component.tsx"))); - assert!(references.contains(&PathBuf::from("file.test.js"))); - assert!(references.contains(&PathBuf::from("config.local.json"))); - - // Should match files without extensions - assert!(references.contains(&PathBuf::from("Makefile"))); - assert!(references.contains(&PathBuf::from("LICENSE"))); - assert!(references.contains(&PathBuf::from("Dockerfile"))); - assert!(references.contains(&PathBuf::from("CHANGELOG"))); - - // Should match complex paths - assert!(references.contains(&PathBuf::from("src/utils/helper.js"))); - assert!(references.contains(&PathBuf::from("docs/api/endpoints.md"))); - - // Should not match email addresses or social handles - assert!(!references - .iter() - .any(|p| p.to_str().unwrap().contains("example.com"))); - assert!(!references - .iter() - .any(|p| p.to_str().unwrap().contains("company.org"))); - assert!(!references.iter().any(|p| p.to_str().unwrap() == "username")); - assert!(!references.iter().any(|p| p.to_str().unwrap() == "user123")); - } - - #[test] - #[serial] - fn test_file_expansion_normal_cases() { - let temp_dir = tempfile::tempdir().unwrap(); - let base_path = temp_dir.path(); - - // Test 1: Basic file reference - let basic_file = base_path.join("basic.md"); - std::fs::write(&basic_file, "This is basic content").unwrap(); - - let builder = GitignoreBuilder::new(base_path); - let ignore_patterns = builder.build().unwrap(); - - let mut visited = HashSet::new(); - let basic_content = "Main content\n@basic.md\nMore content"; - let expanded = - read_referenced_files(basic_content, base_path, &mut visited, 0, &ignore_patterns); - - assert!(expanded.contains("Main content")); - assert!(expanded.contains("--- Content from")); - assert!(expanded.contains("This is basic content")); - assert!(expanded.contains("--- End of")); - assert!(expanded.contains("More content")); - - // Test 2: Nested file references - let ref_file1 = base_path.join("level1.md"); - std::fs::write(&ref_file1, "Level 1 content\n@level2.md").unwrap(); - - let ref_file2 = base_path.join("level2.md"); - std::fs::write(&ref_file2, "Level 2 content").unwrap(); - - visited.clear(); - let nested_content = "Main content\n@level1.md"; - let expanded = - read_referenced_files(nested_content, base_path, &mut visited, 0, &ignore_patterns); - - assert!(expanded.contains("Main content")); - assert!(expanded.contains("Level 1 content")); - assert!(expanded.contains("Level 2 content")); - - temp_dir.close().unwrap(); - } - - #[test] - #[serial] - fn test_file_expansion_edge_cases() { - let temp_dir = tempfile::tempdir().unwrap(); - let base_path = temp_dir.path(); - let builder = GitignoreBuilder::new(base_path); - let ignore_patterns = builder.build().unwrap(); - - // Test 1: Circular references - let ref_file1 = base_path.join("file1.md"); - std::fs::write(&ref_file1, "File 1\n@file2.md").unwrap(); - let ref_file2 = base_path.join("file2.md"); - std::fs::write(&ref_file2, "File 2\n@file1.md").unwrap(); - - let mut visited = HashSet::new(); - let circular_content = "Main\n@file1.md"; - let expanded = read_referenced_files( - circular_content, - base_path, - &mut visited, - 0, - &ignore_patterns, - ); - - assert!(expanded.contains("File 1")); - assert!(expanded.contains("File 2")); - // Should only appear once due to circular reference protection - let file1_count = expanded.matches("File 1").count(); - assert_eq!(file1_count, 1); - - // Test 2: Max depth limit - for i in 1..=5 { - let content = if i < 5 { - format!("Level {} content\n@level{}.md", i, i + 1) - } else { - format!("Level {} content", i) - }; - let ref_file = base_path.join(format!("level{}.md", i)); - std::fs::write(&ref_file, content).unwrap(); - } - - visited.clear(); - let depth_content = "Main\n@level1.md"; - let expanded = - read_referenced_files(depth_content, base_path, &mut visited, 0, &ignore_patterns); - - // Should contain up to level 3 (MAX_DEPTH = 3) - assert!(expanded.contains("Level 1 content")); - assert!(expanded.contains("Level 2 content")); - assert!(expanded.contains("Level 3 content")); - // Should not contain level 4 or 5 due to depth limit - assert!(!expanded.contains("Level 4 content")); - assert!(!expanded.contains("Level 5 content")); - - // Test 3: Missing file - visited.clear(); - let missing_content = "Main\n@missing.md\nMore content"; - let expanded = read_referenced_files( - missing_content, - base_path, - &mut visited, - 0, - &ignore_patterns, - ); - - // Should keep the original reference unchanged - assert!(expanded.contains("@missing.md")); - assert!(!expanded.contains("--- Content from")); - - temp_dir.close().unwrap(); - } - - #[test] - #[serial] - fn test_read_referenced_files_respects_ignore() { - let temp_dir = tempfile::tempdir().unwrap(); - let base_path = temp_dir.path(); - - // Create referenced files - let allowed_file = base_path.join("allowed.md"); - std::fs::write(&allowed_file, "Allowed content").unwrap(); - - let ignored_file = base_path.join("secret.md"); - std::fs::write(&ignored_file, "Secret content").unwrap(); - - // Create main content with references - let content = "Main\n@allowed.md\n@secret.md"; - - // Create ignore patterns - let mut builder = GitignoreBuilder::new(base_path); - builder.add_line(None, "secret.md").unwrap(); - let ignore_patterns = builder.build().unwrap(); - - let mut visited = HashSet::new(); - let expanded = read_referenced_files(content, base_path, &mut visited, 0, &ignore_patterns); - - // Should contain allowed content but not ignored content - assert!(expanded.contains("Allowed content")); - assert!(!expanded.contains("Secret content")); - - // The @secret.md reference should remain unchanged - assert!(expanded.contains("@secret.md")); - - temp_dir.close().unwrap(); - } - - #[test] - #[serial] - fn test_goosehints_with_file_references() { - let temp_dir = tempfile::tempdir().unwrap(); - std::env::set_current_dir(&temp_dir).unwrap(); - - // Create referenced files - let readme_path = temp_dir.path().join("README.md"); - std::fs::write( - &readme_path, - "# Project README\n\nThis is the project documentation.", - ) - .unwrap(); - - let guide_path = temp_dir.path().join("guide.md"); - std::fs::write(&guide_path, "# Development Guide\n\nFollow these steps...").unwrap(); - - // Create .goosehints with references - let hints_content = r#"# Project Information - -Please refer to: -@README.md -@guide.md - -Additional instructions here. -"#; - let hints_path = temp_dir.path().join(".goosehints"); - std::fs::write(&hints_path, hints_content).unwrap(); - - // Create router and check instructions - let router = DeveloperRouter::new(); - let instructions = router.instructions(); - - // Should contain the .goosehints content - assert!(instructions.contains("Project Information")); - assert!(instructions.contains("Additional instructions here")); - - // Should contain the referenced files' content - assert!(instructions.contains("# Project README")); - assert!(instructions.contains("This is the project documentation")); - assert!(instructions.contains("# Development Guide")); - assert!(instructions.contains("Follow these steps")); - - // Should have attribution markers - assert!(instructions.contains("--- Content from")); - assert!(instructions.contains("--- End of")); - - temp_dir.close().unwrap(); - } - - #[test] - #[serial] - fn test_parse_file_references_redos_protection() { - // Test very large input to ensure ReDoS protection - let large_content = "@".repeat(2_000_000); // 2MB of @ symbols - let references = parse_file_references(&large_content); - // Should return empty due to size limit, not hang - assert!(references.is_empty()); - - // Test normal size content still works - let normal_content = "Check out @README.md for details"; - let references = parse_file_references(&normal_content); - assert_eq!(references.len(), 1); - assert_eq!(references[0], PathBuf::from("README.md")); - } - - #[test] - #[serial] - fn test_security_integration_with_file_expansion() { - let temp_dir = tempfile::tempdir().unwrap(); - let base_path = temp_dir.path(); - - // Create a config file attempting path traversal - let malicious_content = r#" - Normal content here. - @../../../etc/passwd - @/absolute/path/file.txt - @legitimate_file.md - "#; - - // Create a legitimate file - let legit_file = base_path.join("legitimate_file.md"); - std::fs::write(&legit_file, "This is safe content").unwrap(); - - // Create ignore patterns - let builder = GitignoreBuilder::new(base_path); - let ignore_patterns = builder.build().unwrap(); - - let mut visited = HashSet::new(); - let expanded = read_referenced_files( - malicious_content, - base_path, - &mut visited, - 0, - &ignore_patterns, - ); - - // Should contain the legitimate file but not the malicious attempts - assert!(expanded.contains("This is safe content")); - assert!(!expanded.contains("root:")); // Common content in /etc/passwd - - // The malicious references should still be present (not expanded) - assert!(expanded.contains("@../../../etc/passwd")); - assert!(expanded.contains("@/absolute/path/file.txt")); - - temp_dir.close().unwrap(); - } - #[tokio::test] #[serial] async fn test_shell_output_without_trailing_newline() { diff --git a/ui/desktop/src/components/settings/chat/ChatSettingsSection.tsx b/ui/desktop/src/components/settings/chat/ChatSettingsSection.tsx index 9e7c0fc402d8..d7661bebda80 100644 --- a/ui/desktop/src/components/settings/chat/ChatSettingsSection.tsx +++ b/ui/desktop/src/components/settings/chat/ChatSettingsSection.tsx @@ -49,7 +49,6 @@ export default function ChatSettingsSection() { - Tool Selection Strategy (preview)