diff --git a/Cargo.lock b/Cargo.lock index 7268ed204f01..f94461414626 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1193,9 +1193,9 @@ checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2" [[package]] name = "blake3" -version = "1.6.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "675f87afced0413c9bb02843499dbbd3882a237645883f71a2b59644a6d2f753" +checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" dependencies = [ "arrayref", "arrayvec", @@ -1346,13 +1346,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.16" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" +checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" dependencies = [ "jobserver", "libc", - "shlex", + "once_cell", ] [[package]] @@ -2024,6 +2024,16 @@ dependencies = [ "syn 2.0.99", ] +[[package]] +name = "devgen-tree-sitter-swift" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55c23625b6874f93a85934eb8fe4804a87d0a7d38ff1b74fc3d7ab4a06bd92ae" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "digest" version = "0.10.7" @@ -2365,6 +2375,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foreign-types" version = "0.3.2" @@ -2793,6 +2809,9 @@ dependencies = [ "async-trait", "base64 0.21.7", "chrono", + "clap", + "colored 2.2.0", + "devgen-tree-sitter-swift", "docx-rs", "etcetera", "glob", @@ -2805,11 +2824,13 @@ dependencies = [ "keyring", "lazy_static", "lopdf", + 
"lru", "mcp-core", "mcp-server", "mpatch", "oauth2", "once_cell", + "rayon", "regex", "reqwest 0.11.27", "rmcp", @@ -2819,6 +2840,7 @@ dependencies = [ "serde_with", "serial_test", "shellexpand", + "streaming-iterator", "sysinfo 0.32.1", "temp-env", "tempfile", @@ -2828,6 +2850,13 @@ dependencies = [ "tracing", "tracing-appender", "tracing-subscriber", + "tree-sitter", + "tree-sitter-go", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-kotlin", + "tree-sitter-python", + "tree-sitter-rust", "umya-spreadsheet", "url", "utoipa", @@ -2967,6 +2996,11 @@ name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "hashlink" @@ -3980,6 +4014,15 @@ dependencies = [ "weezl", ] +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.2", +] + [[package]] name = "malloc_buf" version = "0.0.6" @@ -5536,9 +5579,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.14" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" dependencies = [ "cc", "cfg-if", @@ -6224,6 +6267,12 @@ version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8207e78455ffdf55661170876f88daf85356e4edd54e0a3dbc79586ca1e50cbe" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.11.1" @@ 
-6995,6 +7044,76 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "tree-sitter" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "705bf7c0958d0171dd7d3a6542f2f4f21d87ed5f1dc8db52919d3a6bed9a359a" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-go" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8d702a98d3c7e70e466456e58ff2b1ac550bf1e29b97e5770676d2fdabec00d" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-java" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33bc21adf831a773c075d9d00107ab43965e6a6ea7607b47fd9ec6f3db4b481b" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8710a71bc6779e33811a8067bdda3ed08bed1733296ff915e44faf60f8c533d7" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-kotlin" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54ff60aeb036f5762515ceb31404512ea4f9599764bcd3857074bb82867bdd34" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-python" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4066c6cf678f962f8c2c4561f205945c84834cce73d981e71392624fdc390a9" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "try-lock" version = "0.2.5" diff --git a/crates/goose-mcp/Cargo.toml b/crates/goose-mcp/Cargo.toml index 7e9956aacef5..9480e73c82b2 100644 --- 
a/crates/goose-mcp/Cargo.toml +++ b/crates/goose-mcp/Cargo.toml @@ -60,6 +60,17 @@ hyper = "1" serde_with = "3" which = "6.0" glob = "0.3" +lru = "0.12" +tree-sitter = "0.21" +tree-sitter-python = "0.21" +tree-sitter-rust = "0.21" +tree-sitter-javascript = "0.21" +tree-sitter-go = "0.21" +tree-sitter-java = "0.21" +tree-sitter-kotlin = "0.3.8" +devgen-tree-sitter-swift = "0.21.0" +streaming-iterator = "0.1" +rayon = "1.10" # TODO: Fork mpatch or replace with a custom implementation using `similar` crate # for fuzzy patch matching. Current crate has limited maintenance (single maintainer, # ~1000 downloads). Pinned to exact version to prevent supply chain attacks. @@ -70,6 +81,8 @@ mpatch = "=0.2.0" serial_test = "3.0.0" sysinfo = "0.32.1" temp-env = "0.3.6" +clap = { version = "4", features = ["derive"] } +colored = "2" [features] utoipa = ["dep:utoipa"] diff --git a/crates/goose-mcp/src/developer/analyze/cache.rs b/crates/goose-mcp/src/developer/analyze/cache.rs new file mode 100644 index 000000000000..a00804db038d --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/cache.rs @@ -0,0 +1,86 @@ +use lru::LruCache; +use std::num::NonZeroUsize; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; +use std::time::SystemTime; + +use super::lock_or_recover; +use crate::developer::analyze::types::AnalysisResult; + +#[derive(Clone)] +pub struct AnalysisCache { + cache: Arc>>>, + #[allow(dead_code)] + max_size: usize, +} + +#[derive(Hash, Eq, PartialEq, Debug, Clone)] +struct CacheKey { + path: PathBuf, + modified: SystemTime, +} + +impl AnalysisCache { + pub fn new(max_size: usize) -> Self { + tracing::info!("Initializing analysis cache with size {}", max_size); + + let size = NonZeroUsize::new(max_size).unwrap_or_else(|| { + tracing::warn!("Invalid cache size {}, using default 100", max_size); + NonZeroUsize::new(100).unwrap() + }); + + Self { + cache: Arc::new(Mutex::new(LruCache::new(size))), + max_size, + } + } + + pub fn get(&self, path: &PathBuf, modified: 
SystemTime) -> Option { + let mut cache = lock_or_recover(&self.cache, |c| c.clear()); + let key = CacheKey { + path: path.clone(), + modified, + }; + + if let Some(result) = cache.get(&key) { + tracing::trace!("Cache hit for {:?}", path); + Some((**result).clone()) + } else { + tracing::trace!("Cache miss for {:?}", path); + None + } + } + + pub fn put(&self, path: PathBuf, modified: SystemTime, result: AnalysisResult) { + let mut cache = lock_or_recover(&self.cache, |c| c.clear()); + let key = CacheKey { + path: path.clone(), + modified, + }; + + tracing::trace!("Caching result for {:?}", path); + cache.put(key, Arc::new(result)); + } + + pub fn clear(&self) { + let mut cache = lock_or_recover(&self.cache, |c| c.clear()); + cache.clear(); + tracing::debug!("Cache cleared"); + } + + pub fn len(&self) -> usize { + let cache = lock_or_recover(&self.cache, |c| c.clear()); + cache.len() + } + + pub fn is_empty(&self) -> bool { + let cache = lock_or_recover(&self.cache, |c| c.clear()); + cache.is_empty() + } +} + +impl Default for AnalysisCache { + fn default() -> Self { + Self::new(100) + } +} diff --git a/crates/goose-mcp/src/developer/analyze/formatter.rs b/crates/goose-mcp/src/developer/analyze/formatter.rs new file mode 100644 index 000000000000..504d44c1bfeb --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/formatter.rs @@ -0,0 +1,654 @@ +use rmcp::model::{Content, Role}; +use std::collections::{HashMap, HashSet}; +use std::path::{Path, PathBuf}; + +use crate::developer::analyze::types::{ + AnalysisMode, AnalysisResult, CallChain, EntryType, FocusedAnalysisData, +}; +use crate::developer::lang; + +pub struct Formatter; + +impl Formatter { + pub fn format_results(output: String) -> Vec { + vec![ + Content::text(output.clone()).with_audience(vec![Role::Assistant]), + Content::text(output) + .with_audience(vec![Role::User]) + .with_priority(0.0), + ] + } + + /// Format analysis result based on mode + pub fn format_analysis_result( + path: &Path, + result: 
&AnalysisResult, + mode: &AnalysisMode, + ) -> String { + tracing::debug!("Formatting result for {:?} in {:?} mode", path, mode); + + match mode { + AnalysisMode::Structure => Self::format_structure_overview(path, result), + AnalysisMode::Semantic => Self::format_semantic_result(path, result), + AnalysisMode::Focused => { + // Focused mode is handled separately + tracing::warn!("format_analysis_result called with Focused mode"); + String::new() + } + } + } + + /// Format structure overview (compact format) + pub fn format_structure_overview(path: &Path, result: &AnalysisResult) -> String { + let mut output = String::new(); + + // Format as: path [LOC, FUNCTIONS, CLASSES] + output.push_str(&format!("{} [{}L", path.display(), result.line_count)); + + if result.function_count > 0 { + output.push_str(&format!(", {}F", result.function_count)); + } + + if result.class_count > 0 { + output.push_str(&format!(", {}C", result.class_count)); + } + + output.push(']'); + + // Add FLAGS if any + if let Some(main_line) = result.main_line { + output.push_str(&format!(" main:{}", main_line)); + } + + output.push('\n'); + output + } + + /// Format semantic analysis result (dense matrix format) + pub fn format_semantic_result(path: &Path, result: &AnalysisResult) -> String { + let mut output = format!( + "FILE: {} [{}L, {}F, {}C]\n\n", + path.display(), + result.line_count, + result.function_count, + result.class_count + ); + + // Classes on single/multiple lines with colon-separated line numbers + if !result.classes.is_empty() { + output.push_str("C: "); + let class_strs: Vec = result + .classes + .iter() + .map(|c| format!("{}:{}", c.name, c.line)) + .collect(); + output.push_str(&class_strs.join(" ")); + output.push_str("\n\n"); + } + + // Functions with call counts where significant + if !result.functions.is_empty() { + output.push_str("F: "); + + // Count how many times each function is called + let mut call_counts: HashMap = HashMap::new(); + for call in &result.calls { + 
*call_counts.entry(call.callee_name.clone()).or_insert(0) += 1; + } + + let func_strs: Vec = result + .functions + .iter() + .map(|f| { + let count = call_counts.get(&f.name).unwrap_or(&0); + if *count > 3 { + format!("{}:{}•{}", f.name, f.line, count) + } else { + format!("{}:{}", f.name, f.line) + } + }) + .collect(); + + // Format functions, wrapping at reasonable line length + let mut line_len = 3; // "F: " + for (i, func_str) in func_strs.iter().enumerate() { + if i > 0 && line_len + func_str.len() + 1 > 100 { + output.push_str("\n "); + line_len = 3; + } + if i > 0 { + output.push(' '); + line_len += 1; + } + output.push_str(func_str); + line_len += func_str.len(); + } + output.push_str("\n\n"); + } + + // Condensed imports + if !result.imports.is_empty() { + output.push_str("I: "); + + // Group imports by module/package + let mut grouped_imports: HashMap> = HashMap::new(); + for import in &result.imports { + // Simple heuristic: first word/module is the group + let group = if import.starts_with("use ") { + import.split("::").next().unwrap_or("use").to_string() + } else if import.starts_with("import ") { + import + .split_whitespace() + .nth(1) + .unwrap_or("import") + .to_string() + } else if import.starts_with("from ") { + import + .split_whitespace() + .nth(1) + .unwrap_or("from") + .to_string() + } else { + import.split_whitespace().next().unwrap_or("").to_string() + }; + grouped_imports + .entry(group) + .or_default() + .push(import.clone()); + } + + // Show condensed import summary + let import_summary: Vec = grouped_imports + .iter() + .map(|(group, imports)| { + if imports.len() > 1 { + format!("{}({})", group, imports.len()) + } else { + // For single imports, show more detail + let imp = &imports[0]; + if imp.len() > 40 { + format!("{}...", &imp[..37]) + } else { + imp.clone() + } + } + }) + .collect(); + + output.push_str(&import_summary.join("; ")); + output.push('\n'); + } + + output + } + + /// Format directory structure with summary + pub fn 
format_directory_structure( + base_path: &Path, + results: &[(PathBuf, EntryType)], + max_depth: u32, + ) -> String { + let mut output = String::new(); + + // Add summary section + Self::append_summary(&mut output, results, max_depth); + + output.push_str("\nPATH [LOC, FUNCTIONS, CLASSES] \n"); + + // Add tree structure + Self::append_tree_structure(&mut output, base_path, results); + + output + } + + /// Append summary section with statistics + fn append_summary(output: &mut String, results: &[(PathBuf, EntryType)], max_depth: u32) { + // Calculate totals (only from files) + let files: Vec<&AnalysisResult> = results + .iter() + .filter_map(|(_, entry)| match entry { + EntryType::File(result) => Some(result), + _ => None, + }) + .collect(); + + let total_files = files.len(); + let total_lines: usize = files.iter().map(|r| r.line_count).sum(); + let total_functions: usize = files.iter().map(|r| r.function_count).sum(); + let total_classes: usize = files.iter().map(|r| r.class_count).sum(); + + // Format summary with depth indicator + output.push_str("SUMMARY:\n"); + if max_depth == 0 { + output.push_str(&format!( + "Shown: {} files, {}L, {}F, {}C (unlimited depth)\n", + total_files, total_lines, total_functions, total_classes + )); + } else { + output.push_str(&format!( + "Shown: {} files, {}L, {}F, {}C (max_depth={})\n", + total_files, total_lines, total_functions, total_classes, max_depth + )); + } + + // Add language distribution + Self::append_language_stats(output, results, total_lines); + } + + /// Append language statistics + fn append_language_stats( + output: &mut String, + results: &[(PathBuf, EntryType)], + total_lines: usize, + ) { + // Calculate language distribution + let mut language_lines: HashMap = HashMap::new(); + for (path, entry) in results { + if let EntryType::File(result) = entry { + let lang = lang::get_language_identifier(path); + if !lang.is_empty() && result.line_count > 0 { + *language_lines.entry(lang.to_string()).or_insert(0) += 
result.line_count; + } + } + } + + // Format language percentages + if !language_lines.is_empty() && total_lines > 0 { + let mut languages: Vec<_> = language_lines.iter().collect(); + languages.sort_by(|a, b| b.1.cmp(a.1)); // Sort by lines descending + + let lang_str: Vec = languages + .iter() + .map(|(lang, lines)| { + let percentage = (**lines as f64 / total_lines as f64 * 100.0) as u32; + format!("{} ({}%)", lang, percentage) + }) + .collect(); + + output.push_str(&format!("Languages: {}\n", lang_str.join(", "))); + } + } + + /// Append tree structure for directory contents + fn append_tree_structure( + output: &mut String, + base_path: &Path, + results: &[(PathBuf, EntryType)], + ) { + // Sort results by path for consistent output + let mut sorted_results = results.to_vec(); + sorted_results.sort_by(|a, b| a.0.cmp(&b.0)); + + // Track which directories we've already printed to avoid duplicates + let mut printed_dirs = HashSet::new(); + + // Format each entry with tree-style indentation + for (path, entry) in sorted_results { + Self::format_tree_entry(output, base_path, &path, &entry, &mut printed_dirs); + } + } + + /// Format a single tree entry + fn format_tree_entry( + output: &mut String, + base_path: &Path, + path: &Path, + entry: &EntryType, + printed_dirs: &mut HashSet, + ) { + // Make path relative to base_path + let relative_path = path.strip_prefix(base_path).unwrap_or(path); + + // Get path components for determining structure + let components: Vec<_> = relative_path.components().collect(); + if components.is_empty() { + return; + } + + // Print parent directories if not already printed + for i in 0..components.len().saturating_sub(1) { + let parent_path: PathBuf = components[..=i].iter().collect(); + if !printed_dirs.contains(&parent_path) { + let indent = " ".repeat(i); + let dir_name = components[i].as_os_str().to_string_lossy(); + output.push_str(&format!("{}{}/\n", indent, dir_name)); + printed_dirs.insert(parent_path); + } + } + + // Determine 
indentation level for this entry + let indent_level = components.len().saturating_sub(1); + let indent = " ".repeat(indent_level); + + // Get the file/directory name (last component) + let name = components + .last() + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .unwrap_or_else(|| relative_path.display().to_string()); + + // Format based on entry type + Self::format_entry_line( + output, + &indent, + &name, + entry, + base_path, + relative_path, + printed_dirs, + ); + } + + /// Format the line for a specific entry type + fn format_entry_line( + output: &mut String, + indent: &str, + name: &str, + entry: &EntryType, + base_path: &Path, + relative_path: &Path, + printed_dirs: &mut HashSet, + ) { + match entry { + EntryType::File(result) => { + output.push_str(&format!("{}{} [{}L", indent, name, result.line_count)); + if result.function_count > 0 { + output.push_str(&format!(", {}F", result.function_count)); + } + if result.class_count > 0 { + output.push_str(&format!(", {}C", result.class_count)); + } + output.push(']'); + if let Some(main_line) = result.main_line { + output.push_str(&format!(" main:{}", main_line)); + } + output.push('\n'); + } + EntryType::Directory => { + // Only print if not already printed as a parent + if !printed_dirs.contains(relative_path) { + output.push_str(&format!("{}{}/\n", indent, name)); + printed_dirs.insert(relative_path.to_path_buf()); + } + } + EntryType::SymlinkDir(target) | EntryType::SymlinkFile(target) => { + let is_dir = matches!(entry, EntryType::SymlinkDir(_)); + let target_display = if target.is_relative() { + target.display().to_string() + } else if let Ok(rel) = target.strip_prefix(base_path) { + rel.display().to_string() + } else { + target.display().to_string() + }; + let suffix = if is_dir { "/" } else { "" }; + output.push_str(&format!( + "{}{}{} -> {}\n", + indent, name, suffix, target_display + )); + } + } + } + + /// Format focused analysis output with call chains + pub fn 
format_focused_output(focus_data: &FocusedAnalysisData) -> String { + let mut output = format!("FOCUSED ANALYSIS: {}\n\n", focus_data.focus_symbol); + + // Build file alias mapping + let (file_map, sorted_files) = Self::build_file_aliases( + focus_data.definitions, + focus_data.incoming_chains, + focus_data.outgoing_chains, + ); + + // Section 1: Definitions + Self::append_definitions( + &mut output, + focus_data.definitions, + &file_map, + focus_data.focus_symbol, + ); + + // Section 2: Incoming Call Chains + Self::append_call_chains( + &mut output, + focus_data.incoming_chains, + &file_map, + focus_data.follow_depth, + true, + ); + + // Section 3: Outgoing Call Chains + Self::append_call_chains( + &mut output, + focus_data.outgoing_chains, + &file_map, + focus_data.follow_depth, + false, + ); + + // Section 4: Summary Statistics + Self::append_statistics( + &mut output, + focus_data.files_analyzed, + focus_data.definitions, + focus_data.incoming_chains, + focus_data.outgoing_chains, + focus_data.follow_depth, + ); + + // Section 5: File Legend + Self::append_file_legend( + &mut output, + &file_map, + &sorted_files, + focus_data.definitions, + focus_data.incoming_chains, + focus_data.outgoing_chains, + ); + + if focus_data.definitions.is_empty() + && focus_data.incoming_chains.is_empty() + && focus_data.outgoing_chains.is_empty() + { + output = format!( + "Symbol '{}' not found in any analyzed files.\n", + focus_data.focus_symbol + ); + } + + output + } + + /// Build file alias mapping for focused output + fn build_file_aliases( + definitions: &[(PathBuf, usize)], + incoming_chains: &[CallChain], + outgoing_chains: &[CallChain], + ) -> (HashMap, Vec) { + let mut all_files = HashSet::new(); + + for (file, _) in definitions { + all_files.insert(file.clone()); + } + + for chain in incoming_chains.iter().chain(outgoing_chains.iter()) { + for (file, _, _, _) in &chain.path { + all_files.insert(file.clone()); + } + } + + let mut sorted_files: Vec<_> = 
all_files.into_iter().collect(); + sorted_files.sort(); + + let mut file_map = HashMap::new(); + for (index, file) in sorted_files.iter().enumerate() { + let alias = if sorted_files.len() == 1 { + file.file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown") + .to_string() + } else { + format!("F{}", index + 1) + }; + file_map.insert(file.clone(), alias); + } + + (file_map, sorted_files) + } + + /// Append definitions section to output + fn append_definitions( + output: &mut String, + definitions: &[(PathBuf, usize)], + file_map: &HashMap, + focus_symbol: &str, + ) { + if !definitions.is_empty() { + output.push_str("DEFINITIONS:\n"); + for (file, line) in definitions { + let alias = file_map.get(file).cloned().unwrap_or_else(|| { + file.file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown") + .to_string() + }); + output.push_str(&format!("{}:{} - {}\n", alias, line, focus_symbol)); + } + output.push('\n'); + } + } + + /// Append call chains section to output + fn append_call_chains( + output: &mut String, + chains: &[CallChain], + file_map: &HashMap, + follow_depth: u32, + is_incoming: bool, + ) { + if !chains.is_empty() { + let chain_type = if is_incoming { "INCOMING" } else { "OUTGOING" }; + output.push_str(&format!( + "{} CALL CHAINS (depth={}):\n", + chain_type, follow_depth + )); + + let mut unique_chains = HashSet::new(); + for chain in chains { + let chain_str = Self::format_chain_path(&chain.path, file_map); + unique_chains.insert(chain_str); + } + + let mut sorted_chains: Vec<_> = unique_chains.into_iter().collect(); + sorted_chains.sort(); + + for chain in sorted_chains { + output.push_str(&format!("{}\n", chain)); + } + output.push('\n'); + } + } + + /// Format a single chain path + fn format_chain_path( + path: &[(PathBuf, usize, String, String)], + file_map: &HashMap, + ) -> String { + path.iter() + .map(|(file, line, from, to)| { + let alias = file_map.get(file).cloned().unwrap_or_else(|| { + file.file_name() + .and_then(|n| 
n.to_str()) + .unwrap_or("unknown") + .to_string() + }); + format!("{}:{} ({} -> {})", alias, line, from, to) + }) + .collect::>() + .join(" -> ") + } + + /// Append statistics section to output + fn append_statistics( + output: &mut String, + files_analyzed: &[PathBuf], + definitions: &[(PathBuf, usize)], + incoming_chains: &[CallChain], + outgoing_chains: &[CallChain], + follow_depth: u32, + ) { + output.push_str("STATISTICS:\n"); + output.push_str(&format!(" Files analyzed: {}\n", files_analyzed.len())); + output.push_str(&format!(" Definitions found: {}\n", definitions.len())); + output.push_str(&format!(" Incoming chains: {}\n", incoming_chains.len())); + output.push_str(&format!(" Outgoing chains: {}\n", outgoing_chains.len())); + output.push_str(&format!(" Follow depth: {}\n", follow_depth)); + } + + /// Append file legend section to output + fn append_file_legend( + output: &mut String, + file_map: &HashMap, + sorted_files: &[PathBuf], + definitions: &[(PathBuf, usize)], + incoming_chains: &[CallChain], + outgoing_chains: &[CallChain], + ) { + if !file_map.is_empty() + && (sorted_files.len() > 1 + || !incoming_chains.is_empty() + || !outgoing_chains.is_empty() + || !definitions.is_empty()) + { + output.push_str("\nFILES:\n"); + let mut legend_entries: Vec<_> = file_map.iter().collect(); + legend_entries.sort_by_key(|(_, alias)| alias.as_str()); + + for (file_path, alias) in legend_entries { + if sorted_files.len() == 1 + && alias == file_path.file_name().and_then(|n| n.to_str()).unwrap_or("") + { + continue; + } + output.push_str(&format!(" {}: {}\n", alias, file_path.display())); + } + } + } + + /// Filter output by focus symbol + pub fn filter_by_focus(output: &str, focus: &str) -> String { + let mut filtered = String::new(); + let mut include_section = false; + + for line in output.lines() { + if line.starts_with("##") { + include_section = false; + } + + if line.contains(focus) { + include_section = true; + // Include the file header + if let 
Some(header_line) = output + .lines() + .rev() + .find(|l| l.starts_with("##") && line.contains(&l[3..])) + { + if !filtered.contains(header_line) { + filtered.push_str(header_line); + filtered.push('\n'); + } + } + } + + if include_section || line.starts_with('#') { + filtered.push_str(line); + filtered.push('\n'); + } + } + + if filtered.is_empty() { + format!("No results found for symbol: {}", focus) + } else { + filtered + } + } +} diff --git a/crates/goose-mcp/src/developer/analyze/graph.rs b/crates/goose-mcp/src/developer/analyze/graph.rs new file mode 100644 index 000000000000..e2163d022b9a --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/graph.rs @@ -0,0 +1,203 @@ +use std::collections::{HashMap, HashSet, VecDeque}; +use std::path::PathBuf; + +use crate::developer::analyze::types::{AnalysisResult, CallChain}; + +#[derive(Debug, Clone, Default)] +pub struct CallGraph { + callers: HashMap>, + callees: HashMap>, + pub definitions: HashMap>, +} + +impl CallGraph { + pub fn new() -> Self { + Self::default() + } + + pub fn build_from_results(results: &[(PathBuf, AnalysisResult)]) -> Self { + tracing::debug!("Building call graph from {} files", results.len()); + let mut graph = Self::new(); + + for (file_path, result) in results { + // Record definitions + for func in &result.functions { + graph + .definitions + .entry(func.name.clone()) + .or_default() + .push((file_path.clone(), func.line)); + } + + for class in &result.classes { + graph + .definitions + .entry(class.name.clone()) + .or_default() + .push((file_path.clone(), class.line)); + } + + // Record call relationships + for call in &result.calls { + let caller = call + .caller_name + .clone() + .unwrap_or_else(|| "".to_string()); + + // Add to callers map (who calls this function) + graph + .callers + .entry(call.callee_name.clone()) + .or_default() + .push((file_path.clone(), call.line, caller.clone())); + + // Add to callees map (what this function calls) + if caller != "" { + 
graph.callees.entry(caller).or_default().push(( + file_path.clone(), + call.line, + call.callee_name.clone(), + )); + } + } + } + + tracing::trace!( + "Graph built: {} definitions, {} caller entries, {} callee entries", + graph.definitions.len(), + graph.callers.len(), + graph.callees.len() + ); + + graph + } + + pub fn find_incoming_chains(&self, symbol: &str, max_depth: u32) -> Vec { + tracing::trace!( + "Finding incoming chains for {} with depth {}", + symbol, + max_depth + ); + + if max_depth == 0 { + return vec![]; + } + + let mut chains = Vec::new(); + let mut visited = HashSet::new(); + let mut queue = VecDeque::new(); + + // Start with direct callers + if let Some(direct_callers) = self.callers.get(symbol) { + for (file, line, caller) in direct_callers { + let initial_path = vec![(file.clone(), *line, caller.clone(), symbol.to_string())]; + + if max_depth == 1 { + chains.push(CallChain { path: initial_path }); + } else { + queue.push_back((caller.clone(), initial_path, 1)); + } + } + } + + // BFS to find deeper chains + while let Some((current_symbol, path, depth)) = queue.pop_front() { + if depth >= max_depth { + chains.push(CallChain { path }); + continue; + } + + // Avoid cycles + if visited.contains(¤t_symbol) { + chains.push(CallChain { path }); // Still record the path we found + continue; + } + visited.insert(current_symbol.clone()); + + // Find who calls the current symbol + if let Some(callers) = self.callers.get(¤t_symbol) { + for (file, line, caller) in callers { + let mut new_path = + vec![(file.clone(), *line, caller.clone(), current_symbol.clone())]; + new_path.extend(path.clone()); + + if depth + 1 >= max_depth { + chains.push(CallChain { path: new_path }); + } else { + queue.push_back((caller.clone(), new_path, depth + 1)); + } + } + } else { + // No more callers, this is a chain end + chains.push(CallChain { path }); + } + } + + tracing::trace!("Found {} incoming chains", chains.len()); + chains + } + + pub fn find_outgoing_chains(&self, 
symbol: &str, max_depth: u32) -> Vec { + tracing::trace!( + "Finding outgoing chains for {} with depth {}", + symbol, + max_depth + ); + + if max_depth == 0 { + return vec![]; + } + + let mut chains = Vec::new(); + let mut visited = HashSet::new(); + let mut queue = VecDeque::new(); + + // Start with what this symbol calls + if let Some(direct_callees) = self.callees.get(symbol) { + for (file, line, callee) in direct_callees { + let initial_path = vec![(file.clone(), *line, symbol.to_string(), callee.clone())]; + + if max_depth == 1 { + chains.push(CallChain { path: initial_path }); + } else { + queue.push_back((callee.clone(), initial_path, 1)); + } + } + } + + // BFS to find deeper chains + while let Some((current_symbol, path, depth)) = queue.pop_front() { + if depth >= max_depth { + chains.push(CallChain { path }); + continue; + } + + // Avoid cycles + if visited.contains(¤t_symbol) { + chains.push(CallChain { path }); + continue; + } + visited.insert(current_symbol.clone()); + + // Find what the current symbol calls + if let Some(callees) = self.callees.get(¤t_symbol) { + for (file, line, callee) in callees { + let mut new_path = path.clone(); + new_path.push((file.clone(), *line, current_symbol.clone(), callee.clone())); + + if depth + 1 >= max_depth { + chains.push(CallChain { path: new_path }); + } else { + queue.push_back((callee.clone(), new_path, depth + 1)); + } + } + } else { + // No more callees, this is a chain end + chains.push(CallChain { path }); + } + } + + tracing::trace!("Found {} outgoing chains", chains.len()); + chains + } +} diff --git a/crates/goose-mcp/src/developer/analyze/languages/go.rs b/crates/goose-mcp/src/developer/analyze/languages/go.rs new file mode 100644 index 000000000000..4cef6ff4f508 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/go.rs @@ -0,0 +1,19 @@ +/// Tree-sitter query for extracting Go code elements +pub const ELEMENT_QUERY: &str = r#" + (function_declaration name: (identifier) @func) + 
(method_declaration name: (field_identifier) @func) + (type_declaration (type_spec name: (type_identifier) @struct)) + (import_declaration) @import +"#; + +/// Tree-sitter query for extracting Go function calls +pub const CALL_QUERY: &str = r#" + ; Function calls + (call_expression + function: (identifier) @function.call) + + ; Method calls + (call_expression + function: (selector_expression + field: (field_identifier) @method.call)) +"#; diff --git a/crates/goose-mcp/src/developer/analyze/languages/java.rs b/crates/goose-mcp/src/developer/analyze/languages/java.rs new file mode 100644 index 000000000000..11e616dc2df1 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/java.rs @@ -0,0 +1,17 @@ +/// Tree-sitter query for extracting Java code elements +pub const ELEMENT_QUERY: &str = r#" + (method_declaration name: (identifier) @func) + (class_declaration name: (identifier) @class) + (import_declaration) @import +"#; + +/// Tree-sitter query for extracting Java function calls +pub const CALL_QUERY: &str = r#" + ; Method invocations + (method_invocation + name: (identifier) @method.call) + + ; Constructor calls + (object_creation_expression + type: (type_identifier) @constructor.call) +"#; diff --git a/crates/goose-mcp/src/developer/analyze/languages/javascript.rs b/crates/goose-mcp/src/developer/analyze/languages/javascript.rs new file mode 100644 index 000000000000..48d923dca2ef --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/javascript.rs @@ -0,0 +1,22 @@ +/// Tree-sitter query for extracting JavaScript/TypeScript code elements +pub const ELEMENT_QUERY: &str = r#" + (function_declaration name: (identifier) @func) + (class_declaration name: (identifier) @class) + (import_statement) @import +"#; + +/// Tree-sitter query for extracting JavaScript/TypeScript function calls +pub const CALL_QUERY: &str = r#" + ; Function calls + (call_expression + function: (identifier) @function.call) + + ; Method calls + (call_expression + 
function: (member_expression + property: (property_identifier) @method.call)) + + ; Constructor calls + (new_expression + constructor: (identifier) @constructor.call) +"#; diff --git a/crates/goose-mcp/src/developer/analyze/languages/kotlin.rs b/crates/goose-mcp/src/developer/analyze/languages/kotlin.rs new file mode 100644 index 000000000000..5182fe89a94a --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/kotlin.rs @@ -0,0 +1,27 @@ +/// Tree-sitter query for extracting Kotlin code elements +pub const ELEMENT_QUERY: &str = r#" + ; Functions + (function_declaration (simple_identifier) @func) + + ; Classes + (class_declaration (type_identifier) @class) + + ; Objects (singleton classes) + (object_declaration (type_identifier) @class) + + ; Imports + (import_header) @import +"#; + +/// Tree-sitter query for extracting Kotlin function calls +pub const CALL_QUERY: &str = r#" + ; Simple function calls + (call_expression + (simple_identifier) @function.call) + + ; Method calls with navigation (obj.method()) + (call_expression + (navigation_expression + (navigation_suffix + (simple_identifier) @method.call))) +"#; diff --git a/crates/goose-mcp/src/developer/analyze/languages/mod.rs b/crates/goose-mcp/src/developer/analyze/languages/mod.rs new file mode 100644 index 000000000000..c5303ea9cdd8 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/mod.rs @@ -0,0 +1,35 @@ +pub mod go; +pub mod java; +pub mod javascript; +pub mod kotlin; +pub mod python; +pub mod rust; +pub mod swift; + +/// Get the tree-sitter query for extracting code elements for a language +pub fn get_element_query(language: &str) -> &'static str { + match language { + "python" => python::ELEMENT_QUERY, + "rust" => rust::ELEMENT_QUERY, + "javascript" | "typescript" => javascript::ELEMENT_QUERY, + "go" => go::ELEMENT_QUERY, + "java" => java::ELEMENT_QUERY, + "kotlin" => kotlin::ELEMENT_QUERY, + "swift" => swift::ELEMENT_QUERY, + _ => "", + } +} + +/// Get the tree-sitter query 
for extracting function calls for a language +pub fn get_call_query(language: &str) -> &'static str { + match language { + "python" => python::CALL_QUERY, + "rust" => rust::CALL_QUERY, + "javascript" | "typescript" => javascript::CALL_QUERY, + "go" => go::CALL_QUERY, + "java" => java::CALL_QUERY, + "kotlin" => kotlin::CALL_QUERY, + "swift" => swift::CALL_QUERY, + _ => "", + } +} diff --git a/crates/goose-mcp/src/developer/analyze/languages/python.rs b/crates/goose-mcp/src/developer/analyze/languages/python.rs new file mode 100644 index 000000000000..3dd117d75dec --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/python.rs @@ -0,0 +1,25 @@ +/// Tree-sitter query for extracting Python code elements +pub const ELEMENT_QUERY: &str = r#" + (function_definition name: (identifier) @func) + (class_definition name: (identifier) @class) + (import_statement) @import + (import_from_statement) @import + (aliased_import) @import + (assignment left: (identifier) @class) +"#; + +/// Tree-sitter query for extracting Python function calls +pub const CALL_QUERY: &str = r#" + ; Function calls + (call + function: (identifier) @function.call) + + ; Method calls + (call + function: (attribute + attribute: (identifier) @method.call)) + + ; Decorator applications + (decorator (identifier) @function.call) + (decorator (attribute attribute: (identifier) @method.call)) +"#; diff --git a/crates/goose-mcp/src/developer/analyze/languages/rust.rs b/crates/goose-mcp/src/developer/analyze/languages/rust.rs new file mode 100644 index 000000000000..1a40674486c3 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/rust.rs @@ -0,0 +1,28 @@ +/// Tree-sitter query for extracting Rust code elements +pub const ELEMENT_QUERY: &str = r#" + (function_item name: (identifier) @func) + (impl_item type: (type_identifier) @class) + (struct_item name: (type_identifier) @struct) + (use_declaration) @import +"#; + +/// Tree-sitter query for extracting Rust function calls +pub const 
CALL_QUERY: &str = r#" + ; Function calls + (call_expression + function: (identifier) @function.call) + + ; Method calls + (call_expression + function: (field_expression + field: (field_identifier) @method.call)) + + ; Associated function calls (e.g., Type::method()) + ; Now captures the full Type::method instead of just method + (call_expression + function: (scoped_identifier) @scoped.call) + + ; Macro calls (often contain function-like behavior) + (macro_invocation + macro: (identifier) @macro.call) +"#; diff --git a/crates/goose-mcp/src/developer/analyze/languages/swift.rs b/crates/goose-mcp/src/developer/analyze/languages/swift.rs new file mode 100644 index 000000000000..bbf24239bcf5 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/languages/swift.rs @@ -0,0 +1,56 @@ +/// Tree-sitter query for extracting Swift code elements +pub const ELEMENT_QUERY: &str = r#" + ; Functions + (function_declaration name: (simple_identifier) @func) + + ; Classes + (class_declaration name: (type_identifier) @class) + + ; Protocols (interfaces) + (protocol_declaration name: (type_identifier) @class) + + ; Imports + (import_declaration) @import +"#; + +/// Tree-sitter query for extracting Swift function calls +pub const CALL_QUERY: &str = r#" + ; Function calls + (call_expression + (simple_identifier) @function.call) + + ; Method calls with navigation + (call_expression + (navigation_expression + target: (_) + suffix: (navigation_suffix + suffix: (simple_identifier) @method.call))) + + ; Constructor calls + (constructor_expression + (user_type + (type_identifier) @constructor.call)) + + ; Async function calls + (await_expression + (call_expression + (simple_identifier) @function.call)) + + ; Async method calls + (await_expression + (call_expression + (navigation_expression + suffix: (navigation_suffix + suffix: (simple_identifier) @method.call)))) + + ; Static method calls (Type.method()) + (call_expression + (navigation_expression + target: (user_type) + suffix: 
(navigation_suffix + suffix: (simple_identifier) @scoped.call))) + + ; Closure calls + (call_expression + (navigation_expression) @function.call) +"#; diff --git a/crates/goose-mcp/src/developer/analyze/mod.rs b/crates/goose-mcp/src/developer/analyze/mod.rs new file mode 100644 index 000000000000..5c36f0b318da --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/mod.rs @@ -0,0 +1,342 @@ +pub mod cache; +pub mod formatter; +pub mod graph; +pub mod languages; +pub mod parser; +pub mod traversal; +pub mod types; + +#[cfg(test)] +mod tests; + +use ignore::gitignore::Gitignore; +use rmcp::model::{CallToolResult, ErrorCode, ErrorData}; +use std::path::{Path, PathBuf}; + +use crate::developer::lang; + +use self::cache::AnalysisCache; +use self::formatter::Formatter; +use self::graph::CallGraph; +use self::parser::{ElementExtractor, ParserManager}; +use self::traversal::FileTraverser; +use self::types::{AnalysisMode, AnalysisResult, AnalyzeParams, FocusedAnalysisData}; + +/// Helper to safely lock a mutex with poison recovery +/// The recovery function is called on the mutex contents if the lock was poisoned +pub(crate) fn lock_or_recover( + mutex: &std::sync::Mutex, + recovery: F, +) -> std::sync::MutexGuard<'_, T> +where + F: FnOnce(&mut T), +{ + mutex.lock().unwrap_or_else(|poisoned| { + let mut guard = poisoned.into_inner(); + recovery(&mut guard); + tracing::warn!("Recovered from poisoned lock"); + guard + }) +} + +/// Code analyzer with caching and tree-sitter parsing +#[derive(Clone)] +pub struct CodeAnalyzer { + parser_manager: ParserManager, + cache: AnalysisCache, +} + +impl Default for CodeAnalyzer { + fn default() -> Self { + Self::new() + } +} + +impl CodeAnalyzer { + /// Create a new code analyzer + pub fn new() -> Self { + tracing::debug!("Initializing CodeAnalyzer"); + Self { + parser_manager: ParserManager::new(), + cache: AnalysisCache::new(100), + } + } + + /// Main analyze entry point + pub fn analyze( + &self, + params: AnalyzeParams, + path: 
PathBuf, + ignore_patterns: &Gitignore, + ) -> Result<CallToolResult, ErrorData> { + tracing::info!("Starting analysis of {:?} with params {:?}", path, params); + + let traverser = FileTraverser::new(ignore_patterns); + + traverser.validate_path(&path)?; + + let mode = self.determine_mode(&params, &path); + + tracing::debug!("Using analysis mode: {:?}", mode); + + let mut output = match mode { + AnalysisMode::Focused => self.analyze_focused(&path, &params, &traverser)?, + AnalysisMode::Semantic => { + if path.is_file() { + let result = self.analyze_file(&path, &mode)?; + Formatter::format_analysis_result(&path, &result, &mode) + } else { + // Semantic mode on directory - analyze all files + self.analyze_directory(&path, &params, &traverser, &mode)? + } + } + AnalysisMode::Structure => { + if path.is_file() { + let result = self.analyze_file(&path, &mode)?; + Formatter::format_analysis_result(&path, &result, &mode) + } else { + self.analyze_directory(&path, &params, &traverser, &mode)? + } + } + }; + + // If focus is specified with non-focused mode, filter results + if let Some(focus) = &params.focus { + if mode != AnalysisMode::Focused { + output = Formatter::filter_by_focus(&output, focus); + } + } + + // Check output size and warn if too large (unless force flag is set) + const OUTPUT_LIMIT: usize = 1000; + if !params.force { + let line_count = output.lines().count(); + if line_count > OUTPUT_LIMIT { + let warning = format!( + "LARGE OUTPUT WARNING\n\n\ + The analysis would produce {} lines (~{} tokens).\n\ + This exceeds the {} line limit.\n\n\ + To proceed anyway, add 'force: true' to your parameters:\n\ + analyze path=\"{}\" force=true{}\n\n\ + Or narrow your scope by:\n\ + • Analyzing a subdirectory instead\n\ + • Using focus mode: focus=\"symbol_name\"\n\ + • Reducing depth: max_depth=1", + line_count, + line_count * 10, // rough token estimate + OUTPUT_LIMIT, + path.display(), + if let Some(f) = &params.focus { + format!(" focus=\"{}\"", f) + } else { + String::new() + } + ); + return 
Ok(CallToolResult::success(vec![rmcp::model::Content::text( + warning, + )])); + } + } + + tracing::info!("Analysis complete"); + Ok(CallToolResult::success(Formatter::format_results(output))) + } + + /// Determine the analysis mode based on parameters and path + fn determine_mode(&self, params: &AnalyzeParams, path: &Path) -> AnalysisMode { + // If focus is specified, use focused mode + if params.focus.is_some() { + return AnalysisMode::Focused; + } + + // Otherwise, use semantic for files, structure for directories + if path.is_file() { + AnalysisMode::Semantic + } else { + AnalysisMode::Structure + } + } + + /// Analyze a single file + fn analyze_file(&self, path: &Path, mode: &AnalysisMode) -> Result { + tracing::debug!("Analyzing file {:?} in {:?} mode", path, mode); + + // Check cache first + let metadata = std::fs::metadata(path).map_err(|e| { + tracing::error!("Failed to get file metadata for {:?}: {}", path, e); + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to get metadata for '{}': {}", path.display(), e), + None, + ) + })?; + + let modified = metadata.modified().map_err(|e| { + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!( + "Failed to get modification time for '{}': {}", + path.display(), + e + ), + None, + ) + })?; + + // Check cache + if let Some(cached) = self.cache.get(&path.to_path_buf(), modified) { + tracing::trace!("Using cached result for {:?}", path); + return Ok(cached); + } + + // Read file content - handle binary files gracefully + let content = match std::fs::read_to_string(path) { + Ok(content) => content, + Err(e) => { + // Binary or non-UTF-8 file, skip parsing + tracing::trace!("Skipping binary/non-UTF-8 file {:?}: {}", path, e); + return Ok(AnalysisResult::empty(0)); + } + }; + + // Count lines + let line_count = content.lines().count(); + + // Get language + let language = lang::get_language_identifier(path); + if language.is_empty() { + tracing::trace!("Unsupported file type: {:?}", path); + // 
Unsupported language, return empty result + return Ok(AnalysisResult::empty(line_count)); + } + + // Check if we support this language for parsing + let supported = matches!( + language, + "python" | "rust" | "javascript" | "typescript" | "go" | "java" | "kotlin" | "swift" + ); + + if !supported { + tracing::trace!("Language {} not supported for parsing", language); + return Ok(AnalysisResult::empty(line_count)); + } + + // Parse the file + let tree = self.parser_manager.parse(&content, language)?; + + // Extract information based on mode + let depth = mode.as_str(); + let mut result = ElementExtractor::extract_with_depth(&tree, &content, language, depth)?; + + // Add line count to the result + result.line_count = line_count; + + // Cache the result + self.cache.put(path.to_path_buf(), modified, result.clone()); + + Ok(result) + } + + /// Analyze a directory + fn analyze_directory( + &self, + path: &Path, + params: &AnalyzeParams, + traverser: &FileTraverser<'_>, + mode: &AnalysisMode, + ) -> Result { + tracing::debug!("Analyzing directory {:?} in {:?} mode", path, mode); + + let mode = *mode; + + // Collect directory results with parallel processing + let results = traverser.collect_directory_results(path, params.max_depth, |file_path| { + self.analyze_file(file_path, &mode) + })?; + + // Format based on mode + Ok(Formatter::format_directory_structure( + path, + &results, + params.max_depth, + )) + } + + /// Focused mode analysis - track a symbol across files + fn analyze_focused( + &self, + path: &Path, + params: &AnalyzeParams, + traverser: &FileTraverser<'_>, + ) -> Result { + // Focused mode requires focus parameter + let focus_symbol = params.focus.as_ref().ok_or_else(|| { + ErrorData::new( + ErrorCode::INVALID_PARAMS, + "Focused mode requires 'focus' parameter to specify the symbol to track" + .to_string(), + None, + ) + })?; + + tracing::info!("Running focused analysis for symbol '{}'", focus_symbol); + + // Step 1: Collect all files to analyze + let 
files_to_analyze = if path.is_file() { + vec![path.to_path_buf()] + } else { + traverser.collect_files_for_focused(path, params.max_depth)? + }; + + tracing::debug!( + "Analyzing {} files for focused analysis", + files_to_analyze.len() + ); + + // Step 2: Analyze all files and collect results using parallel processing + use rayon::prelude::*; + let all_results: Result, _> = files_to_analyze + .par_iter() + .map(|file_path| { + self.analyze_file(file_path, &AnalysisMode::Semantic) + .map(|result| (file_path.clone(), result)) + }) + .collect(); + let all_results = all_results?; + + // Step 3: Build the call graph + let graph = CallGraph::build_from_results(&all_results); + + // Step 4: Find call chains based on follow_depth + let incoming_chains = if params.follow_depth > 0 { + graph.find_incoming_chains(focus_symbol, params.follow_depth) + } else { + vec![] + }; + + let outgoing_chains = if params.follow_depth > 0 { + graph.find_outgoing_chains(focus_symbol, params.follow_depth) + } else { + vec![] + }; + + // Step 5: Get definitions from graph + let definitions = graph + .definitions + .get(focus_symbol) + .cloned() + .unwrap_or_default(); + + // Step 6: Format the output + let focus_data = FocusedAnalysisData { + focus_symbol, + follow_depth: params.follow_depth, + files_analyzed: &files_to_analyze, + definitions: &definitions, + incoming_chains: &incoming_chains, + outgoing_chains: &outgoing_chains, + }; + + Ok(Formatter::format_focused_output(&focus_data)) + } +} diff --git a/crates/goose-mcp/src/developer/analyze/parser.rs b/crates/goose-mcp/src/developer/analyze/parser.rs new file mode 100644 index 000000000000..e44de3cbc991 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/parser.rs @@ -0,0 +1,422 @@ +use rmcp::model::{ErrorCode, ErrorData}; +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use tree_sitter::{Language, Parser, Tree}; + +use super::lock_or_recover; +use crate::developer::analyze::types::{ + AnalysisResult, CallInfo, 
ClassInfo, ElementQueryResult, FunctionInfo, ReferenceInfo, + ReferenceType, +}; + +/// Manages tree-sitter parsers for different languages +#[derive(Clone)] +pub struct ParserManager { + parsers: Arc<Mutex<HashMap<String, Arc<Mutex<Parser>>>>>, +} + +impl ParserManager { + pub fn new() -> Self { + tracing::debug!("Initializing ParserManager"); + Self { + parsers: Arc::new(Mutex::new(HashMap::new())), + } + } + + /// Get or create a parser for the specified language + pub fn get_or_create_parser(&self, language: &str) -> Result<Arc<Mutex<Parser>>, ErrorData> { + let mut cache = lock_or_recover(&self.parsers, |c| c.clear()); + + if let Some(parser) = cache.get(language) { + tracing::trace!("Reusing cached parser for {}", language); + return Ok(Arc::clone(parser)); + } + + tracing::debug!("Creating new parser for {}", language); + let mut parser = Parser::new(); + let language_config: Language = match language { + "python" => tree_sitter_python::language(), + "rust" => tree_sitter_rust::language(), + "javascript" | "typescript" => tree_sitter_javascript::language(), + "go" => tree_sitter_go::language(), + "java" => tree_sitter_java::language(), + "kotlin" => tree_sitter_kotlin::language(), + "swift" => devgen_tree_sitter_swift::language(), + _ => { + tracing::warn!("Unsupported language: {}", language); + return Err(ErrorData::new( + ErrorCode::INVALID_PARAMS, + format!("Unsupported language: {}", language), + None, + )); + } + }; + + parser.set_language(&language_config).map_err(|e| { + tracing::error!("Failed to set language for {}: {}", language, e); + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to set language: {}", e), + None, + ) + })?; + + let parser_arc = Arc::new(Mutex::new(parser)); + cache.insert(language.to_string(), Arc::clone(&parser_arc)); + Ok(parser_arc) + } + + /// Parse source code and return the syntax tree + pub fn parse(&self, content: &str, language: &str) -> Result<Tree, ErrorData> { + let parser_arc = self.get_or_create_parser(language)?; + // Parser doesn't have a clear() method, so we just 
continue with it + let mut parser = lock_or_recover(&parser_arc, |_| {}); + + parser.parse(content, None).ok_or_else(|| { + tracing::error!("Failed to parse content as {}", language); + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to parse file as {}", language), + None, + ) + }) + } +} + +impl Default for ParserManager { + fn default() -> Self { + Self::new() + } +} + +/// Extract code elements from a parsed tree +pub struct ElementExtractor; + +impl ElementExtractor { + /// Extract code elements with optional semantic analysis + pub fn extract_with_depth( + tree: &Tree, + source: &str, + language: &str, + depth: &str, + ) -> Result { + tracing::trace!( + "Extracting elements from {} code with depth {}", + language, + depth + ); + + // First get the structural analysis + let mut result = Self::extract_elements(tree, source, language)?; + + // For structure mode, clear the detailed vectors but keep the counts + if depth == "structure" { + result.functions.clear(); + result.classes.clear(); + result.imports.clear(); + } else if depth == "semantic" { + // For semantic mode, also extract calls + let calls = Self::extract_calls(tree, source, language)?; + result.calls = calls; + + // Also populate references from the calls + for call in &result.calls { + result.references.push(ReferenceInfo { + symbol: call.callee_name.clone(), + ref_type: ReferenceType::Call, + line: call.line, + context: call.context.clone(), + }); + } + } + + Ok(result) + } + + /// Extract basic code elements (functions, classes, imports) + pub fn extract_elements( + tree: &Tree, + source: &str, + language: &str, + ) -> Result { + // Get language-specific query + let query_str = Self::get_element_query(language); + if query_str.is_empty() { + return Ok(Self::empty_analysis_result()); + } + + // Parse and process the query + let (functions, classes, imports) = Self::process_element_query(tree, source, query_str)?; + + // Detect main function + let main_line = 
functions.iter().find(|f| f.name == "main").map(|f| f.line); + + Ok(AnalysisResult { + function_count: functions.len(), + class_count: classes.len(), + import_count: imports.len(), + functions, + classes, + imports, + calls: vec![], + references: vec![], + line_count: 0, + main_line, + }) + } + + /// Get language-specific query for elements + fn get_element_query(language: &str) -> &'static str { + use crate::developer::analyze::languages; + + match language { + "python" => languages::python::ELEMENT_QUERY, + "rust" => languages::rust::ELEMENT_QUERY, + "javascript" | "typescript" => languages::javascript::ELEMENT_QUERY, + "go" => languages::go::ELEMENT_QUERY, + "java" => languages::java::ELEMENT_QUERY, + "kotlin" => languages::kotlin::ELEMENT_QUERY, + "swift" => languages::swift::ELEMENT_QUERY, + _ => "", + } + } + + /// Process element query and extract functions, classes, imports + fn process_element_query( + tree: &Tree, + source: &str, + query_str: &str, + ) -> Result { + use tree_sitter::{Query, QueryCursor}; + + let mut functions = Vec::new(); + let mut classes = Vec::new(); + let mut imports = Vec::new(); + + let query = Query::new(&tree.language(), query_str).map_err(|e| { + tracing::error!("Failed to create query: {}", e); + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to create query: {}", e), + None, + ) + })?; + + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + + for match_ in matches.by_ref() { + for capture in match_.captures { + let node = capture.node; + let text = &source[node.byte_range()]; + let line = source[..node.start_byte()].lines().count() + 1; + + match query.capture_names()[capture.index as usize] { + "func" => { + functions.push(FunctionInfo { + name: text.to_string(), + line, + params: vec![], // Simplified for now + }); + } + "class" | "struct" => { + classes.push(ClassInfo { + name: text.to_string(), + line, + methods: vec![], // Simplified for 
now + }); + } + "import" => { + imports.push(text.to_string()); + } + _ => {} + } + } + } + + tracing::trace!( + "Extracted {} functions, {} classes, {} imports", + functions.len(), + classes.len(), + imports.len() + ); + + Ok((functions, classes, imports)) + } + + /// Get language-specific query for finding function calls + fn get_call_query(language: &str) -> &'static str { + use crate::developer::analyze::languages; + + match language { + "python" => languages::python::CALL_QUERY, + "rust" => languages::rust::CALL_QUERY, + "javascript" | "typescript" => languages::javascript::CALL_QUERY, + "go" => languages::go::CALL_QUERY, + "java" => languages::java::CALL_QUERY, + "kotlin" => languages::kotlin::CALL_QUERY, + "swift" => languages::swift::CALL_QUERY, + _ => "", + } + } + + /// Extract function calls from the parse tree + fn extract_calls( + tree: &Tree, + source: &str, + language: &str, + ) -> Result, ErrorData> { + use tree_sitter::{Query, QueryCursor}; + + let mut calls = Vec::new(); + + // Get language-specific call query + let query_str = Self::get_call_query(language); + if query_str.is_empty() { + return Ok(calls); // No call query for this language + } + + let query = Query::new(&tree.language(), query_str).map_err(|e| { + tracing::error!("Failed to create call query: {}", e); + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to create call query: {}", e), + None, + ) + })?; + + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + + for match_ in matches.by_ref() { + for capture in match_.captures { + let node = capture.node; + let text = &source[node.byte_range()]; + let start_pos = node.start_position(); + + // Get the line of code for context + let line_start = source[..node.start_byte()] + .rfind('\n') + .map(|i| i + 1) + .unwrap_or(0); + let line_end = source[node.end_byte()..] 
+ .find('\n') + .map(|i| node.end_byte() + i) + .unwrap_or(source.len()); + let context = source[line_start..line_end].trim().to_string(); + + // Find the containing function + let caller_name = Self::find_containing_function(&node, source, language); + + // Add the call based on capture name + match query.capture_names()[capture.index as usize] { + "function.call" | "method.call" | "scoped.call" | "macro.call" + | "constructor.call" => { + calls.push(CallInfo { + caller_name, + callee_name: text.to_string(), + line: start_pos.row + 1, + column: start_pos.column, + context, + }); + } + _ => {} + } + } + } + + tracing::trace!("Extracted {} calls", calls.len()); + Ok(calls) + } + + /// Find which function contains a given node + fn find_containing_function( + node: &tree_sitter::Node, + source: &str, + language: &str, + ) -> Option { + let mut current = *node; + + // Walk up the tree to find a function definition + while let Some(parent) = current.parent() { + let kind = parent.kind(); + + // Check for function-like nodes based on language + let is_function = match language { + "python" => kind == "function_definition", + "rust" => kind == "function_item" || kind == "impl_item", + "javascript" | "typescript" => { + kind == "function_declaration" + || kind == "method_definition" + || kind == "arrow_function" + } + "go" => kind == "function_declaration" || kind == "method_declaration", + "java" => kind == "method_declaration" || kind == "constructor_declaration", + "kotlin" => kind == "function_declaration" || kind == "class_body", + "swift" => { + kind == "function_declaration" + || kind == "init_declaration" + || kind == "deinit_declaration" + || kind == "subscript_declaration" + } + _ => false, + }; + + if is_function { + // Try to extract the function name + for i in 0..parent.child_count() { + if let Some(child) = parent.child(i) { + // Look for identifier nodes that represent the function name + if child.kind() == "identifier" + || child.kind() == 
"field_identifier" + || child.kind() == "property_identifier" + || (language == "swift" && child.kind() == "simple_identifier") + { + // For Python, skip the first identifier if it's 'def' + if language == "python" && i == 0 { + continue; + } + // For Swift init/deinit, use special names + if language == "swift" { + if kind == "init_declaration" { + return Some("init".to_string()); + } else if kind == "deinit_declaration" { + return Some("deinit".to_string()); + } + } + return Some(source[child.byte_range()].to_string()); + } + + // For Rust impl blocks, look for the type + if language == "rust" + && kind == "impl_item" + && child.kind() == "type_identifier" + { + return Some(format!("impl {}", &source[child.byte_range()])); + } + } + } + } + + current = parent; + } + + None // No containing function found (module-level call) + } + + /// Create an empty analysis result + fn empty_analysis_result() -> AnalysisResult { + AnalysisResult { + functions: vec![], + classes: vec![], + imports: vec![], + calls: vec![], + references: vec![], + function_count: 0, + class_count: 0, + line_count: 0, + import_count: 0, + main_line: None, + } + } +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/cache_tests.rs b/crates/goose-mcp/src/developer/analyze/tests/cache_tests.rs new file mode 100644 index 000000000000..a6cf0c84c7c0 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/cache_tests.rs @@ -0,0 +1,94 @@ +// Tests for the cache module + +use crate::developer::analyze::cache::AnalysisCache; +use crate::developer::analyze::types::{AnalysisResult, FunctionInfo}; +use std::path::PathBuf; +use std::time::SystemTime; + +fn create_test_result() -> AnalysisResult { + AnalysisResult { + functions: vec![FunctionInfo { + name: "test_func".to_string(), + line: 1, + params: vec![], + }], + classes: vec![], + imports: vec![], + calls: vec![], + references: vec![], + function_count: 1, + class_count: 0, + line_count: 10, + import_count: 0, + main_line: None, + } +} + 
+#[test] +fn test_cache_hit_miss() { + let cache = AnalysisCache::new(10); + let path = PathBuf::from("test.rs"); + let time = SystemTime::now(); + let result = create_test_result(); + + // Initial miss + assert!(cache.get(&path, time).is_none()); + + // Store and hit + cache.put(path.clone(), time, result.clone()); + assert!(cache.get(&path, time).is_some()); + + // Different time = miss + let later = time + std::time::Duration::from_secs(1); + assert!(cache.get(&path, later).is_none()); +} + +#[test] +fn test_cache_eviction() { + let cache = AnalysisCache::new(2); + let result = create_test_result(); + let time = SystemTime::now(); + + // Fill cache + cache.put(PathBuf::from("file1.rs"), time, result.clone()); + cache.put(PathBuf::from("file2.rs"), time, result.clone()); + assert_eq!(cache.len(), 2); + + // Add third item, should evict first + cache.put(PathBuf::from("file3.rs"), time, result.clone()); + assert_eq!(cache.len(), 2); + + // First item should be evicted + assert!(cache.get(&PathBuf::from("file1.rs"), time).is_none()); + assert!(cache.get(&PathBuf::from("file2.rs"), time).is_some()); + assert!(cache.get(&PathBuf::from("file3.rs"), time).is_some()); +} + +#[test] +fn test_cache_clear() { + let cache = AnalysisCache::new(10); + let path = PathBuf::from("test.rs"); + let time = SystemTime::now(); + let result = create_test_result(); + + cache.put(path.clone(), time, result); + assert!(!cache.is_empty()); + + cache.clear(); + assert!(cache.is_empty()); + assert!(cache.get(&path, time).is_none()); +} + +#[test] +fn test_cache_default() { + let cache = AnalysisCache::default(); + assert!(cache.is_empty()); + + // Default cache should work normally + let path = PathBuf::from("test.rs"); + let time = SystemTime::now(); + let result = create_test_result(); + + cache.put(path.clone(), time, result); + assert!(cache.get(&path, time).is_some()); +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/fixtures.rs 
b/crates/goose-mcp/src/developer/analyze/tests/fixtures.rs new file mode 100644 index 000000000000..3c8d7d26be1d --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/fixtures.rs @@ -0,0 +1,87 @@ +// Shared test fixtures and utilities + +use crate::developer::analyze::types::{AnalysisResult, CallInfo, ClassInfo, FunctionInfo}; +use ignore::gitignore::Gitignore; + +/// Create a test AnalysisResult with sample data +pub fn create_test_result() -> AnalysisResult { + AnalysisResult { + functions: vec![ + FunctionInfo { + name: "main".to_string(), + line: 10, + params: vec![], + }, + FunctionInfo { + name: "helper".to_string(), + line: 20, + params: vec![], + }, + ], + classes: vec![ClassInfo { + name: "TestClass".to_string(), + line: 5, + methods: vec![], + }], + imports: vec!["use std::fs".to_string()], + calls: vec![], + references: vec![], + function_count: 2, + class_count: 1, + line_count: 100, + import_count: 1, + main_line: Some(10), + } +} + +/// Create a test result with specific functions and call relationships +pub fn create_test_result_with_calls( + functions: Vec<&str>, + calls: Vec<(&str, &str)>, +) -> AnalysisResult { + AnalysisResult { + functions: functions + .into_iter() + .map(|name| FunctionInfo { + name: name.to_string(), + line: 1, + params: vec![], + }) + .collect(), + classes: vec![], + imports: vec![], + calls: calls + .into_iter() + .map(|(caller, callee)| CallInfo { + caller_name: Some(caller.to_string()), + callee_name: callee.to_string(), + line: 1, + column: 0, + context: String::new(), + }) + .collect(), + references: vec![], + function_count: 0, + class_count: 0, + line_count: 0, + import_count: 0, + main_line: None, + } +} + +/// Create a simple test gitignore +pub fn create_test_gitignore() -> Gitignore { + let mut builder = ignore::gitignore::GitignoreBuilder::new("."); + builder.add_line(None, "*.log").unwrap(); + builder.add_line(None, "node_modules/").unwrap(); + builder.build().unwrap() +} + +/// Create a test gitignore 
with custom base path +#[allow(dead_code)] +pub fn create_test_gitignore_at(base_path: &std::path::Path) -> Gitignore { + let mut builder = ignore::gitignore::GitignoreBuilder::new(base_path); + builder.add_line(None, "*.log").unwrap(); + builder.add_line(None, "node_modules/").unwrap(); + builder.build().unwrap() +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/formatter_tests.rs b/crates/goose-mcp/src/developer/analyze/tests/formatter_tests.rs new file mode 100644 index 000000000000..ee315f194e63 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/formatter_tests.rs @@ -0,0 +1,151 @@ +// Tests for the formatter module + +use crate::developer::analyze::formatter::Formatter; +use crate::developer::analyze::tests::fixtures::create_test_result; +use crate::developer::analyze::types::{AnalysisMode, CallChain, EntryType, FocusedAnalysisData}; +use std::path::{Path, PathBuf}; + +#[test] +fn test_format_structure_overview() { + let result = create_test_result(); + let output = Formatter::format_structure_overview(Path::new("test.rs"), &result); + + assert!(output.contains("[100L, 2F, 1C]")); + assert!(output.contains("main:10")); +} + +#[test] +fn test_format_semantic_result() { + let result = create_test_result(); + let output = Formatter::format_semantic_result(Path::new("test.rs"), &result); + + assert!(output.contains("FILE: test.rs")); + assert!(output.contains("C: TestClass:5")); + assert!(output.contains("F: main:10 helper:20")); + assert!(output.contains("I: use std::fs")); +} + +#[test] +fn test_filter_by_focus() { + // The filter_by_focus function includes the whole section when it finds a match + // This is the expected behavior - if a symbol is found in a file, show the whole file section + let output = "## test.rs\nfunction main at line 10\nfunction helper at line 20\n## other.rs\nfunction foo at line 5\n"; + let filtered = Formatter::filter_by_focus(output, "main"); + + assert!(filtered.contains("main")); + // When we find 'main' in 
test.rs, we include the whole test.rs section including 'helper' + assert!(filtered.contains("helper")); + assert!(!filtered.contains("foo")); // But we don't include other.rs +} + +#[test] +fn test_format_analysis_result_modes() { + let result = create_test_result(); + let path = Path::new("test.rs"); + + // Test structure mode + let output = Formatter::format_analysis_result(path, &result, &AnalysisMode::Structure); + assert!(output.contains("[100L, 2F, 1C]")); + + // Test semantic mode + let output = Formatter::format_analysis_result(path, &result, &AnalysisMode::Semantic); + assert!(output.contains("FILE: test.rs")); + assert!(output.contains("C: TestClass:5")); + + // Test focused mode (should return empty string with warning) + let output = Formatter::format_analysis_result(path, &result, &AnalysisMode::Focused); + assert_eq!(output, ""); +} + +#[test] +fn test_format_directory_structure() { + let base_path = Path::new("/test"); + let result1 = create_test_result(); + let mut result2 = create_test_result(); + result2.line_count = 200; + + let results = vec![ + (PathBuf::from("/test/file1.rs"), EntryType::File(result1)), + (PathBuf::from("/test/dir"), EntryType::Directory), + ( + PathBuf::from("/test/dir/file2.rs"), + EntryType::File(result2), + ), + ]; + + let output = Formatter::format_directory_structure(base_path, &results, 2); + + // Check summary + assert!(output.contains("SUMMARY:")); + assert!(output.contains("2 files, 300L, 4F, 2C")); + assert!(output.contains("Languages: rust (100%)")); + + // Check file entries + assert!(output.contains("file1.rs [100L, 2F, 1C]")); + assert!(output.contains("file2.rs [200L, 2F, 1C]")); +} + +#[test] +fn test_format_focused_output() { + let focus_data = FocusedAnalysisData { + focus_symbol: "test_func", + definitions: &[(PathBuf::from("test.rs"), 10)], + incoming_chains: &[CallChain { + path: vec![( + PathBuf::from("test.rs"), + 20, + "caller".to_string(), + "test_func".to_string(), + )], + }], + outgoing_chains: 
&[CallChain { + path: vec![( + PathBuf::from("test.rs"), + 30, + "test_func".to_string(), + "callee".to_string(), + )], + }], + files_analyzed: &[PathBuf::from("test.rs")], + follow_depth: 2, + }; + + let output = Formatter::format_focused_output(&focus_data); + + assert!(output.contains("FOCUSED ANALYSIS: test_func")); + assert!(output.contains("DEFINITIONS:")); + assert!(output.contains("INCOMING CALL CHAINS")); + assert!(output.contains("OUTGOING CALL CHAINS")); + assert!(output.contains("STATISTICS:")); +} + +#[test] +fn test_format_focused_output_empty() { + let focus_data = FocusedAnalysisData { + focus_symbol: "nonexistent", + definitions: &[], + incoming_chains: &[], + outgoing_chains: &[], + files_analyzed: &[PathBuf::from("test.rs")], + follow_depth: 2, + }; + + let output = Formatter::format_focused_output(&focus_data); + + assert!(output.contains("Symbol 'nonexistent' not found")); +} + +#[test] +fn test_format_results_wrapper() { + let text = "Test output"; + let contents = Formatter::format_results(text.to_string()); + + assert_eq!(contents.len(), 2); + + // Check that both assistant and user content are created + let assistant_content = contents[0].as_text().unwrap(); + assert_eq!(assistant_content.text, "Test output"); + + let user_content = contents[1].as_text().unwrap(); + assert_eq!(user_content.text, "Test output"); +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/graph_tests.rs b/crates/goose-mcp/src/developer/analyze/tests/graph_tests.rs new file mode 100644 index 000000000000..9d5a1574002c --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/graph_tests.rs @@ -0,0 +1,116 @@ +// Tests for the graph module + +use crate::developer::analyze::graph::CallGraph; +use crate::developer::analyze::tests::fixtures::create_test_result_with_calls; +use std::path::PathBuf; + +#[test] +fn test_simple_call_chain() { + let results = vec![( + PathBuf::from("test.rs"), + create_test_result_with_calls(vec!["a", "b", "c"], vec![("a", "b"), 
("b", "c")]), + )]; + + let graph = CallGraph::build_from_results(&results); + + // Test incoming chains for 'c' + let chains = graph.find_incoming_chains("c", 2); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].path.len(), 2); // b->c, a->b + + // Test outgoing chains for 'a' + let chains = graph.find_outgoing_chains("a", 2); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].path.len(), 2); // a->b, b->c +} + +#[test] +fn test_circular_dependency() { + let results = vec![( + PathBuf::from("test.rs"), + create_test_result_with_calls(vec!["a", "b"], vec![("a", "b"), ("b", "a")]), + )]; + + let graph = CallGraph::build_from_results(&results); + + // Should handle cycles without infinite loop + let chains = graph.find_incoming_chains("a", 3); + assert!(!chains.is_empty()); +} + +#[test] +fn test_empty_graph() { + let graph = CallGraph::new(); + + // Should return empty results for non-existent symbols + let chains = graph.find_incoming_chains("nonexistent", 2); + assert!(chains.is_empty()); + + let chains = graph.find_outgoing_chains("nonexistent", 2); + assert!(chains.is_empty()); +} + +#[test] +fn test_max_depth_zero() { + let results = vec![( + PathBuf::from("test.rs"), + create_test_result_with_calls(vec!["a", "b"], vec![("a", "b")]), + )]; + + let graph = CallGraph::build_from_results(&results); + + // max_depth of 0 should return empty results + let chains = graph.find_incoming_chains("b", 0); + assert!(chains.is_empty()); + + let chains = graph.find_outgoing_chains("a", 0); + assert!(chains.is_empty()); +} + +#[test] +fn test_multiple_callers() { + let results = vec![( + PathBuf::from("test.rs"), + create_test_result_with_calls( + vec!["a", "b", "c", "target"], + vec![("a", "target"), ("b", "target"), ("c", "target")], + ), + )]; + + let graph = CallGraph::build_from_results(&results); + + // Should find all three callers + let chains = graph.find_incoming_chains("target", 1); + assert_eq!(chains.len(), 3); + + // Each chain should have exactly one 
call + for chain in chains { + assert_eq!(chain.path.len(), 1); + } +} + +#[test] +fn test_deep_chain() { + let results = vec![( + PathBuf::from("test.rs"), + create_test_result_with_calls( + vec!["a", "b", "c", "d", "e"], + vec![("a", "b"), ("b", "c"), ("c", "d"), ("d", "e")], + ), + )]; + + let graph = CallGraph::build_from_results(&results); + + // Test various depths + let chains = graph.find_incoming_chains("e", 1); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].path.len(), 1); // Just d->e + + let chains = graph.find_incoming_chains("e", 2); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].path.len(), 2); // c->d, d->e + + let chains = graph.find_incoming_chains("e", 4); + assert_eq!(chains.len(), 1); + assert_eq!(chains[0].path.len(), 4); // Full chain a->b->c->d->e +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/integration_tests.rs b/crates/goose-mcp/src/developer/analyze/tests/integration_tests.rs new file mode 100644 index 000000000000..464424f3cd37 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/integration_tests.rs @@ -0,0 +1,236 @@ +// Integration tests for the analyze module + +use crate::developer::analyze::tests::fixtures::create_test_gitignore; +use crate::developer::analyze::{types::AnalyzeParams, CodeAnalyzer}; +use std::fs; +use tempfile::TempDir; + +#[test] +fn test_analyze_python_file() { + let temp_dir = TempDir::new().unwrap(); + let file_path = temp_dir.path().join("test.py"); + fs::write(&file_path, "def main():\n pass").unwrap(); + + let analyzer = CodeAnalyzer::new(); + let params = AnalyzeParams { + path: file_path.to_string_lossy().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: false, + }; + + let ignore = create_test_gitignore(); + let result = analyzer.analyze(params, file_path, &ignore); + + assert!(result.is_ok()); + let result = result.unwrap(); + + // Check that we got content back + assert!(!result.content.is_empty()); +} + +#[test] +fn test_analyze_directory() 
{ + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + // Create test files + fs::write(dir_path.join("test1.rs"), "fn main() {}").unwrap(); + fs::write(dir_path.join("test2.py"), "def test(): pass").unwrap(); + + let analyzer = CodeAnalyzer::new(); + let params = AnalyzeParams { + path: dir_path.to_string_lossy().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: false, + }; + + let ignore = create_test_gitignore(); + let result = analyzer.analyze(params, dir_path.to_path_buf(), &ignore); + + assert!(result.is_ok()); + let result = result.unwrap(); + + // Check that we got content back + assert!(!result.content.is_empty()); + + // Extract text content and verify it contains expected information + if let Some(text_content) = result.content[0].as_text() { + assert!(text_content.text.contains("SUMMARY:")); + assert!(text_content.text.contains("test1.rs")); + assert!(text_content.text.contains("test2.py")); + } +} + +#[test] +fn test_focused_analysis() { + let temp_dir = TempDir::new().unwrap(); + let file_path = temp_dir.path().join("test.py"); + fs::write( + &file_path, + "def main():\n helper()\n\ndef helper():\n pass", + ) + .unwrap(); + + let analyzer = CodeAnalyzer::new(); + let params = AnalyzeParams { + path: file_path.to_string_lossy().to_string(), + focus: Some("helper".to_string()), + follow_depth: 1, + max_depth: 3, + force: false, + }; + + let ignore = create_test_gitignore(); + let result = analyzer.analyze(params, file_path, &ignore); + + assert!(result.is_ok()); + let result = result.unwrap(); + + // Check that focused analysis output is generated + if let Some(text_content) = result.content[0].as_text() { + assert!(text_content.text.contains("FOCUSED ANALYSIS: helper")); + assert!(text_content.text.contains("DEFINITIONS:")); + } +} + +#[test] +fn test_analyze_with_cache() { + let temp_dir = TempDir::new().unwrap(); + let file_path = temp_dir.path().join("test.rs"); + fs::write(&file_path, "fn main() {\n 
println!(\"Hello\");\n}").unwrap(); + + let analyzer = CodeAnalyzer::new(); + let params = AnalyzeParams { + path: file_path.to_string_lossy().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: false, + }; + + let ignore = create_test_gitignore(); + + // First analysis - should cache + let result1 = analyzer.analyze(params.clone(), file_path.clone(), &ignore); + assert!(result1.is_ok()); + + // Second analysis - should use cache + let result2 = analyzer.analyze(params, file_path, &ignore); + assert!(result2.is_ok()); + + // Results should be identical + let content1 = result1.unwrap().content[0].as_text().unwrap().text.clone(); + let content2 = result2.unwrap().content[0].as_text().unwrap().text.clone(); + assert_eq!(content1, content2); +} + +#[test] +fn test_analyze_unsupported_file() { + let temp_dir = TempDir::new().unwrap(); + let file_path = temp_dir.path().join("test.txt"); + fs::write(&file_path, "This is not code").unwrap(); + + let analyzer = CodeAnalyzer::new(); + let params = AnalyzeParams { + path: file_path.to_string_lossy().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: false, + }; + + let ignore = create_test_gitignore(); + let result = analyzer.analyze(params, file_path, &ignore); + + // Should succeed but return minimal information + assert!(result.is_ok()); +} + +#[test] +fn test_analyze_nonexistent_path() { + let analyzer = CodeAnalyzer::new(); + let params = AnalyzeParams { + path: "/nonexistent/path".to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: false, + }; + + let ignore = create_test_gitignore(); + let result = analyzer.analyze(params, "/nonexistent/path".into(), &ignore); + + // Should return an error + assert!(result.is_err()); +} + +#[test] +fn test_focused_without_symbol() { + let temp_dir = TempDir::new().unwrap(); + let file_path = temp_dir.path().join("test.py"); + fs::write(&file_path, "def main(): pass").unwrap(); + + let analyzer = CodeAnalyzer::new(); + + // 
This should trigger focused mode due to having focus parameter + let params = AnalyzeParams { + path: file_path.to_string_lossy().to_string(), + focus: Some("nonexistent_symbol".to_string()), + follow_depth: 1, + max_depth: 3, + force: false, + }; + + let ignore = create_test_gitignore(); + let result = analyzer.analyze(params, file_path, &ignore); + + assert!(result.is_ok()); + let result = result.unwrap(); + + // Should indicate symbol not found + if let Some(text_content) = result.content[0].as_text() { + assert!(text_content + .text + .contains("Symbol 'nonexistent_symbol' not found")); + } +} + +#[test] +fn test_nested_directory_analysis() { + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + // Create nested structure + let src_dir = dir_path.join("src"); + fs::create_dir(&src_dir).unwrap(); + fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap(); + + let lib_dir = src_dir.join("lib"); + fs::create_dir(&lib_dir).unwrap(); + fs::write(lib_dir.join("utils.rs"), "pub fn util() {}").unwrap(); + + let analyzer = CodeAnalyzer::new(); + let params = AnalyzeParams { + path: dir_path.to_string_lossy().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, // Increase max_depth to ensure we reach nested files + force: false, + }; + + let ignore = create_test_gitignore(); + let result = analyzer.analyze(params, dir_path.to_path_buf(), &ignore); + + assert!(result.is_ok()); + let result = result.unwrap(); + + if let Some(text_content) = result.content[0].as_text() { + assert!(text_content.text.contains("main.rs")); + // The directory structure analysis should show both files + assert!(text_content.text.contains("src")); + } +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/large_output_tests.rs b/crates/goose-mcp/src/developer/analyze/tests/large_output_tests.rs new file mode 100644 index 000000000000..a2c1409a6b54 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/large_output_tests.rs @@ -0,0 +1,137 @@ 
+use super::fixtures::create_test_gitignore; +use crate::developer::analyze::{types::AnalyzeParams, CodeAnalyzer}; +use std::fs; +use tempfile::TempDir; + +#[test] +fn test_large_output_warning() { + let analyzer = CodeAnalyzer::new(); + let gitignore = create_test_gitignore(); + + // Create a temp directory with many files to trigger the warning + let temp_dir = TempDir::new().unwrap(); + + // Create many Python files with lots of functions to ensure we exceed 1000 lines + // Each file generates about 1 line in structure mode, so we need 1000+ files + for i in 0..1100 { + let file_path = temp_dir.path().join(format!("file{}.py", i)); + // Each file will have multiple functions to generate more output + let mut content = String::new(); + for j in 0..10 { + content.push_str(&format!("def function_{}_{}():\n pass\n\n", i, j)); + } + for j in 0..5 { + content.push_str(&format!( + "class Class_{}_{}:\n def method(self):\n pass\n\n", + i, j + )); + } + fs::write(&file_path, content).unwrap(); + } + + let params = AnalyzeParams { + path: temp_dir.path().to_str().unwrap().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: false, // Should trigger warning + }; + + let result = analyzer + .analyze(params, temp_dir.path().to_path_buf(), &gitignore) + .unwrap(); + + // Check that we got a warning, not the actual analysis + assert_eq!(result.content.len(), 1); + if let Some(text_content) = result.content[0].as_text() { + assert!(text_content.text.contains("LARGE OUTPUT WARNING")); + assert!(text_content.text.contains("force=true")); + assert!(text_content.text.contains("exceed")); + } else { + panic!("Expected text content"); + } +} + +#[test] +fn test_force_flag_bypasses_warning() { + let analyzer = CodeAnalyzer::new(); + let gitignore = create_test_gitignore(); + + // Create a temp directory with many files + let temp_dir = TempDir::new().unwrap(); + + // Create many Python files with lots of functions to ensure we exceed 1000 lines + for i in 0..50 { + 
let file_path = temp_dir.path().join(format!("file{}.py", i)); + // Each file will have multiple functions to generate more output + let mut content = String::new(); + for j in 0..10 { + content.push_str(&format!("def function_{}_{}():\n pass\n\n", i, j)); + } + for j in 0..5 { + content.push_str(&format!( + "class Class_{}_{}:\n def method(self):\n pass\n\n", + i, j + )); + } + fs::write(&file_path, content).unwrap(); + } + + let params = AnalyzeParams { + path: temp_dir.path().to_str().unwrap().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: true, // Should bypass warning + }; + + let result = analyzer + .analyze(params, temp_dir.path().to_path_buf(), &gitignore) + .unwrap(); + + // Check that we got the actual analysis, not a warning + if let Some(text_content) = result.content[0].as_text() { + assert!(!text_content.text.contains("LARGE OUTPUT WARNING")); + // Should contain actual file analysis + assert!(text_content.text.contains("file0.py")); + assert!(text_content.text.contains("file29.py")); + } else { + panic!("Expected text content"); + } +} + +#[test] +fn test_small_output_no_warning() { + let analyzer = CodeAnalyzer::new(); + let gitignore = create_test_gitignore(); + + // Create a temp directory with just a few files + let temp_dir = TempDir::new().unwrap(); + + // Create only 2 Python files - should not trigger warning + for i in 0..2 { + let file_path = temp_dir.path().join(format!("file{}.py", i)); + fs::write(&file_path, format!("def function_{}():\n pass\n", i)).unwrap(); + } + + let params = AnalyzeParams { + path: temp_dir.path().to_str().unwrap().to_string(), + focus: None, + follow_depth: 2, + max_depth: 3, + force: false, // Shouldn't matter for small output + }; + + let result = analyzer + .analyze(params, temp_dir.path().to_path_buf(), &gitignore) + .unwrap(); + + // Check that we got the actual analysis, not a warning + if let Some(text_content) = result.content[0].as_text() { + 
assert!(!text_content.text.contains("LARGE OUTPUT WARNING")); + assert!(text_content.text.contains("file0.py")); + assert!(text_content.text.contains("file1.py")); + } else { + panic!("Expected text content"); + } +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/mod.rs b/crates/goose-mcp/src/developer/analyze/tests/mod.rs new file mode 100644 index 000000000000..efc42587736c --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/mod.rs @@ -0,0 +1,10 @@ +// Test modules for the analyze tool + +pub mod cache_tests; +pub mod fixtures; +pub mod formatter_tests; +pub mod graph_tests; +pub mod integration_tests; +pub mod large_output_tests; +pub mod parser_tests; +pub mod traversal_tests; diff --git a/crates/goose-mcp/src/developer/analyze/tests/parser_tests.rs b/crates/goose-mcp/src/developer/analyze/tests/parser_tests.rs new file mode 100644 index 000000000000..ae5e7a752f54 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/parser_tests.rs @@ -0,0 +1,228 @@ +// Tests for the parser module + +use crate::developer::analyze::parser::{ElementExtractor, ParserManager}; +use std::sync::Arc; + +#[test] +fn test_parser_initialization() { + let manager = ParserManager::new(); + assert!(manager.get_or_create_parser("python").is_ok()); + assert!(manager.get_or_create_parser("rust").is_ok()); + assert!(manager.get_or_create_parser("unknown").is_err()); +} + +#[test] +fn test_parser_caching() { + let manager = ParserManager::new(); + + // First call creates parser + let parser1 = manager.get_or_create_parser("python").unwrap(); + + // Second call should return cached parser + let parser2 = manager.get_or_create_parser("python").unwrap(); + + // They should be the same Arc + assert!(Arc::ptr_eq(&parser1, &parser2)); +} + +#[test] +fn test_parse_python() { + let manager = ParserManager::new(); + let content = "def hello():\n pass"; + + let tree = manager.parse(content, "python").unwrap(); + assert!(tree.root_node().child_count() > 0); +} + +#[test] 
+fn test_parse_rust() { + let manager = ParserManager::new(); + let content = "fn main() {\n println!(\"Hello\");\n}"; + + let tree = manager.parse(content, "rust").unwrap(); + assert!(tree.root_node().child_count() > 0); +} + +#[test] +fn test_parse_javascript() { + let manager = ParserManager::new(); + let content = "function hello() {\n console.log('Hello');\n}"; + + let tree = manager.parse(content, "javascript").unwrap(); + assert!(tree.root_node().child_count() > 0); +} + +#[test] +fn test_extract_python_elements() { + let manager = ParserManager::new(); + let content = r#" +import os + +class MyClass: + def method(self): + pass + +def main(): + print("hello") +"#; + + let tree = manager.parse(content, "python").unwrap(); + let result = ElementExtractor::extract_elements(&tree, content, "python").unwrap(); + + assert_eq!(result.function_count, 2); // main and method + assert_eq!(result.class_count, 1); // MyClass + assert_eq!(result.import_count, 1); // import os + assert!(result.main_line.is_some()); +} + +#[test] +fn test_extract_rust_elements() { + let manager = ParserManager::new(); + let content = r#" +use std::fs; + +struct MyStruct { + field: i32, +} + +impl MyStruct { + fn new() -> Self { + Self { field: 0 } + } +} + +fn main() { + let s = MyStruct::new(); +} +"#; + + let tree = manager.parse(content, "rust").unwrap(); + let result = ElementExtractor::extract_elements(&tree, content, "rust").unwrap(); + + assert_eq!(result.function_count, 2); // main and new + assert_eq!(result.class_count, 2); // MyStruct (struct) and MyStruct (impl) + assert_eq!(result.import_count, 1); // use std::fs + assert!(result.main_line.is_some()); +} + +#[test] +fn test_extract_with_depth_structure() { + let manager = ParserManager::new(); + let content = r#" +def func1(): + pass + +def func2(): + func1() +"#; + + let tree = manager.parse(content, "python").unwrap(); + let result = + ElementExtractor::extract_with_depth(&tree, content, "python", "structure").unwrap(); + + 
// In structure mode, detailed vectors should be empty but counts preserved + assert_eq!(result.function_count, 2); + assert!(result.functions.is_empty()); + assert!(result.calls.is_empty()); +} + +#[test] +fn test_extract_with_depth_semantic() { + let manager = ParserManager::new(); + let content = r#" +def func1(): + pass + +def func2(): + func1() +"#; + + let tree = manager.parse(content, "python").unwrap(); + let result = + ElementExtractor::extract_with_depth(&tree, content, "python", "semantic").unwrap(); + + // In semantic mode, should have both elements and calls + assert_eq!(result.function_count, 2); + assert_eq!(result.functions.len(), 2); + assert!(!result.calls.is_empty()); + assert_eq!(result.calls[0].callee_name, "func1"); +} + +#[test] +fn test_parse_invalid_syntax() { + let manager = ParserManager::new(); + let content = "def invalid syntax here"; + + // Should still parse (tree-sitter is error-tolerant) + let tree = manager.parse(content, "python"); + assert!(tree.is_ok()); +} + +#[test] +fn test_multiple_languages() { + let manager = ParserManager::new(); + + // Test that we can handle multiple languages in the same manager + assert!(manager.get_or_create_parser("python").is_ok()); + assert!(manager.get_or_create_parser("rust").is_ok()); + assert!(manager.get_or_create_parser("javascript").is_ok()); + assert!(manager.get_or_create_parser("go").is_ok()); + assert!(manager.get_or_create_parser("java").is_ok()); + assert!(manager.get_or_create_parser("kotlin").is_ok()); +} + +#[test] +fn test_parse_kotlin() { + let manager = ParserManager::new(); + let content = r#" +package com.example + +import kotlin.math.* + +class Example(val name: String) { + fun greet() { + println("Hello, $name") + } +} + +fun main() { + val example = Example("World") + example.greet() +} +"#; + + let tree = manager.parse(content, "kotlin").unwrap(); + assert!(tree.root_node().child_count() > 0); +} + +#[test] +fn test_extract_kotlin_elements() { + let manager = 
ParserManager::new(); + let content = r#" +package com.example + +import kotlin.math.* + +class MyClass { + fun method() { + println("method") + } +} + +fun main() { + println("hello") +} + +fun helper() { + main() +} +"#; + + let tree = manager.parse(content, "kotlin").unwrap(); + let result = ElementExtractor::extract_elements(&tree, content, "kotlin").unwrap(); + + assert_eq!(result.function_count, 3); // main, helper, method + assert_eq!(result.class_count, 1); // MyClass + assert!(result.import_count > 0); // import statements + assert!(result.main_line.is_some()); +} diff --git a/crates/goose-mcp/src/developer/analyze/tests/traversal_tests.rs b/crates/goose-mcp/src/developer/analyze/tests/traversal_tests.rs new file mode 100644 index 000000000000..8cc860ae533d --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/tests/traversal_tests.rs @@ -0,0 +1,190 @@ +// Tests for the traversal module + +use crate::developer::analyze::tests::fixtures::create_test_gitignore; +use crate::developer::analyze::traversal::FileTraverser; +use ignore::gitignore::Gitignore; +use std::fs; +use std::path::Path; +use tempfile::TempDir; + +#[test] +fn test_is_ignored() { + // Create a temporary directory for testing + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + // Create actual files and directories to test + fs::write(dir_path.join("test.log"), "log content").unwrap(); + fs::write(dir_path.join("test.rs"), "fn main() {}").unwrap(); + + // Create gitignore that ignores .log files + let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path); + builder.add_line(None, "*.log").unwrap(); + let ignore = builder.build().unwrap(); + + let traverser = FileTraverser::new(&ignore); + + // Test that .log files are ignored and .rs files are not + assert!(traverser.is_ignored(&dir_path.join("test.log"))); + assert!(!traverser.is_ignored(&dir_path.join("test.rs"))); +} + +#[test] +fn test_validate_path() { + let ignore = create_test_gitignore(); + 
let traverser = FileTraverser::new(&ignore); + + // Test non-existent path + assert!(traverser + .validate_path(Path::new("/nonexistent/path")) + .is_err()); + + // Test ignored path + assert!(traverser.validate_path(Path::new("test.log")).is_err()); +} + +#[test] +fn test_collect_files() { + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + // Create test files + fs::write(dir_path.join("test.rs"), "fn main() {}").unwrap(); + fs::write(dir_path.join("test.py"), "def main(): pass").unwrap(); + fs::write(dir_path.join("test.txt"), "not code").unwrap(); + + // Create subdirectory with file + let sub_dir = dir_path.join("src"); + fs::create_dir(&sub_dir).unwrap(); + fs::write(sub_dir.join("lib.rs"), "pub fn test() {}").unwrap(); + + let ignore = Gitignore::empty(); + let traverser = FileTraverser::new(&ignore); + + let files = traverser.collect_files_for_focused(dir_path, 0).unwrap(); + + // Should find .rs and .py files but not .txt + assert_eq!(files.len(), 3); + assert!(files.iter().any(|p| p.ends_with("test.rs"))); + assert!(files.iter().any(|p| p.ends_with("test.py"))); + assert!(files.iter().any(|p| p.ends_with("lib.rs"))); +} + +#[test] +fn test_max_depth() { + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + // Create nested structure + fs::write(dir_path.join("root.rs"), "").unwrap(); + + let level1 = dir_path.join("level1"); + fs::create_dir(&level1).unwrap(); + fs::write(level1.join("file1.rs"), "").unwrap(); + + let level2 = level1.join("level2"); + fs::create_dir(&level2).unwrap(); + fs::write(level2.join("file2.rs"), "").unwrap(); + + let level3 = level2.join("level3"); + fs::create_dir(&level3).unwrap(); + fs::write(level3.join("file3.rs"), "").unwrap(); + + let ignore = Gitignore::empty(); + let traverser = FileTraverser::new(&ignore); + + // Test that limiting depth works - exact counts may vary based on implementation + // The important thing is that deeper files are excluded with lower 
max_depth + + // With a small max_depth, we should find fewer files + let files_limited = traverser.collect_files_for_focused(dir_path, 2).unwrap(); + + // With unlimited depth, we should find all files + let files_unlimited = traverser.collect_files_for_focused(dir_path, 0).unwrap(); + + // The unlimited search should find more files than the limited one + assert!( + files_unlimited.len() > files_limited.len(), + "Unlimited depth should find more files than limited depth" + ); + + // Should always find the root file + assert!(files_unlimited.iter().any(|p| p.ends_with("root.rs"))); + + // With unlimited, should find all 4 files + assert_eq!( + files_unlimited.len(), + 4, + "Should find all 4 files with unlimited depth" + ); +} + +#[test] +fn test_symlink_handling() { + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + // Create a file and directory + fs::write(dir_path.join("target.rs"), "fn main() {}").unwrap(); + let target_dir = dir_path.join("target_dir"); + fs::create_dir(&target_dir).unwrap(); + fs::write(target_dir.join("inner.rs"), "fn test() {}").unwrap(); + + // Create symlinks (if supported by the OS) + #[cfg(unix)] + { + use std::os::unix::fs::symlink; + let _ = symlink(&dir_path.join("target.rs"), dir_path.join("link.rs")); + let _ = symlink(&target_dir, dir_path.join("link_dir")); + } + + let ignore = Gitignore::empty(); + let traverser = FileTraverser::new(&ignore); + + // Collect files - symlinks should be handled appropriately + let files = traverser.collect_files_for_focused(dir_path, 0).unwrap(); + + // Should find the actual files + assert!(files.iter().any(|p| p.ends_with("target.rs"))); + assert!(files.iter().any(|p| p.ends_with("inner.rs"))); +} + +#[test] +fn test_empty_directory() { + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + let ignore = Gitignore::empty(); + let traverser = FileTraverser::new(&ignore); + + let files = traverser.collect_files_for_focused(dir_path, 
0).unwrap(); + + assert_eq!(files.len(), 0); +} + +#[test] +fn test_gitignore_patterns() { + let temp_dir = TempDir::new().unwrap(); + let dir_path = temp_dir.path(); + + // Create files + fs::write(dir_path.join("test.log"), "log").unwrap(); + fs::write(dir_path.join("debug.log"), "debug").unwrap(); + fs::write(dir_path.join("test.rs"), "fn main() {}").unwrap(); + fs::write(dir_path.join("main.py"), "def main(): pass").unwrap(); + + // Create gitignore that only ignores .log files + let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path); + builder.add_line(None, "*.log").unwrap(); + let ignore = builder.build().unwrap(); + + let traverser = FileTraverser::new(&ignore); + + let files = traverser.collect_files_for_focused(dir_path, 0).unwrap(); + + // Should find .rs and .py files, but not .log files + assert_eq!(files.len(), 2, "Should find 2 non-log files"); + assert!(files.iter().any(|p| p.ends_with("test.rs"))); + assert!(files.iter().any(|p| p.ends_with("main.py"))); + assert!(!files.iter().any(|p| p.ends_with(".log"))); +} diff --git a/crates/goose-mcp/src/developer/analyze/traversal.rs b/crates/goose-mcp/src/developer/analyze/traversal.rs new file mode 100644 index 000000000000..bc0b3701be75 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/traversal.rs @@ -0,0 +1,171 @@ +use ignore::gitignore::Gitignore; +use rayon::prelude::*; +use rmcp::model::{ErrorCode, ErrorData}; +use std::path::{Path, PathBuf}; + +use crate::developer::analyze::types::{AnalysisResult, EntryType}; +use crate::developer::lang; + +/// Handles file system traversal with ignore patterns +pub struct FileTraverser<'a> { + ignore_patterns: &'a Gitignore, +} + +impl<'a> FileTraverser<'a> { + /// Create a new file traverser with the given ignore patterns + pub fn new(ignore_patterns: &'a Gitignore) -> Self { + Self { ignore_patterns } + } + + /// Check if a path should be ignored + pub fn is_ignored(&self, path: &Path) -> bool { + let ignored = 
self.ignore_patterns.matched(path, false).is_ignore(); + if ignored { + tracing::trace!("Path {:?} is ignored", path); + } + ignored + } + + /// Validate that a path exists and is not ignored + pub fn validate_path(&self, path: &Path) -> Result<(), ErrorData> { + // Check if path is ignored + if self.is_ignored(path) { + return Err(ErrorData::new( + ErrorCode::INVALID_PARAMS, + format!( + "Access to '{}' is restricted by .gooseignore", + path.display() + ), + None, + )); + } + + // Check if path exists + if !path.exists() { + return Err(ErrorData::new( + ErrorCode::INVALID_PARAMS, + format!("Path '{}' does not exist", path.display()), + None, + )); + } + + Ok(()) + } + + /// Collect all files for focused analysis + pub fn collect_files_for_focused( + &self, + path: &Path, + max_depth: u32, + ) -> Result, ErrorData> { + tracing::debug!( + "Collecting files from {:?} with max_depth {}", + path, + max_depth + ); + + if max_depth == 0 { + tracing::warn!("Unlimited depth traversal requested for {:?}", path); + } + + let files = self.collect_files_recursive(path, 0, max_depth)?; + + tracing::info!("Collected {} files from {:?}", files.len(), path); + Ok(files) + } + + /// Recursively collect files + fn collect_files_recursive( + &self, + path: &Path, + current_depth: u32, + max_depth: u32, + ) -> Result, ErrorData> { + let mut files = Vec::new(); + + // Check if we're at a file (base case) + if path.is_file() { + let lang = lang::get_language_identifier(path); + if !lang.is_empty() { + tracing::trace!("Including file {:?} (language: {})", path, lang); + files.push(path.to_path_buf()); + } + return Ok(files); + } + + // max_depth of 0 means unlimited depth + // current_depth starts at 0, max_depth is the number of directory levels to traverse + if max_depth > 0 && current_depth >= max_depth { + tracing::trace!("Reached max depth {} at {:?}", max_depth, path); + return Ok(files); + } + + let entries = std::fs::read_dir(path).map_err(|e| { + tracing::error!("Failed to read 
directory {:?}: {}", path, e); + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to read directory: {}", e), + None, + ) + })?; + + for entry in entries { + let entry = entry.map_err(|e| { + ErrorData::new( + ErrorCode::INTERNAL_ERROR, + format!("Failed to read directory entry: {}", e), + None, + ) + })?; + + let entry_path = entry.path(); + + // Skip ignored paths + if self.is_ignored(&entry_path) { + continue; + } + + if entry_path.is_file() { + // Only include supported file types + let lang = lang::get_language_identifier(&entry_path); + if !lang.is_empty() { + tracing::trace!("Including file {:?} (language: {})", entry_path, lang); + files.push(entry_path); + } + } else if entry_path.is_dir() { + // Recurse into subdirectory + let mut sub_files = + self.collect_files_recursive(&entry_path, current_depth + 1, max_depth)?; + files.append(&mut sub_files); + } + } + + Ok(files) + } + + /// Collect directory results for analysis with parallel processing + pub fn collect_directory_results( + &self, + path: &Path, + max_depth: u32, + analyze_file: F, + ) -> Result, ErrorData> + where + F: Fn(&Path) -> Result + Sync, + { + tracing::debug!("Collecting directory results from {:?}", path); + + // First collect all files to analyze + let files_to_analyze = self.collect_files_recursive(path, 0, max_depth)?; + + // Then analyze them in parallel using Rayon + let results: Result, ErrorData> = files_to_analyze + .par_iter() + .map(|file_path| { + analyze_file(file_path).map(|result| (file_path.clone(), EntryType::File(result))) + }) + .collect(); + + results + } +} diff --git a/crates/goose-mcp/src/developer/analyze/types.rs b/crates/goose-mcp/src/developer/analyze/types.rs new file mode 100644 index 000000000000..d2c172e80599 --- /dev/null +++ b/crates/goose-mcp/src/developer/analyze/types.rs @@ -0,0 +1,160 @@ +use rmcp::schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +/// Parameters for the analyze tool +#[derive(Debug, 
Clone, Serialize, Deserialize, JsonSchema)] +pub struct AnalyzeParams { + /// Absolute path. Step 1: Directory for overview. Step 2: File for details. Step 3: Directory with focus param for call graphs + pub path: String, + + /// Symbol name for call graph analysis (Step 3). Requires directory path with broad enough scope to capture all relevant symbol references + pub focus: Option, + + /// Call graph depth. 0=where defined, 1=direct callers/callees, 2+=transitive chains + #[serde(default = "default_follow_depth")] + pub follow_depth: u32, + + /// Directory recursion limit. 0=unlimited (warning: fails on binary files) + #[serde(default = "default_max_depth")] + pub max_depth: u32, + + /// Allow large outputs without warning (default: false) + #[serde(default)] + pub force: bool, +} + +fn default_follow_depth() -> u32 { + 2 +} + +fn default_max_depth() -> u32 { + 3 +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AnalysisResult { + pub functions: Vec, + pub classes: Vec, + pub imports: Vec, + // Semantic analysis fields + pub calls: Vec, + pub references: Vec, + // Structure mode fields (for compact overview) + pub function_count: usize, + pub class_count: usize, + pub line_count: usize, + pub import_count: usize, + pub main_line: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FunctionInfo { + pub name: String, + pub line: usize, + pub params: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ClassInfo { + pub name: String, + pub line: usize, + pub methods: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CallInfo { + pub caller_name: Option, // Function containing this call + pub callee_name: String, // Function being called + pub line: usize, + pub column: usize, + pub context: String, // Line of code containing the call +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReferenceInfo { + pub symbol: String, + pub ref_type: ReferenceType, + pub line: usize, + pub 
context: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ReferenceType { + Definition, + Call, + Import, + Assignment, +} + +// Entry type for directory results - cleaner than overloading AnalysisResult +#[derive(Debug, Clone)] +pub enum EntryType { + File(AnalysisResult), + Directory, + SymlinkDir(PathBuf), + SymlinkFile(PathBuf), +} + +// Type alias for complex query results +pub type ElementQueryResult = (Vec, Vec, Vec); + +#[derive(Debug, Clone)] +pub struct CallChain { + pub path: Vec<(PathBuf, usize, String, String)>, // (file, line, from, to) +} + +// Data structure to pass to format_focused_output_with_chains +pub struct FocusedAnalysisData<'a> { + pub focus_symbol: &'a str, + pub follow_depth: u32, + pub files_analyzed: &'a [PathBuf], + pub definitions: &'a [(PathBuf, usize)], + pub incoming_chains: &'a [CallChain], + pub outgoing_chains: &'a [CallChain], +} + +/// Analysis modes +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum AnalysisMode { + Structure, // Directory overview + Semantic, // File details + Focused, // Symbol tracking +} + +impl AnalysisMode { + pub fn as_str(&self) -> &str { + match self { + AnalysisMode::Structure => "structure", + AnalysisMode::Semantic => "semantic", + AnalysisMode::Focused => "focused", + } + } + + pub fn parse(s: &str) -> Self { + match s { + "structure" => AnalysisMode::Structure, + "semantic" => AnalysisMode::Semantic, + "focused" => AnalysisMode::Focused, + _ => AnalysisMode::Structure, + } + } +} + +impl AnalysisResult { + /// Create an empty analysis result with only line count + pub fn empty(line_count: usize) -> Self { + Self { + functions: vec![], + classes: vec![], + imports: vec![], + calls: vec![], + references: vec![], + function_count: 0, + class_count: 0, + line_count, + import_count: 0, + main_line: None, + } + } +} diff --git a/crates/goose-mcp/src/developer/mod.rs b/crates/goose-mcp/src/developer/mod.rs index 8719fd54def7..5eac4ff0eef6 100644 --- 
a/crates/goose-mcp/src/developer/mod.rs +++ b/crates/goose-mcp/src/developer/mod.rs @@ -1,3 +1,4 @@ +pub mod analyze; mod editor_models; mod goose_hints; mod lang; diff --git a/crates/goose-mcp/src/developer/rmcp_developer.rs b/crates/goose-mcp/src/developer/rmcp_developer.rs index 9552e63cafc0..a848155ff8d3 100644 --- a/crates/goose-mcp/src/developer/rmcp_developer.rs +++ b/crates/goose-mcp/src/developer/rmcp_developer.rs @@ -31,6 +31,7 @@ use tokio::{ }; use tokio_stream::{wrappers::SplitStream, StreamExt as _}; +use super::analyze::{types::AnalyzeParams, CodeAnalyzer}; use super::editor_models::{create_editor_model, EditorModel}; use super::goose_hints::load_hints::{load_hint_files, GOOSE_HINTS_FILENAME}; use super::shell::{expand_path, get_shell_config, is_absolute_path}; @@ -164,13 +165,14 @@ fn load_prompt_files() -> HashMap { } /// Developer MCP Server using official RMCP SDK -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct DeveloperServer { tool_router: ToolRouter, file_history: Arc>>>, ignore_patterns: Gitignore, editor_model: Option, prompts: HashMap, + code_analyzer: CodeAnalyzer, } #[tool_handler(router = self.tool_router)] @@ -192,6 +194,9 @@ impl ServerHandler for DeveloperServer { Use the shell tool as needed to locate files or interact with the project. + Leverage `analyze` through `return_last_only=true` subagents for deep codebase understanding with lean context + - delegate analysis, retain summaries + Your windows/screen tools can be used for visual debugging. You should not use these tools unless prompted to, but you can mention they are available if they are relevant. @@ -210,9 +215,14 @@ impl ServerHandler for DeveloperServer { You can use the shell tool to run any command that would work on the relevant operating system. Use the shell tool as needed to locate files or interact with the project. 
+ Leverage `analyze` through `return_last_only=true` subagents for deep codebase understanding with lean context + - delegate analysis, retain summaries + Your windows/screen tools can be used for visual debugging. You should not use these tools unless prompted to, but you can mention they are available if they are relevant. + Always prefer ripgrep (rg -C 3) to grep. + operating system: {os} current directory: {cwd} {container_info} @@ -516,6 +526,7 @@ impl DeveloperServer { ignore_patterns, editor_model, prompts: load_prompt_files(), + code_analyzer: CodeAnalyzer::new(), } } @@ -1002,6 +1013,31 @@ impl DeveloperServer { Ok(()) } + /// Analyze code structure and relationships. + /// + /// Automatically selects the appropriate analysis: + /// - Files: Semantic analysis with call graphs + /// - Directories: Structure overview with metrics + /// - With focus parameter: Track symbol across files + /// + /// Examples: + /// analyze(path="file.py") -> semantic analysis + /// analyze(path="src/") -> structure overview down to max_depth subdirs + /// analyze(path="src/", focus="main") -> track main() across files in src/ down to max_depth subdirs + #[tool( + name = "analyze", + description = "Analyze code structure in 3 modes: 1) Directory overview - file tree with LOC/function/class counts to max_depth. 2) File details - functions, classes, imports. 3) Symbol focus - call graphs across directory to max_depth (requires directory path, case-sensitive). Typical flow: directory → files → symbols. Functions called >3x show •N." + )] + pub async fn analyze( + &self, + params: Parameters, + ) -> Result { + let params = params.0; + let path = self.resolve_path(¶ms.path)?; + self.code_analyzer + .analyze(params, path, &self.ignore_patterns) + } + /// Process an image file from disk. 
/// /// The image will be: @@ -1366,6 +1402,7 @@ mod tests { let running_service = serve_directly(server.clone(), create_test_transport(), None); let peer = running_service.peer().clone(); + // Test directly on the server instead of using peer.call_tool let result = server .shell( Parameters(ShellParams {