|
| 1 | +use std::collections::{HashMap, HashSet}; |
| 2 | +use std::path::Path; |
| 3 | + |
| 4 | +use tree_sitter::{Parser, Query, QueryCursor, StreamingIterator}; |
| 5 | +use tree_sitter_go; |
| 6 | + |
| 7 | +use walkdir::WalkDir; |
| 8 | + |
/// A single Go package together with the data needed to rate its coupling.
///
/// The stored score is the instability metric `I = Ce / (Ca + Ce)`:
/// - `Ca` (afferent coupling) counts incoming dependencies,
/// - `Ce` (efferent coupling) counts outgoing dependencies.
#[derive(Debug, PartialEq)]
struct Package {
    /// Package name.
    name: String,
    /// Every package this package imports.
    imports: HashSet<String>,
    /// Instability in the range 0.0 to 1.0; a larger value means a more unstable package.
    coupling_score: f64,
}
| 24 | + |
| 25 | +/// Analyzes dependencies between Go packages and calculates coupling metrics. |
| 26 | +/// |
| 27 | +/// The analyzer walks through Go source files, extracts package dependencies, |
| 28 | +/// and computes various coupling metrics to help identify highly coupled or |
| 29 | +/// unstable packages. |
| 30 | +#[derive(Default, Debug)] |
| 31 | +struct DependencyAnalyzer { |
| 32 | + /// Map of package names to their corresponding Package instances |
| 33 | + packages: HashMap<String, Package>, |
| 34 | +} |
| 35 | + |
| 36 | +impl DependencyAnalyzer { |
| 37 | + /// Creates a new DependencyAnalyzer instance. |
| 38 | + pub fn new() -> Self { |
| 39 | + Self::default() |
| 40 | + } |
| 41 | + |
| 42 | + /// Analyzes a single Go source file and extracts its package dependencies. |
| 43 | + /// |
| 44 | + /// Uses tree-sitter to parse the Go source file and extract: |
| 45 | + /// - Package declaration |
| 46 | + /// - Import statements |
| 47 | + /// |
| 48 | + /// # Arguments |
| 49 | + /// |
| 50 | + /// * `path` - Path to the Go source file |
| 51 | + /// |
| 52 | + /// # Returns |
| 53 | + /// |
| 54 | + /// * `Ok(())` if analysis succeeds |
| 55 | + /// * `Err` with a description if any error occurs during analysis |
| 56 | + pub fn analyze_file(&mut self, path: &Path) -> Result<(), Box<dyn std::error::Error>> { |
| 57 | + let source_code = std::fs::read_to_string(path)?; |
| 58 | + |
| 59 | + let mut parser = Parser::new(); |
| 60 | + let language = tree_sitter_go::LANGUAGE; |
| 61 | + parser.set_language(&language.into())?; |
| 62 | + |
| 63 | + let tree = parser |
| 64 | + .parse(&source_code, None) |
| 65 | + .ok_or("Failed to parse source code")?; |
| 66 | + |
| 67 | + // handle group and single import |
| 68 | + let query = Query::new( |
| 69 | + &language.into(), |
| 70 | + r#" |
| 71 | + (package_clause |
| 72 | + (package_identifier) @package) |
| 73 | + |
| 74 | + ; single import |
| 75 | + (import_declaration |
| 76 | + (import_spec |
| 77 | + (interpreted_string_literal) @import)) |
| 78 | + |
| 79 | + ; group import |
| 80 | + (import_declaration |
| 81 | + (import_spec_list |
| 82 | + (import_spec |
| 83 | + (interpreted_string_literal) @import))) |
| 84 | + "#, |
| 85 | + )?; |
| 86 | + |
| 87 | + let mut cursor = QueryCursor::new(); |
| 88 | + let mut matches = cursor.matches(&query, tree.root_node(), source_code.as_bytes()); |
| 89 | + |
| 90 | + let mut current_package = String::new(); |
| 91 | + let mut imports = HashSet::new(); |
| 92 | + |
| 93 | + while let Some(matched) = matches.next() { |
| 94 | + for capture in matched.captures { |
| 95 | + let capture_text = capture |
| 96 | + .node |
| 97 | + .utf8_text(source_code.as_bytes())? |
| 98 | + .trim_matches('"'); |
| 99 | + |
| 100 | + match query.capture_names()[capture.index as usize] { |
| 101 | + "package" => { |
| 102 | + current_package = capture_text.to_string(); |
| 103 | + } |
| 104 | + "import" => { |
| 105 | + imports.insert(capture_text.to_string()); |
| 106 | + } |
| 107 | + _ => {} |
| 108 | + } |
| 109 | + } |
| 110 | + } |
| 111 | + |
| 112 | + if !current_package.is_empty() { |
| 113 | + self.packages.insert( |
| 114 | + current_package.clone(), |
| 115 | + Package { |
| 116 | + name: current_package, |
| 117 | + imports, |
| 118 | + coupling_score: 0.0, |
| 119 | + }, |
| 120 | + ); |
| 121 | + } |
| 122 | + |
| 123 | + Ok(()) |
| 124 | + } |
| 125 | + |
| 126 | + /// Calculates coupling scores for all analyzed packages. |
| 127 | + /// |
| 128 | + /// For each package, computes: |
| 129 | + /// 1. Afferent coupling (Ca) - number of packages that depend on it |
| 130 | + /// 2. Efferent coupling (Ce) - number of packages it depends on |
| 131 | + /// 3. Instability (I) = Ce/(Ca+Ce) |
| 132 | + /// |
| 133 | + /// A higher score (closer to 1.0) indicates that the package is more unstable |
| 134 | + /// and dependent on other packages. |
| 135 | + pub fn calculate_coupling_scores(&mut self) { |
| 136 | + let package_imports: HashMap<String, f64> = self |
| 137 | + .packages |
| 138 | + .keys() |
| 139 | + .map(|name| { |
| 140 | + let afferent = self |
| 141 | + .packages |
| 142 | + .values() |
| 143 | + .filter(|p| p.imports.contains(name)) |
| 144 | + .count() as f64; |
| 145 | + (name.clone(), afferent) |
| 146 | + }) |
| 147 | + .collect(); |
| 148 | + |
| 149 | + for package in self.packages.values_mut() { |
| 150 | + let afferent = *package_imports.get(&package.name).unwrap_or(&0.0); |
| 151 | + let efferent = package.imports.len() as f64; |
| 152 | + |
| 153 | + if (afferent + efferent) > 0.0 { |
| 154 | + package.coupling_score = efferent / (afferent + efferent); |
| 155 | + println!( |
| 156 | + "{}: {:.2} - {} imports", |
| 157 | + package.name, |
| 158 | + package.coupling_score, |
| 159 | + package.imports.len() |
| 160 | + ); |
| 161 | + } |
| 162 | + } |
| 163 | + } |
| 164 | + |
| 165 | + /// Returns a vector of package references sorted by coupling score in descending order. |
| 166 | + /// |
| 167 | + /// Packages with higher coupling scores (more unstable) appear first in the result. |
| 168 | + pub fn get_sorted_packages(&self) -> Vec<&Package> { |
| 169 | + let mut packages: Vec<&Package> = self.packages.values().collect(); |
| 170 | + |
| 171 | + packages.sort_by(|a, b| { |
| 172 | + b.coupling_score |
| 173 | + .partial_cmp(&a.coupling_score) |
| 174 | + .unwrap_or(std::cmp::Ordering::Equal) |
| 175 | + }); |
| 176 | + packages |
| 177 | + } |
| 178 | +} |
| 179 | + |
| 180 | +fn main() -> Result<(), Box<dyn std::error::Error>> { |
| 181 | + let args: Vec<String> = std::env::args().collect(); |
| 182 | + if args.len() < 2 { |
| 183 | + eprintln!("Usage: {} <go-project-directory>", args[0]); |
| 184 | + std::process::exit(1); |
| 185 | + } |
| 186 | + |
| 187 | + let mut analyzer = DependencyAnalyzer::new(); |
| 188 | + |
| 189 | + for entry in WalkDir::new(&args[1]) |
| 190 | + .into_iter() |
| 191 | + .filter_map(|e| e.ok()) |
| 192 | + .filter(|e| e.path().extension().is_some_and(|ext| ext == "go")) |
| 193 | + { |
| 194 | + analyzer.analyze_file(entry.path())?; |
| 195 | + } |
| 196 | + |
| 197 | + analyzer.calculate_coupling_scores(); |
| 198 | + |
| 199 | + println!("Packages sorted by coupling score (higher score = more unstable):"); |
| 200 | + for package in analyzer.get_sorted_packages() { |
| 201 | + println!( |
| 202 | + "{}: {:.2} - {} imports", |
| 203 | + package.name, |
| 204 | + package.coupling_score, |
| 205 | + package.imports.len() |
| 206 | + ); |
| 207 | + } |
| 208 | + |
| 209 | + Ok(()) |
| 210 | +} |
| 211 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `source` into a fresh temporary file and hands the file back.
    fn temp_file_with(source: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().expect("Failed to create temp file");
        write!(file, "{}", source).unwrap();
        file
    }

    /// A single file with a grouped import yields one package with both imports.
    #[test]
    fn test_single_file_analysis() {
        let file = temp_file_with(
            r#"
            package main
            import (
                "fmt"
                "os"
            )
            func main() {
                fmt.Println("Hello World")
                os.Exit(1)
            }
            "#,
        );

        let mut analyzer = DependencyAnalyzer::new();
        analyzer
            .analyze_file(file.path())
            .expect("Failed to analyze temp file");

        assert_eq!(analyzer.packages.len(), 1);

        let pkg_main = analyzer.packages.get("main").unwrap();
        assert_eq!(pkg_main.name, "main");
        assert_eq!(pkg_main.imports.len(), 2);

        let expected_imports: HashSet<String> =
            ["fmt", "os"].iter().map(|s| s.to_string()).collect();
        assert_eq!(pkg_main.imports, expected_imports);
    }

    /// Three packages forming the chain main -> foo -> bar get the expected
    /// instability values and sort order.
    #[test]
    fn test_coupling_scores() {
        // package "main" imports "foo"
        let file_main = temp_file_with(
            r#"
            package main
            import "foo"
            "#,
        );
        // package "foo" imports "bar"
        let file_foo = temp_file_with(
            r#"
            package foo
            import "bar"
            "#,
        );
        // package "bar" imports nothing
        let file_bar = temp_file_with(
            r#"
            package bar
            "#,
        );

        // Analyze every file, then compute the scores.
        let mut analyzer = DependencyAnalyzer::new();
        for file in [&file_main, &file_foo, &file_bar] {
            analyzer.analyze_file(file.path()).unwrap();
        }
        analyzer.calculate_coupling_scores();

        // Expected values, with I = Ce / (Ca + Ce):
        //
        //   package | Ca                 | Ce | I
        //   main    | 0 (nobody imports) | 1  | 1 / (0 + 1) = 1.0
        //   foo     | 1 (main)           | 1  | 1 / (1 + 1) = 0.5
        //   bar     | 1 (foo)            | 0  | 0 / (1 + 0) = 0.0

        let pkg_main = analyzer.packages.get("main").unwrap();
        let pkg_foo = analyzer.packages.get("foo").unwrap();
        let pkg_bar = analyzer.packages.get("bar").unwrap();

        println!("Package main imports: {:?}", pkg_main.imports);
        println!("Package foo imports: {:?}", pkg_foo.imports);
        println!("Package bar imports: {:?}", pkg_bar.imports);

        assert!((pkg_main.coupling_score - 1.0).abs() < f64::EPSILON);
        assert!((pkg_foo.coupling_score - 0.5).abs() < f64::EPSILON);
        assert!((pkg_bar.coupling_score - 0.0).abs() < f64::EPSILON);

        let sorted = analyzer.get_sorted_packages();
        assert_eq!(sorted[0].name, "main"); // 1.0
        assert_eq!(sorted[1].name, "foo"); // 0.5
        assert_eq!(sorted[2].name, "bar"); // 0.0
    }
}
0 commit comments