From e7de9a8a6136ba9b3546557275a89e4674fe3794 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 09:11:51 +0200 Subject: [PATCH 01/28] Claude prototype for real world benchmarks --- Cargo.lock | 1 + crates/ruff_benchmark/Cargo.toml | 5 +- crates/ruff_benchmark/benches/ty.rs | 83 +++- crates/ruff_benchmark/src/lib.rs | 1 + .../ruff_benchmark/src/real_world_projects.rs | 459 ++++++++++++++++++ crates/ruff_db/src/system/test.rs | 7 + crates/ty_project/src/metadata.rs | 2 +- 7 files changed, 553 insertions(+), 5 deletions(-) create mode 100644 crates/ruff_benchmark/src/real_world_projects.rs diff --git a/Cargo.lock b/Cargo.lock index 892dd3afd5562..545c94c455f3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2597,6 +2597,7 @@ dependencies = [ "ruff_python_parser", "ruff_python_trivia", "rustc-hash 2.1.1", + "tempfile", "tikv-jemallocator", "ty_project", ] diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index b5c2a50a133a0..bf8a7a9b1cb42 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -41,11 +41,12 @@ codspeed-criterion-compat = { workspace = true, default-features = false, option criterion = { workspace = true, default-features = false } rayon = { workspace = true } rustc-hash = { workspace = true } +ruff_db = { workspace = true } +ruff_python_ast = { workspace = true } +tempfile = { workspace = true } [dev-dependencies] -ruff_db = { workspace = true } ruff_linter = { workspace = true } -ruff_python_ast = { workspace = true } ruff_python_formatter = { workspace = true } ruff_python_parser = { workspace = true } ruff_python_trivia = { workspace = true } diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index fe974de25b47a..9af10a7cd8824 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -8,10 +8,12 @@ use rayon::ThreadPoolBuilder; use rustc_hash::FxHashSet; use ruff_benchmark::TestFile; +use 
ruff_benchmark::real_world_projects::{projects, setup_real_world_project}; +use ruff_db::Db as _; use ruff_db::diagnostic::{Diagnostic, DiagnosticId, Severity}; use ruff_db::files::{File, system_path_to_file}; use ruff_db::source::source_text; -use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; +use ruff_db::system::{InMemorySystem, MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; use ruff_python_ast::PythonVersion; use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::RangedValue; @@ -347,10 +349,87 @@ fn benchmark_many_tuple_assignments(criterion: &mut Criterion) { }); } +fn benchmark_real_world_colour_science(criterion: &mut Criterion) { + setup_rayon(); + + // Setup the colour-science project (expensive, done once) + let project = projects::colour_science(); + let setup_project = + setup_real_world_project(project).expect("Failed to setup colour-science project"); + + // Create system and metadata (expensive, done once) + let fs = setup_project.memory_fs().clone(); + let system = TestSystem::new(InMemorySystem::from_memory_fs(fs)); + + let src_root = SystemPath::new("/src"); + let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap(); + + metadata.apply_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(setup_project.config.python_version)), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + let benchmark_paths = setup_project.benchmark_paths(); + + fn setup( + metadata: &ProjectMetadata, + system: &TestSystem, + benchmark_paths: &[SystemPathBuf], + ) -> ProjectDatabase { + // Create new database instance and collect files for this instance + let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); + + let mut files_to_open = FxHashSet::default(); + for path in benchmark_paths { + collect_python_files(&db, path, &mut files_to_open); + } + + db.project().set_open_files(&mut 
db, files_to_open.clone()); + + db + } + + fn check_project(db: &mut ProjectDatabase) { + let result = db.check(); + // Don't assert specific diagnostic count for real-world projects + // as they may have legitimate type issues + let _ = result.len(); + } + + criterion.bench_function("ty_real_world[colour_science]", |b| { + b.iter_batched_ref( + || setup(&metadata, &system, &benchmark_paths), + check_project, + BatchSize::SmallInput, + ); + }); +} + +/// Collect all Python files in a directory for opening in the database +fn collect_python_files(db: &ProjectDatabase, path: &SystemPath, files: &mut FxHashSet) { + if let Ok(file) = system_path_to_file(db, path.to_path_buf()) { + if path.as_str().ends_with(".py") || path.as_str().ends_with(".pyi") { + files.insert(file); + } + } + + if let Ok(entries) = db.system().read_directory(path) { + for entry in entries { + if let Ok(dir_entry) = entry { + collect_python_files(db, dir_entry.path(), files); + } + } + } +} + criterion_group!(check_file, benchmark_cold, benchmark_incremental); criterion_group!( micro, benchmark_many_string_assignments, benchmark_many_tuple_assignments ); -criterion_main!(check_file, micro); +criterion_group!(real_world, benchmark_real_world_colour_science); +criterion_main!(check_file, micro, real_world); diff --git a/crates/ruff_benchmark/src/lib.rs b/crates/ruff_benchmark/src/lib.rs index 3ecde5e8f8ee2..8b23f832c9903 100644 --- a/crates/ruff_benchmark/src/lib.rs +++ b/crates/ruff_benchmark/src/lib.rs @@ -1,6 +1,7 @@ use std::path::PathBuf; pub mod criterion; +pub mod real_world_projects; pub static NUMPY_GLOBALS: TestFile = TestFile::new( "numpy/globals.py", diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs new file mode 100644 index 0000000000000..572a8b71f6d0a --- /dev/null +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -0,0 +1,459 @@ +//! Infrastructure for benchmarking real-world Python projects. +//! +//! 
This module provides functionality to: +//! 1. Clone external repositories to temporary directories +//! 2. Install dependencies using uv with date constraints +//! 3. Load project files into MemoryFileSystem for benchmarking + +use std::collections::HashMap; +use std::path::Path; +use std::process::Command; + +use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf}; +use ruff_python_ast::PythonVersion; + +/// Configuration for a real-world project to benchmark +#[derive(Debug, Clone)] +pub struct RealWorldProject { + /// Git repository URL + pub location: String, + /// Specific commit hash to checkout (optional, uses latest if None) + pub commit: Option, + /// List of paths within the project to check + pub paths: Vec, + /// Dependencies to install via uv + pub deps: Vec, + /// Date constraint for dependencies (ISO 8601 format) + pub max_dep_date: String, + /// Python version to use + pub python_version: PythonVersion, + /// Estimated benchmark costs for reference + pub cost: HashMap, +} + +impl RealWorldProject { + /// Create a new real-world project configuration + pub fn new(location: impl Into) -> Self { + Self { + location: location.into(), + commit: None, + paths: Vec::new(), + deps: Vec::new(), + max_dep_date: "2025-06-17".to_string(), // Default to today's date + python_version: PythonVersion::PY311, // Default Python version + cost: HashMap::new(), + } + } + + /// Set the specific commit to checkout + pub fn with_commit(mut self, commit: impl Into) -> Self { + self.commit = Some(commit.into()); + self + } + + /// Add paths to benchmark within the project + pub fn with_paths(mut self, paths: impl IntoIterator>) -> Self { + self.paths.extend(paths.into_iter().map(|p| p.into())); + self + } + + /// Add dependencies to install + pub fn with_deps(mut self, deps: impl IntoIterator>) -> Self { + self.deps.extend(deps.into_iter().map(|d| d.into())); + self + } + + /// Set the maximum date for dependencies + pub fn with_max_dep_date(mut self, date: impl 
Into) -> Self { + self.max_dep_date = date.into(); + self + } + + /// Add cost estimates + pub fn with_cost(mut self, tool: impl Into, cost: u32) -> Self { + self.cost.insert(tool.into(), cost); + self + } + + /// Set the Python version to use + pub fn with_python_version(mut self, version: PythonVersion) -> Self { + self.python_version = version; + self + } +} + +/// A setup real-world project ready for benchmarking +pub struct SetupProject { + /// Memory filesystem containing the project files + pub memory_fs: MemoryFileSystem, + /// Project configuration + pub config: RealWorldProject, +} + +impl SetupProject { + /// Get the memory filesystem for benchmarking + pub fn memory_fs(&self) -> &MemoryFileSystem { + &self.memory_fs + } + + /// Get the project configuration + pub fn config(&self) -> &RealWorldProject { + &self.config + } + + /// Get the benchmark paths as SystemPathBuf + pub fn benchmark_paths(&self) -> Vec { + self.config + .paths + .iter() + .map(|path| SystemPathBuf::from("src").join(path)) + .collect() + } +} + +/// Setup a real-world project for benchmarking +pub fn setup_real_world_project( + project: RealWorldProject, +) -> std::result::Result { + // Create temporary directory + let temp_dir = tempfile::TempDir::new().map_err(SetupError::TempDir)?; + let project_root = temp_dir.path().to_path_buf(); + + // Clone the repository + clone_repository(&project.location, &project_root, project.commit.as_deref())?; + + // Install dependencies if specified + if !project.deps.is_empty() { + install_dependencies( + &project_root, + &project.deps, + &project.max_dep_date, + project.python_version, + )?; + } + + // Load files into memory filesystem + let memory_fs = load_into_memory_fs(&project_root, &project.paths)?; + + Ok(SetupProject { + memory_fs, + config: project, + }) +} + +/// Clone a git repository to the specified directory +fn clone_repository( + repo_url: &str, + target_dir: &Path, + commit: Option<&str>, +) -> std::result::Result<(), SetupError> 
{ + // Clone the repository + let output = Command::new("git") + .args(["clone", "--depth", "1", repo_url, "."]) + .current_dir(target_dir) + .output() + .map_err(SetupError::GitCommand)?; + + if !output.status.success() { + return Err(SetupError::GitClone { + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }); + } + + // Checkout specific commit if provided + if let Some(commit_hash) = commit { + // First, unshallow the repository to get all commits + let output = Command::new("git") + .args(["fetch", "--unshallow"]) + .current_dir(target_dir) + .output() + .map_err(SetupError::GitCommand)?; + + if !output.status.success() { + return Err(SetupError::GitFetch { + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }); + } + + // Checkout the specific commit + let output = Command::new("git") + .args(["checkout", commit_hash]) + .current_dir(target_dir) + .output() + .map_err(SetupError::GitCommand)?; + + if !output.status.success() { + return Err(SetupError::GitCheckout { + commit: commit_hash.to_string(), + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }); + } + } + + Ok(()) +} + +/// Install dependencies using uv with date constraints +fn install_dependencies( + project_root: &Path, + deps: &[String], + max_date: &str, + python_version: PythonVersion, +) -> std::result::Result<(), SetupError> { + // Check if uv is available + let uv_check = Command::new("uv") + .arg("--version") + .output() + .map_err(SetupError::UvCommand)?; + + if !uv_check.status.success() { + return Err(SetupError::UvNotFound); + } + + // Create an isolated virtual environment to avoid picking up ruff's pyproject.toml + let venv_path = project_root.join(".benchmark_venv"); + let python_version_str = python_version.to_string(); + let output = Command::new("uv") + .args(["venv", "--python", &python_version_str]) + .arg(&venv_path) + .current_dir(project_root) + .output() + .map_err(SetupError::UvCommand)?; + + if !output.status.success() { + return 
Err(SetupError::VenvCreation { + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }); + } + + // Create a requirements file with dependencies + let requirements_content = deps.join("\n"); + let requirements_path = project_root.join("benchmark_requirements.txt"); + std::fs::write(&requirements_path, requirements_content) + .map_err(SetupError::WriteRequirements)?; + + // Install dependencies with date constraint in the isolated environment + let mut cmd = Command::new("uv"); + cmd.args([ + "pip", + "install", + "--python", + venv_path.to_str().unwrap(), + "--requirement", + requirements_path.to_str().unwrap(), + ]); + + // Add date constraint if specified + if !max_date.is_empty() { + cmd.args(["--exclude-newer", max_date]); + } + + let output = cmd + .current_dir(project_root) + .output() + .map_err(SetupError::UvCommand)?; + + if !output.status.success() { + return Err(SetupError::DependencyInstall { + stderr: String::from_utf8_lossy(&output.stderr).to_string(), + }); + } + + Ok(()) +} + +/// Load project files into a MemoryFileSystem +fn load_into_memory_fs( + project_root: &Path, + benchmark_paths: &[String], +) -> std::result::Result { + let fs = MemoryFileSystem::new(); + + // Walk through each benchmark path and load Python files + for path in benchmark_paths { + let full_path = project_root.join(path); + if !full_path.exists() { + return Err(SetupError::PathNotFound(path.clone())); + } + + load_directory_recursive(&fs, &full_path, &SystemPathBuf::from("src").join(path))?; + } + + Ok(fs) +} + +/// Recursively load a directory into the memory filesystem +fn load_directory_recursive( + fs: &MemoryFileSystem, + source_path: &Path, + dest_path: &SystemPath, +) -> std::result::Result<(), SetupError> { + if source_path.is_file() { + // Load single file + if let Some(ext) = source_path.extension() { + if ext == "py" || ext == "pyi" { + let content = + std::fs::read_to_string(source_path).map_err(|e| SetupError::ReadFile { + path: 
source_path.to_path_buf(), + error: e, + })?; + if let Err(e) = fs.write_file_all(dest_path.to_path_buf(), content) { + return Err(SetupError::WriteMemoryFs(format!("{:?}", e))); + } + } + } + } else if source_path.is_dir() { + // Create directory in memory fs + if let Err(e) = fs.create_directory_all(dest_path.to_path_buf()) { + return Err(SetupError::WriteMemoryFs(format!("{:?}", e))); + } + + // Read directory contents + let entries = std::fs::read_dir(source_path).map_err(|e| SetupError::ReadDir { + path: source_path.to_path_buf(), + error: e, + })?; + + for entry in entries { + let entry = entry.map_err(|e| SetupError::ReadDir { + path: source_path.to_path_buf(), + error: e, + })?; + + let file_name = entry.file_name(); + let source_child = source_path.join(&file_name); + let dest_child = dest_path.join(file_name.to_string_lossy().as_ref()); + + // Skip hidden files and common non-Python directories + let file_name_str = file_name.to_string_lossy(); + if file_name_str.starts_with('.') + || file_name_str == "__pycache__" + || file_name_str == "node_modules" + || file_name_str == ".git" + { + continue; + } + + load_directory_recursive(fs, &source_child, &dest_child)?; + } + } + + Ok(()) +} + +/// Errors that can occur during project setup +#[derive(Debug)] +pub enum SetupError { + TempDir(std::io::Error), + GitCommand(std::io::Error), + GitClone { + stderr: String, + }, + GitFetch { + stderr: String, + }, + GitCheckout { + commit: String, + stderr: String, + }, + UvCommand(std::io::Error), + UvNotFound, + VenvCreation { + stderr: String, + }, + WriteRequirements(std::io::Error), + DependencyInstall { + stderr: String, + }, + PathNotFound(String), + ReadFile { + path: std::path::PathBuf, + error: std::io::Error, + }, + ReadDir { + path: std::path::PathBuf, + error: std::io::Error, + }, + WriteMemoryFs(String), +} + +impl std::fmt::Display for SetupError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + 
SetupError::TempDir(e) => write!(f, "Failed to create temporary directory: {}", e), + SetupError::GitCommand(e) => write!(f, "Failed to execute git command: {}", e), + SetupError::GitClone { stderr } => write!(f, "Git clone failed: {}", stderr), + SetupError::GitFetch { stderr } => write!(f, "Git fetch failed: {}", stderr), + SetupError::GitCheckout { commit, stderr } => { + write!(f, "Git checkout of commit {} failed: {}", commit, stderr) + } + SetupError::UvCommand(e) => write!(f, "Failed to execute uv command: {}", e), + SetupError::UvNotFound => write!(f, "uv is not installed or not found in PATH"), + SetupError::VenvCreation { stderr } => { + write!(f, "Failed to create virtual environment: {}", stderr) + } + SetupError::WriteRequirements(e) => { + write!(f, "Failed to write requirements file: {}", e) + } + SetupError::DependencyInstall { stderr } => { + write!(f, "Dependency installation failed: {}", stderr) + } + SetupError::PathNotFound(path) => write!(f, "Benchmark path not found: {}", path), + SetupError::ReadFile { path, error } => { + write!(f, "Failed to read file {}: {}", path.display(), error) + } + SetupError::ReadDir { path, error } => { + write!(f, "Failed to read directory {}: {}", path.display(), error) + } + SetupError::WriteMemoryFs(e) => { + write!(f, "Failed to write to memory filesystem: {}", e) + } + } + } +} + +impl std::error::Error for SetupError {} + +/// Pre-defined real-world projects for benchmarking +pub mod projects { + use super::*; + + /// The colour-science/colour project + pub fn colour_science() -> RealWorldProject { + RealWorldProject::new("https://github.com/colour-science/colour") + .with_paths(["colour"]) + .with_deps([ + "matplotlib", + "numpy", + "pandas-stubs", + "pytest", + "scipy-stubs", + ]) + .with_max_dep_date("2025-06-17") + .with_python_version(PythonVersion::PY311) + .with_cost("mypy", 800) + .with_cost("pyright", 180) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
test_real_world_project_builder() { + let project = RealWorldProject::new("https://github.com/example/repo") + .with_commit("abc123") + .with_paths(["src", "tests"]) + .with_deps(["requests", "pytest"]) + .with_max_dep_date("2024-01-01") + .with_cost("mypy", 100); + + assert_eq!(project.location, "https://github.com/example/repo"); + assert_eq!(project.commit, Some("abc123".to_string())); + assert_eq!(project.paths, vec!["src", "tests"]); + assert_eq!(project.deps, vec!["requests", "pytest"]); + assert_eq!(project.max_dep_date, "2024-01-01"); + assert_eq!(project.cost.get("mypy"), Some(&100)); + } +} diff --git a/crates/ruff_db/src/system/test.rs b/crates/ruff_db/src/system/test.rs index 943469127c92f..cfdf204bb0b94 100644 --- a/crates/ruff_db/src/system/test.rs +++ b/crates/ruff_db/src/system/test.rs @@ -280,6 +280,13 @@ impl InMemorySystem { } } + pub fn from_memory_fs(memory_fs: MemoryFileSystem) -> Self { + Self { + user_config_directory: Mutex::new(None), + memory_fs, + } + } + pub fn fs(&self) -> &MemoryFileSystem { &self.memory_fs } diff --git a/crates/ty_project/src/metadata.rs b/crates/ty_project/src/metadata.rs index 47896e503113d..2c280a15a5b1a 100644 --- a/crates/ty_project/src/metadata.rs +++ b/crates/ty_project/src/metadata.rs @@ -17,7 +17,7 @@ pub mod pyproject; pub mod settings; pub mod value; -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(test, derive(serde::Serialize))] pub struct ProjectMetadata { pub(super) name: Name, From e62bf33a3003e778b13ae4ba8ff1f830068507b7 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 09:12:56 +0200 Subject: [PATCH 02/28] Add uv --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a18c5e393918a..f73aca898e961 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -905,6 +905,7 @@ jobs: persist-credentials: false - uses: 
Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8 + - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0 - name: "Install Rust toolchain" run: rustup show From 6b0f777d61bb8e868b0fba2e9d1cf7fb9448b9b4 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 10:43:41 +0200 Subject: [PATCH 03/28] Move to separate file, allow concurrent checking --- Cargo.lock | 2 +- crates/ruff_benchmark/Cargo.toml | 6 +- crates/ruff_benchmark/benches/ty.rs | 85 +-- crates/ruff_benchmark/benches/ty_project.rs | 93 ++++ .../ruff_benchmark/src/real_world_projects.rs | 500 +++++++----------- 5 files changed, 295 insertions(+), 391 deletions(-) create mode 100644 crates/ruff_benchmark/benches/ty_project.rs diff --git a/Cargo.lock b/Cargo.lock index 545c94c455f3f..eed806d5122b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2586,6 +2586,7 @@ dependencies = [ name = "ruff_benchmark" version = "0.0.0" dependencies = [ + "anyhow", "codspeed-criterion-compat", "criterion", "mimalloc", @@ -2597,7 +2598,6 @@ dependencies = [ "ruff_python_parser", "ruff_python_trivia", "rustc-hash 2.1.1", - "tempfile", "tikv-jemallocator", "ty_project", ] diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index bf8a7a9b1cb42..8a03a2c1a5c7a 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -36,6 +36,10 @@ harness = false name = "ty" harness = false +[[bench]] +name = "ty_project" +harness = false + [dependencies] codspeed-criterion-compat = { workspace = true, default-features = false, optional = true } criterion = { workspace = true, default-features = false } @@ -43,7 +47,7 @@ rayon = { workspace = true } rustc-hash = { workspace = true } ruff_db = { workspace = true } ruff_python_ast = { workspace = true } -tempfile = { workspace = true } +anyhow = { workspace = true } [dev-dependencies] ruff_linter = { workspace = true } diff --git a/crates/ruff_benchmark/benches/ty.rs 
b/crates/ruff_benchmark/benches/ty.rs index 9af10a7cd8824..49399b7db9e47 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -8,12 +8,10 @@ use rayon::ThreadPoolBuilder; use rustc_hash::FxHashSet; use ruff_benchmark::TestFile; -use ruff_benchmark::real_world_projects::{projects, setup_real_world_project}; -use ruff_db::Db as _; use ruff_db::diagnostic::{Diagnostic, DiagnosticId, Severity}; use ruff_db::files::{File, system_path_to_file}; use ruff_db::source::source_text; -use ruff_db::system::{InMemorySystem, MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; +use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; use ruff_python_ast::PythonVersion; use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::RangedValue; @@ -349,87 +347,10 @@ fn benchmark_many_tuple_assignments(criterion: &mut Criterion) { }); } -fn benchmark_real_world_colour_science(criterion: &mut Criterion) { - setup_rayon(); - - // Setup the colour-science project (expensive, done once) - let project = projects::colour_science(); - let setup_project = - setup_real_world_project(project).expect("Failed to setup colour-science project"); - - // Create system and metadata (expensive, done once) - let fs = setup_project.memory_fs().clone(); - let system = TestSystem::new(InMemorySystem::from_memory_fs(fs)); - - let src_root = SystemPath::new("/src"); - let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap(); - - metadata.apply_options(Options { - environment: Some(EnvironmentOptions { - python_version: Some(RangedValue::cli(setup_project.config.python_version)), - ..EnvironmentOptions::default() - }), - ..Options::default() - }); - - let benchmark_paths = setup_project.benchmark_paths(); - - fn setup( - metadata: &ProjectMetadata, - system: &TestSystem, - benchmark_paths: &[SystemPathBuf], - ) -> ProjectDatabase { - // Create new database instance and collect files for 
this instance - let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); - - let mut files_to_open = FxHashSet::default(); - for path in benchmark_paths { - collect_python_files(&db, path, &mut files_to_open); - } - - db.project().set_open_files(&mut db, files_to_open.clone()); - - db - } - - fn check_project(db: &mut ProjectDatabase) { - let result = db.check(); - // Don't assert specific diagnostic count for real-world projects - // as they may have legitimate type issues - let _ = result.len(); - } - - criterion.bench_function("ty_real_world[colour_science]", |b| { - b.iter_batched_ref( - || setup(&metadata, &system, &benchmark_paths), - check_project, - BatchSize::SmallInput, - ); - }); -} - -/// Collect all Python files in a directory for opening in the database -fn collect_python_files(db: &ProjectDatabase, path: &SystemPath, files: &mut FxHashSet) { - if let Ok(file) = system_path_to_file(db, path.to_path_buf()) { - if path.as_str().ends_with(".py") || path.as_str().ends_with(".pyi") { - files.insert(file); - } - } - - if let Ok(entries) = db.system().read_directory(path) { - for entry in entries { - if let Ok(dir_entry) = entry { - collect_python_files(db, dir_entry.path(), files); - } - } - } -} - criterion_group!(check_file, benchmark_cold, benchmark_incremental); criterion_group!( micro, benchmark_many_string_assignments, - benchmark_many_tuple_assignments + benchmark_many_tuple_assignments, ); -criterion_group!(real_world, benchmark_real_world_colour_science); -criterion_main!(check_file, micro, real_world); +criterion_main!(check_file, micro); diff --git a/crates/ruff_benchmark/benches/ty_project.rs b/crates/ruff_benchmark/benches/ty_project.rs new file mode 100644 index 0000000000000..54ccc8aeefbc9 --- /dev/null +++ b/crates/ruff_benchmark/benches/ty_project.rs @@ -0,0 +1,93 @@ +#![allow(clippy::disallowed_names)] +use ruff_benchmark::criterion; + +use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; +use 
ruff_benchmark::real_world_projects::RealWorldProject; +use ruff_db::system::{InMemorySystem, SystemPath, TestSystem}; +use ruff_python_ast::PythonVersion; +use ty_project::metadata::options::{EnvironmentOptions, Options}; +use ty_project::metadata::value::{RangedValue, RelativePathBuf}; +use ty_project::{Db, ProjectDatabase, ProjectMetadata}; + +fn bench_project(project: RealWorldProject, criterion: &mut Criterion) { + let setup_project = project + .setup() + .expect("Failed to setup colour-science project"); + + // Create system and metadata (expensive, done once) + let fs = setup_project.memory_fs().clone(); + let system = TestSystem::new(InMemorySystem::from_memory_fs(fs)); + + let src_root = SystemPath::new("/"); + let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap(); + + metadata.apply_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(setup_project.config.python_version)), + python: Some(RelativePathBuf::cli(SystemPath::new(".venv"))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + let check_paths = setup_project.check_paths(); + + fn setup( + metadata: &ProjectMetadata, + system: &TestSystem, + check_paths: &[&SystemPath], + ) -> ProjectDatabase { + // Create new database instance and collect files for this instance + let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); + + db.project().set_included_paths( + &mut db, + check_paths + .into_iter() + .map(|path| path.to_path_buf()) + .collect(), + ); + db + } + + fn check_project(db: &mut ProjectDatabase) { + let result = db.check(); + // Don't assert specific diagnostic count for real-world projects + // as they may have legitimate type issues + let diagnostics = result.len(); + + assert!(diagnostics > 1 && diagnostics <= 477); + } + + criterion.bench_function(&setup_project.config.name, |b| { + b.iter_batched_ref( + || setup(&metadata, &system, &check_paths), + check_project, + 
BatchSize::SmallInput, + ); + }); +} + +fn benchmark_real_world_colour_science(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "colour-science", + location: "https://github.com/colour-science/colour", + commit: "a17e2335c29e7b6f08080aa4c93cfa9b61f84757", + paths: &[SystemPath::new("colour")], + dependencies: &[ + "matplotlib", + "numpy", + "pandas-stubs", + "pytest", + "scipy-stubs", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY311, + }; + + bench_project(project, criterion); +} + +criterion_group!(real_world, benchmark_real_world_colour_science); +criterion_main!(real_world); diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs index 572a8b71f6d0a..6e664339b8cf8 100644 --- a/crates/ruff_benchmark/src/real_world_projects.rs +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -3,234 +3,243 @@ //! This module provides functionality to: //! 1. Clone external repositories to temporary directories //! 2. Install dependencies using uv with date constraints -//! 3. Load project files into MemoryFileSystem for benchmarking +//! 3. 
Load project files into `MemoryFileSystem` for benchmarking -use std::collections::HashMap; -use std::path::Path; +use std::ffi::OsStr; +use std::path::{Path, PathBuf}; use std::process::Command; +use anyhow::{Context, Result}; use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf}; use ruff_python_ast::PythonVersion; /// Configuration for a real-world project to benchmark #[derive(Debug, Clone)] -pub struct RealWorldProject { +pub struct RealWorldProject<'a> { + pub name: &'a str, + /// Git repository URL - pub location: String, - /// Specific commit hash to checkout (optional, uses latest if None) - pub commit: Option, + pub location: &'a str, + /// Specific commit hash to checkout + pub commit: &'a str, /// List of paths within the project to check - pub paths: Vec, + pub paths: &'a [&'a SystemPath], /// Dependencies to install via uv - pub deps: Vec, + pub dependencies: &'a [&'a str], /// Date constraint for dependencies (ISO 8601 format) - pub max_dep_date: String, + pub max_dep_date: &'a str, /// Python version to use pub python_version: PythonVersion, - /// Estimated benchmark costs for reference - pub cost: HashMap, } -impl RealWorldProject { - /// Create a new real-world project configuration - pub fn new(location: impl Into) -> Self { - Self { - location: location.into(), - commit: None, - paths: Vec::new(), - deps: Vec::new(), - max_dep_date: "2025-06-17".to_string(), // Default to today's date - python_version: PythonVersion::PY311, // Default Python version - cost: HashMap::new(), +impl<'a> RealWorldProject<'a> { + /// Setup a real-world project for benchmarking + pub fn setup(self) -> Result> { + // Create project directory in cargo target + let project_root = get_project_cache_dir(self.name)?; + + // Clone the repository if it doesn't exist, or update if it does + if project_root.exists() { + update_repository(&project_root, self.commit)?; + } else { + clone_repository(self.location, &project_root, self.commit)?; } - } - /// Set the 
specific commit to checkout - pub fn with_commit(mut self, commit: impl Into) -> Self { - self.commit = Some(commit.into()); - self - } + let checkout = Checkout { + path: project_root, + project: self, + }; - /// Add paths to benchmark within the project - pub fn with_paths(mut self, paths: impl IntoIterator>) -> Self { - self.paths.extend(paths.into_iter().map(|p| p.into())); - self - } + // Install dependencies if specified + if !checkout.project().dependencies.is_empty() { + install_dependencies(&checkout)?; + } - /// Add dependencies to install - pub fn with_deps(mut self, deps: impl IntoIterator>) -> Self { - self.deps.extend(deps.into_iter().map(|d| d.into())); - self - } + // Load files into memory filesystem + let memory_fs = load_into_memory_fs(&checkout.path)?; - /// Set the maximum date for dependencies - pub fn with_max_dep_date(mut self, date: impl Into) -> Self { - self.max_dep_date = date.into(); - self + Ok(SetupProject { + path: checkout.path, + memory_fs, + config: checkout.project, + }) } +} - /// Add cost estimates - pub fn with_cost(mut self, tool: impl Into, cost: u32) -> Self { - self.cost.insert(tool.into(), cost); - self +struct Checkout<'a> { + project: RealWorldProject<'a>, + path: PathBuf, +} + +impl<'a> Checkout<'a> { + /// Get the virtual environment path + fn venv_path(&self) -> PathBuf { + self.path.join(".venv") } - /// Set the Python version to use - pub fn with_python_version(mut self, version: PythonVersion) -> Self { - self.python_version = version; - self + fn project(&self) -> &RealWorldProject<'a> { + &self.project } } /// A setup real-world project ready for benchmarking -pub struct SetupProject { +pub struct SetupProject<'a> { + /// Path to the cloned project + pub path: PathBuf, /// Memory filesystem containing the project files pub memory_fs: MemoryFileSystem, /// Project configuration - pub config: RealWorldProject, + pub config: RealWorldProject<'a>, } -impl SetupProject { +impl<'a> SetupProject<'a> { /// Get the 
memory filesystem for benchmarking pub fn memory_fs(&self) -> &MemoryFileSystem { &self.memory_fs } /// Get the project configuration - pub fn config(&self) -> &RealWorldProject { + pub fn config(&self) -> &RealWorldProject<'a> { &self.config } - /// Get the benchmark paths as SystemPathBuf - pub fn benchmark_paths(&self) -> Vec { - self.config - .paths - .iter() - .map(|path| SystemPathBuf::from("src").join(path)) - .collect() + /// Get the benchmark paths as `SystemPathBuf` + pub fn check_paths(&self) -> &'a [&'a SystemPath] { + self.config.paths + } + + /// Get the virtual environment path + pub fn venv_path(&self) -> PathBuf { + self.path.join(".venv") } } -/// Setup a real-world project for benchmarking -pub fn setup_real_world_project( - project: RealWorldProject, -) -> std::result::Result { - // Create temporary directory - let temp_dir = tempfile::TempDir::new().map_err(SetupError::TempDir)?; - let project_root = temp_dir.path().to_path_buf(); +/// Get the cache directory for a project in the cargo target directory +fn get_project_cache_dir(project_name: &str) -> Result { + // Create cache directory in target + let target_dir = std::env::var("CARGO_TARGET_DIR") + .map(std::path::PathBuf::from) + .unwrap_or_else(|_| std::path::PathBuf::from("target")); - // Clone the repository - clone_repository(&project.location, &project_root, project.commit.as_deref())?; - - // Install dependencies if specified - if !project.deps.is_empty() { - install_dependencies( - &project_root, - &project.deps, - &project.max_dep_date, - project.python_version, - )?; + let cache_dir = target_dir.join("benchmark_cache").join(project_name); + + if let Some(parent) = cache_dir.parent() { + std::fs::create_dir_all(parent).context("Failed to create cache directory")?; + } + + Ok(cache_dir) +} + +/// Update an existing repository +fn update_repository(project_root: &Path, commit: &str) -> Result<()> { + // Fetch latest changes + let output = Command::new("git") + .args(["fetch", 
"origin"]) + .current_dir(project_root) + .output() + .context("Failed to execute git fetch command")?; + + if !output.status.success() { + anyhow::bail!( + "Git fetch failed: {}", + String::from_utf8_lossy(&output.stderr) + ); } - // Load files into memory filesystem - let memory_fs = load_into_memory_fs(&project_root, &project.paths)?; + // Checkout specific commit + let target = commit; + let output = Command::new("git") + .args(["reset", "--hard", target]) + .current_dir(project_root) + .output() + .context("Failed to execute git reset command")?; + + if !output.status.success() { + anyhow::bail!( + "Git checkout of commit {} failed: {}", + target, + String::from_utf8_lossy(&output.stderr) + ); + } - Ok(SetupProject { - memory_fs, - config: project, - }) + Ok(()) } /// Clone a git repository to the specified directory -fn clone_repository( - repo_url: &str, - target_dir: &Path, - commit: Option<&str>, -) -> std::result::Result<(), SetupError> { +fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<()> { + // Create parent directory if it doesn't exist + if let Some(parent) = target_dir.parent() { + std::fs::create_dir_all(parent).context("Failed to create parent directory for clone")?; + } + // Clone the repository let output = Command::new("git") - .args(["clone", "--depth", "1", repo_url, "."]) - .current_dir(target_dir) + .args(["clone", repo_url, target_dir.to_str().unwrap()]) .output() - .map_err(SetupError::GitCommand)?; + .context("Failed to execute git clone command")?; if !output.status.success() { - return Err(SetupError::GitClone { - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - }); + anyhow::bail!( + "Git clone failed: {}", + String::from_utf8_lossy(&output.stderr) + ); } - // Checkout specific commit if provided - if let Some(commit_hash) = commit { - // First, unshallow the repository to get all commits - let output = Command::new("git") - .args(["fetch", "--unshallow"]) - .current_dir(target_dir) - 
.output() - .map_err(SetupError::GitCommand)?; - - if !output.status.success() { - return Err(SetupError::GitFetch { - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - }); - } - - // Checkout the specific commit - let output = Command::new("git") - .args(["checkout", commit_hash]) - .current_dir(target_dir) - .output() - .map_err(SetupError::GitCommand)?; + // Checkout specific commit + let output = Command::new("git") + .args(["checkout", commit]) + .current_dir(target_dir) + .output() + .context("Failed to execute git checkout command")?; - if !output.status.success() { - return Err(SetupError::GitCheckout { - commit: commit_hash.to_string(), - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - }); - } + if !output.status.success() { + anyhow::bail!( + "Git checkout of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); } Ok(()) } /// Install dependencies using uv with date constraints -fn install_dependencies( - project_root: &Path, - deps: &[String], - max_date: &str, - python_version: PythonVersion, -) -> std::result::Result<(), SetupError> { +fn install_dependencies(checkout: &Checkout) -> Result<()> { // Check if uv is available let uv_check = Command::new("uv") .arg("--version") .output() - .map_err(SetupError::UvCommand)?; + .context("Failed to execute uv version check")?; if !uv_check.status.success() { - return Err(SetupError::UvNotFound); + anyhow::bail!("uv is not installed or not found in PATH"); } // Create an isolated virtual environment to avoid picking up ruff's pyproject.toml - let venv_path = project_root.join(".benchmark_venv"); - let python_version_str = python_version.to_string(); - let output = Command::new("uv") - .args(["venv", "--python", &python_version_str]) - .arg(&venv_path) - .current_dir(project_root) - .output() - .map_err(SetupError::UvCommand)?; + let venv_path = checkout.venv_path(); + let python_version_str = checkout.project().python_version.to_string(); + + // Only create 
venv if it doesn't exist + if !venv_path.exists() { + let output = Command::new("uv") + .args(["venv", "--python", &python_version_str]) + .arg(&venv_path) + .output() + .context("Failed to execute uv venv command")?; - if !output.status.success() { - return Err(SetupError::VenvCreation { - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - }); + if !output.status.success() { + anyhow::bail!( + "Failed to create virtual environment: {}", + String::from_utf8_lossy(&output.stderr) + ); + } } // Create a requirements file with dependencies - let requirements_content = deps.join("\n"); - let requirements_path = project_root.join("benchmark_requirements.txt"); + let requirements_content = checkout.project().dependencies.join("\n"); + let requirements_path = checkout.path.join("benchmark_requirements.txt"); std::fs::write(&requirements_path, requirements_content) - .map_err(SetupError::WriteRequirements)?; + .context("Failed to write requirements file")?; // Install dependencies with date constraint in the isolated environment let mut cmd = Command::new("uv"); @@ -244,40 +253,28 @@ fn install_dependencies( ]); // Add date constraint if specified - if !max_date.is_empty() { - cmd.args(["--exclude-newer", max_date]); - } + + cmd.args(["--exclude-newer", &checkout.project().max_dep_date]); let output = cmd - .current_dir(project_root) .output() - .map_err(SetupError::UvCommand)?; + .context("Failed to execute uv pip install command")?; if !output.status.success() { - return Err(SetupError::DependencyInstall { - stderr: String::from_utf8_lossy(&output.stderr).to_string(), - }); + anyhow::bail!( + "Dependency installation failed: {}", + String::from_utf8_lossy(&output.stderr) + ); } Ok(()) } -/// Load project files into a MemoryFileSystem -fn load_into_memory_fs( - project_root: &Path, - benchmark_paths: &[String], -) -> std::result::Result { +/// Load project files into a `MemoryFileSystem` +fn load_into_memory_fs(path: &Path) -> Result { let fs = 
MemoryFileSystem::new(); - // Walk through each benchmark path and load Python files - for path in benchmark_paths { - let full_path = project_root.join(path); - if !full_path.exists() { - return Err(SetupError::PathNotFound(path.clone())); - } - - load_directory_recursive(&fs, &full_path, &SystemPathBuf::from("src").join(path))?; - } + load_directory_recursive(&fs, path, &SystemPathBuf::from("/"))?; Ok(fs) } @@ -287,37 +284,43 @@ fn load_directory_recursive( fs: &MemoryFileSystem, source_path: &Path, dest_path: &SystemPath, -) -> std::result::Result<(), SetupError> { +) -> Result<()> { if source_path.is_file() { - // Load single file - if let Some(ext) = source_path.extension() { - if ext == "py" || ext == "pyi" { - let content = - std::fs::read_to_string(source_path).map_err(|e| SetupError::ReadFile { - path: source_path.to_path_buf(), - error: e, + if source_path.file_name().and_then(OsStr::to_str) == Some("pyvenv.cfg") { + // Skip pyvenv.cfg files because the Python path will be invalid.
+ return Ok(()); + } + + match std::fs::read_to_string(source_path) { + Ok(content) => { + fs.write_file_all(dest_path.to_path_buf(), content) + .with_context(|| { + format!("Failed to write file to memory filesystem: {dest_path}") })?; - if let Err(e) = fs.write_file_all(dest_path.to_path_buf(), content) { - return Err(SetupError::WriteMemoryFs(format!("{:?}", e))); + } + Err(error) => { + if error.kind() == std::io::ErrorKind::InvalidData { + // Skip non UTF-8 files + return Ok(()); } + return Err(error) + .with_context(|| format!("Failed to read file: {}", source_path.display())); } } } else if source_path.is_dir() { // Create directory in memory fs - if let Err(e) = fs.create_directory_all(dest_path.to_path_buf()) { - return Err(SetupError::WriteMemoryFs(format!("{:?}", e))); - } + fs.create_directory_all(dest_path.to_path_buf()) + .with_context(|| { + format!("Failed to create directory in memory filesystem: {dest_path}") + })?; // Read directory contents - let entries = std::fs::read_dir(source_path).map_err(|e| SetupError::ReadDir { - path: source_path.to_path_buf(), - error: e, - })?; + let entries = std::fs::read_dir(source_path) + .with_context(|| format!("Failed to read directory: {}", source_path.display()))?; for entry in entries { - let entry = entry.map_err(|e| SetupError::ReadDir { - path: source_path.to_path_buf(), - error: e, + let entry = entry.with_context(|| { + format!("Failed to read directory entry: {}", source_path.display()) })?; let file_name = entry.file_name(); @@ -326,7 +329,7 @@ fn load_directory_recursive( // Skip hidden files and common non-Python directories let file_name_str = file_name.to_string_lossy(); - if file_name_str.starts_with('.') + if file_name != ".venv" && file_name_str.starts_with('.') || file_name_str == "__pycache__" || file_name_str == "node_modules" || file_name_str == ".git" @@ -340,120 +343,3 @@ fn load_directory_recursive( Ok(()) } - -/// Errors that can occur during project setup -#[derive(Debug)] -pub enum 
SetupError { - TempDir(std::io::Error), - GitCommand(std::io::Error), - GitClone { - stderr: String, - }, - GitFetch { - stderr: String, - }, - GitCheckout { - commit: String, - stderr: String, - }, - UvCommand(std::io::Error), - UvNotFound, - VenvCreation { - stderr: String, - }, - WriteRequirements(std::io::Error), - DependencyInstall { - stderr: String, - }, - PathNotFound(String), - ReadFile { - path: std::path::PathBuf, - error: std::io::Error, - }, - ReadDir { - path: std::path::PathBuf, - error: std::io::Error, - }, - WriteMemoryFs(String), -} - -impl std::fmt::Display for SetupError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SetupError::TempDir(e) => write!(f, "Failed to create temporary directory: {}", e), - SetupError::GitCommand(e) => write!(f, "Failed to execute git command: {}", e), - SetupError::GitClone { stderr } => write!(f, "Git clone failed: {}", stderr), - SetupError::GitFetch { stderr } => write!(f, "Git fetch failed: {}", stderr), - SetupError::GitCheckout { commit, stderr } => { - write!(f, "Git checkout of commit {} failed: {}", commit, stderr) - } - SetupError::UvCommand(e) => write!(f, "Failed to execute uv command: {}", e), - SetupError::UvNotFound => write!(f, "uv is not installed or not found in PATH"), - SetupError::VenvCreation { stderr } => { - write!(f, "Failed to create virtual environment: {}", stderr) - } - SetupError::WriteRequirements(e) => { - write!(f, "Failed to write requirements file: {}", e) - } - SetupError::DependencyInstall { stderr } => { - write!(f, "Dependency installation failed: {}", stderr) - } - SetupError::PathNotFound(path) => write!(f, "Benchmark path not found: {}", path), - SetupError::ReadFile { path, error } => { - write!(f, "Failed to read file {}: {}", path.display(), error) - } - SetupError::ReadDir { path, error } => { - write!(f, "Failed to read directory {}: {}", path.display(), error) - } - SetupError::WriteMemoryFs(e) => { - write!(f, "Failed to write 
to memory filesystem: {}", e) - } - } - } -} - -impl std::error::Error for SetupError {} - -/// Pre-defined real-world projects for benchmarking -pub mod projects { - use super::*; - - /// The colour-science/colour project - pub fn colour_science() -> RealWorldProject { - RealWorldProject::new("https://github.com/colour-science/colour") - .with_paths(["colour"]) - .with_deps([ - "matplotlib", - "numpy", - "pandas-stubs", - "pytest", - "scipy-stubs", - ]) - .with_max_dep_date("2025-06-17") - .with_python_version(PythonVersion::PY311) - .with_cost("mypy", 800) - .with_cost("pyright", 180) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_real_world_project_builder() { - let project = RealWorldProject::new("https://github.com/example/repo") - .with_commit("abc123") - .with_paths(["src", "tests"]) - .with_deps(["requests", "pytest"]) - .with_max_dep_date("2024-01-01") - .with_cost("mypy", 100); - - assert_eq!(project.location, "https://github.com/example/repo"); - assert_eq!(project.commit, Some("abc123".to_string())); - assert_eq!(project.paths, vec!["src", "tests"]); - assert_eq!(project.deps, vec!["requests", "pytest"]); - assert_eq!(project.max_dep_date, "2024-01-01"); - assert_eq!(project.cost.get("mypy"), Some(&100)); - } -} From 7d587207cbb6674a35a334db4f64246c8966496d Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 11:22:53 +0200 Subject: [PATCH 04/28] Add different projects --- crates/ruff_benchmark/benches/ty_project.rs | 100 +++++++++++++++++--- 1 file changed, 89 insertions(+), 11 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty_project.rs b/crates/ruff_benchmark/benches/ty_project.rs index 54ccc8aeefbc9..7a9bd2d2b51f2 100644 --- a/crates/ruff_benchmark/benches/ty_project.rs +++ b/crates/ruff_benchmark/benches/ty_project.rs @@ -1,4 +1,5 @@ #![allow(clippy::disallowed_names)] +use rayon::ThreadPoolBuilder; use ruff_benchmark::criterion; use criterion::{BatchSize, Criterion, criterion_group, 
criterion_main}; @@ -9,10 +10,11 @@ use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; -fn bench_project(project: RealWorldProject, criterion: &mut Criterion) { - let setup_project = project - .setup() - .expect("Failed to setup colour-science project"); +#[track_caller] +fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagnostics: usize) { + setup_rayon(); + + let setup_project = project.setup().expect("Failed to setup project"); // Create system and metadata (expensive, done once) let fs = setup_project.memory_fs().clone(); @@ -50,25 +52,25 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion) { db } - fn check_project(db: &mut ProjectDatabase) { + fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { let result = db.check(); // Don't assert specific diagnostic count for real-world projects // as they may have legitimate type issues let diagnostics = result.len(); - assert!(diagnostics > 1 && diagnostics <= 477); + assert!(diagnostics > 1 && diagnostics <= max_diagnostics); } criterion.bench_function(&setup_project.config.name, |b| { b.iter_batched_ref( || setup(&metadata, &system, &check_paths), - check_project, + |db| check_project(db, max_diagnostics), BatchSize::SmallInput, ); }); } -fn benchmark_real_world_colour_science(criterion: &mut Criterion) { +fn colour_science(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "colour-science", @@ -83,11 +85,87 @@ fn benchmark_real_world_colour_science(criterion: &mut Criterion) { "scipy-stubs", ], max_dep_date: "2025-06-17", - python_version: PythonVersion::PY311, + python_version: PythonVersion::PY310, + }; + + bench_project(project, criterion, 477); +} + +fn pydantic(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, 
done once) + let project = RealWorldProject { + name: "pydantic", + location: "https://github.com/pydantic/pydantic", + commit: "0c4a22b64b23dfad27387750cf07487efc45eb05", + paths: &[SystemPath::new("pydantic")], + dependencies: &[ + "annotated-types", + "pydantic-core", + "typing-extensions", + "typing-inspection", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY39, + }; + + bench_project(project, criterion, 1000); +} + +fn freqtrade(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "freqtrade", + location: "https://github.com/freqtrade/freqtrade", + commit: "2d842ea129e56575852ee0c45383c8c3f706be19", + paths: &[SystemPath::new("freqtrade")], + dependencies: &[ + "numpy", + "pandas-stubs", + "pydantic", + "sqlalchemy", + "types-cachetools", + "types-filelock", + "types-python-dateutil", + "types-requests", + "types-tabulate", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }; + + bench_project(project, criterion, 10000); +} + +fn hydra(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "hydra-zen", + location: "https://github.com/mit-ll-responsible-ai/hydra-zen", + commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", + paths: &[SystemPath::new("src")], + dependencies: &["pydantic", "beartype", "hydra-core"], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, }; - bench_project(project, criterion); + bench_project(project, criterion, 100000); +} + +static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); + +fn setup_rayon() { + // Initialize the rayon thread pool outside the benchmark because it has a significant cost. + // We limit the thread pool to only one (the current thread) because we're focused on + // where ty spends time and less about how well the code runs concurrently. 
+ // We might want to add a benchmark focusing on concurrency to detect congestion in the future. + RAYON_INITIALIZED.call_once(|| { + ThreadPoolBuilder::new() + .num_threads(1) + .use_current_thread() + .build_global() + .unwrap(); + }); } -criterion_group!(real_world, benchmark_real_world_colour_science); +criterion_group!(real_world, pydantic, freqtrade, hydra); criterion_main!(real_world); From 35d0e866aff17ff4545e1c94c8d3df723c1909f2 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 12:47:32 +0200 Subject: [PATCH 05/28] Use different projects --- Cargo.lock | 2 + crates/ruff_benchmark/Cargo.toml | 2 + crates/ruff_benchmark/benches/ty_project.rs | 48 ++++++++++++++++++- .../ruff_benchmark/src/real_world_projects.rs | 36 +++++++++++--- 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eed806d5122b5..87e7aaba379d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2598,6 +2598,8 @@ dependencies = [ "ruff_python_parser", "ruff_python_trivia", "rustc-hash 2.1.1", + "serde", + "serde_json", "tikv-jemallocator", "ty_project", ] diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index 8a03a2c1a5c7a..e9a652a653676 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -43,6 +43,8 @@ harness = false [dependencies] codspeed-criterion-compat = { workspace = true, default-features = false, optional = true } criterion = { workspace = true, default-features = false } +serde = { workspace = true } +serde_json = { workspace = true } rayon = { workspace = true } rustc-hash = { workspace = true } ruff_db = { workspace = true } diff --git a/crates/ruff_benchmark/benches/ty_project.rs b/crates/ruff_benchmark/benches/ty_project.rs index 7a9bd2d2b51f2..63726a0914570 100644 --- a/crates/ruff_benchmark/benches/ty_project.rs +++ b/crates/ruff_benchmark/benches/ty_project.rs @@ -26,7 +26,8 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn 
metadata.apply_options(Options { environment: Some(EnvironmentOptions { python_version: Some(RangedValue::cli(setup_project.config.python_version)), - python: Some(RelativePathBuf::cli(SystemPath::new(".venv"))), + python: (!setup_project.config().dependencies.is_empty()) + .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), ..EnvironmentOptions::default() }), ..Options::default() @@ -70,6 +71,7 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn }); } +#[cfg(not(feature = "codspeed"))] fn colour_science(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { @@ -111,6 +113,7 @@ fn pydantic(criterion: &mut Criterion) { bench_project(project, criterion, 1000); } +#[cfg(not(feature = "codspeed"))] fn freqtrade(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { @@ -151,6 +154,36 @@ fn hydra(criterion: &mut Criterion) { bench_project(project, criterion, 100000); } +fn attrs(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "attrs", + location: "https://github.com/python-attrs/attrs", + commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b", + paths: &[SystemPath::new("src")], + dependencies: &[], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }; + + bench_project(project, criterion, 100000); +} + +fn anyio(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "anyio", + location: "https://github.com/agronholm/anyio", + commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", + paths: &[SystemPath::new("src")], + dependencies: &[], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }; + + bench_project(project, criterion, 100000); +} + static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); 
fn setup_rayon() { @@ -167,5 +200,16 @@ fn setup_rayon() { }); } -criterion_group!(real_world, pydantic, freqtrade, hydra); +#[cfg(feature = "codspeed")] +criterion_group!(real_world, anyio, attrs, pydantic, hydra); +#[cfg(not(feature = "codspeed"))] +criterion_group!( + real_world, + anyio, + attrs, + colour_science, + freqtrade, + hydra, + pydantic +); criterion_main!(real_world); diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs index 6e664339b8cf8..1ccb4d153cf71 100644 --- a/crates/ruff_benchmark/src/real_world_projects.rs +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -104,7 +104,7 @@ impl<'a> SetupProject<'a> { } /// Get the benchmark paths as `SystemPathBuf` - pub fn check_paths(&self) -> &'a [&'a SystemPath] { + pub fn check_paths(&self) -> &'a [&SystemPath] { self.config.paths } @@ -116,11 +116,9 @@ impl<'a> SetupProject<'a> { /// Get the cache directory for a project in the cargo target directory fn get_project_cache_dir(project_name: &str) -> Result { - // Create cache directory in target - let target_dir = std::env::var("CARGO_TARGET_DIR") - .map(std::path::PathBuf::from) - .unwrap_or_else(|_| std::path::PathBuf::from("target")); - + let target_dir = cargo_target_directory() + .cloned() + .unwrap_or_else(|| PathBuf::from("target")); let cache_dir = target_dir.join("benchmark_cache").join(project_name); if let Some(parent) = cache_dir.parent() { @@ -254,7 +252,7 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { // Add date constraint if specified - cmd.args(["--exclude-newer", &checkout.project().max_dep_date]); + cmd.args(["--exclude-newer", checkout.project().max_dep_date]); let output = cmd .output() @@ -343,3 +341,27 @@ fn load_directory_recursive( Ok(()) } + +static CARGO_TARGET_DIR: std::sync::OnceLock> = std::sync::OnceLock::new(); + +fn cargo_target_directory() -> Option<&'static PathBuf> { + CARGO_TARGET_DIR + .get_or_init(|| { + #[derive(serde::Deserialize)] 
+ struct Metadata { + target_directory: PathBuf, + } + + std::env::var_os("CARGO_TARGET_DIR") + .map(PathBuf::from) + .or_else(|| { + let output = Command::new(std::env::var_os("CARGO")?) + .args(["metadata", "--format-version", "1"]) + .output() + .ok()?; + let metadata: Metadata = serde_json::from_slice(&output.stdout).ok()?; + Some(metadata.target_directory) + }) + }) + .as_ref() +} From c2acebf66e1c8592b8fa472b8e771f7a6fb6e866 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 13:32:35 +0200 Subject: [PATCH 06/28] Add some logging --- crates/ruff_benchmark/benches/ty_project.rs | 19 ++-- .../ruff_benchmark/src/real_world_projects.rs | 99 +++++++++++-------- 2 files changed, 70 insertions(+), 48 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty_project.rs b/crates/ruff_benchmark/benches/ty_project.rs index 63726a0914570..6b796b627dc17 100644 --- a/crates/ruff_benchmark/benches/ty_project.rs +++ b/crates/ruff_benchmark/benches/ty_project.rs @@ -1,4 +1,6 @@ #![allow(clippy::disallowed_names)] +use std::time::Instant; + use rayon::ThreadPoolBuilder; use ruff_benchmark::criterion; @@ -11,12 +13,15 @@ use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; #[track_caller] +#[allow(clippy::print_stderr)] fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagnostics: usize) { setup_rayon(); + let start = Instant::now(); + eprintln!("Setting up project {}", project.name); let setup_project = project.setup().expect("Failed to setup project"); + eprintln!("Project setup took: {:.2}s", start.elapsed().as_secs_f64()); - // Create system and metadata (expensive, done once) let fs = setup_project.memory_fs().clone(); let system = TestSystem::new(InMemorySystem::from_memory_fs(fs)); @@ -76,7 +81,7 @@ fn colour_science(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "colour-science", - 
location: "https://github.com/colour-science/colour", + repository: "https://github.com/colour-science/colour", commit: "a17e2335c29e7b6f08080aa4c93cfa9b61f84757", paths: &[SystemPath::new("colour")], dependencies: &[ @@ -97,7 +102,7 @@ fn pydantic(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "pydantic", - location: "https://github.com/pydantic/pydantic", + repository: "https://github.com/pydantic/pydantic", commit: "0c4a22b64b23dfad27387750cf07487efc45eb05", paths: &[SystemPath::new("pydantic")], dependencies: &[ @@ -118,7 +123,7 @@ fn freqtrade(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "freqtrade", - location: "https://github.com/freqtrade/freqtrade", + repository: "https://github.com/freqtrade/freqtrade", commit: "2d842ea129e56575852ee0c45383c8c3f706be19", paths: &[SystemPath::new("freqtrade")], dependencies: &[ @@ -143,7 +148,7 @@ fn hydra(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "hydra-zen", - location: "https://github.com/mit-ll-responsible-ai/hydra-zen", + repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", paths: &[SystemPath::new("src")], dependencies: &["pydantic", "beartype", "hydra-core"], @@ -158,7 +163,7 @@ fn attrs(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "attrs", - location: "https://github.com/python-attrs/attrs", + repository: "https://github.com/python-attrs/attrs", commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b", paths: &[SystemPath::new("src")], dependencies: &[], @@ -173,7 +178,7 @@ fn anyio(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "anyio", - location:
"https://github.com/agronholm/anyio", + repository: "https://github.com/agronholm/anyio", commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", paths: &[SystemPath::new("src")], dependencies: &[], diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs index 1ccb4d153cf71..dcb63534c4d98 100644 --- a/crates/ruff_benchmark/src/real_world_projects.rs +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -1,9 +1,15 @@ +#![allow(clippy::print_stderr)] + //! Infrastructure for benchmarking real-world Python projects. //! -//! This module provides functionality to: -//! 1. Clone external repositories to temporary directories -//! 2. Install dependencies using uv with date constraints -//! 3. Load project files into `MemoryFileSystem` for benchmarking +//! The module uses a setup similar to mypy primer's, which should make it easy +//! to add new benchmarks for projects in [mypy primer's project list](https://github.com/hauntsaninja/mypy_primer/blob/ebaa9fd27b51a278873b63676fd25490cec6823b/mypy_primer/projects.py#L74). +//! +//! The basic steps for a project are: +//! 1. Clone or update the project into a directory inside `./target`. The commits are pinned to prevent flaky benchmark results due to new commits. +//! 2. For projects with dependencies, run uv to create a virtual environment and install the dependencies. +//! 3. Read the entire project structure into a memory file system to reduce the IO noise in benchmarks. +//! 4. (not in this module) Create a `ProjectDatabase` and run the benchmark. use std::ffi::OsStr; use std::path::{Path, PathBuf}; @@ -16,17 +22,18 @@ use ruff_python_ast::PythonVersion; /// Configuration for a real-world project to benchmark #[derive(Debug, Clone)] pub struct RealWorldProject<'a> { + /// The name of the project. pub name: &'a str, - - /// Git repository URL - pub location: &'a str, + /// The project's Git repository. Must be publicly accessible.
+ pub repository: &'a str, /// Specific commit hash to checkout pub commit: &'a str, - /// List of paths within the project to check + /// List of paths within the project to check (`ty check `) pub paths: &'a [&'a SystemPath], /// Dependencies to install via uv pub dependencies: &'a [&'a str], - /// Date constraint for dependencies (ISO 8601 format) + /// Limit candidate packages to those that were uploaded prior to a given point in time (ISO 8601 format). + /// Maps to uv's `exclude-newer`. pub max_dep_date: &'a str, /// Python version to use pub python_version: PythonVersion, @@ -40,9 +47,21 @@ impl<'a> RealWorldProject<'a> { // Clone the repository if it doesn't exist, or update if it does if project_root.exists() { + eprintln!("Updating repository for project '{}'...", self.name); + let start = std::time::Instant::now(); update_repository(&project_root, self.commit)?; + eprintln!( + "Repository update completed in {:.2}s", + start.elapsed().as_secs_f64() + ); } else { - clone_repository(self.location, &project_root, self.commit)?; + eprintln!("Cloning repository for project '{}'...", self.name); + let start = std::time::Instant::now(); + clone_repository(self.repository, &project_root, self.commit)?; + eprintln!( + "Repository clone completed in {:.2}s", + start.elapsed().as_secs_f64() + ); } let checkout = Checkout { @@ -52,11 +71,21 @@ impl<'a> RealWorldProject<'a> { // Install dependencies if specified if !checkout.project().dependencies.is_empty() { + eprintln!( + "Installing {} dependencies for project '{}'...", + checkout.project().dependencies.len(), + checkout.project().name + ); + let start = std::time::Instant::now(); install_dependencies(&checkout)?; + eprintln!( + "Dependency installation completed in {:.2}s", + start.elapsed().as_secs_f64() + ); } // Load files into memory filesystem - let memory_fs = load_into_memory_fs(&checkout.path)?; + let memory_fs = copy_into_memory_fs(&checkout.path)?; Ok(SetupProject { path: checkout.path, @@ -86,7 +115,8 
@@ impl<'a> Checkout<'a> { pub struct SetupProject<'a> { /// Path to the cloned project pub path: PathBuf, - /// Memory filesystem containing the project files + /// Memory filesystem containing the checked out project directory and the virtual environemnt + /// (only if the project has dependencies). pub memory_fs: MemoryFileSystem, /// Project configuration pub config: RealWorldProject<'a>, @@ -207,7 +237,7 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { let uv_check = Command::new("uv") .arg("--version") .output() - .context("Failed to execute uv version check")?; + .context("Failed to execute uv version check.")?; if !uv_check.status.success() { anyhow::bail!("uv is not installed or not found in PATH"); @@ -233,12 +263,6 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { } } - // Create a requirements file with dependencies - let requirements_content = checkout.project().dependencies.join("\n"); - let requirements_path = checkout.path.join("benchmark_requirements.txt"); - std::fs::write(&requirements_path, requirements_content) - .context("Failed to write requirements file")?; - // Install dependencies with date constraint in the isolated environment let mut cmd = Command::new("uv"); cmd.args([ @@ -246,13 +270,10 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { "install", "--python", venv_path.to_str().unwrap(), - "--requirement", - requirements_path.to_str().unwrap(), - ]); - - // Add date constraint if specified - - cmd.args(["--exclude-newer", checkout.project().max_dep_date]); + "--exclude-newer", + checkout.project().max_dep_date, + ]) + .args(checkout.project().dependencies); let output = cmd .output() @@ -268,24 +289,24 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { Ok(()) } -/// Load project files into a `MemoryFileSystem` -fn load_into_memory_fs(path: &Path) -> Result { +/// Copy the repositroy content and the virtual environment into a `MemoryFileSystem` +fn copy_into_memory_fs(path: &Path) 
-> Result { let fs = MemoryFileSystem::new(); - load_directory_recursive(&fs, path, &SystemPathBuf::from("/"))?; + copy_directory_recursive(&fs, path, &SystemPathBuf::from("/"))?; Ok(fs) } /// Recursively load a directory into the memory filesystem -fn load_directory_recursive( +fn copy_directory_recursive( fs: &MemoryFileSystem, source_path: &Path, dest_path: &SystemPath, ) -> Result<()> { if source_path.is_file() { if source_path.file_name().and_then(OsStr::to_str) == Some("pyvenv.cfg") { - // Skip pyenv.cfg files because the Python path will be invalid. + // Skip pyvenv.cfg files because the Python path will be invalid. return Ok(()); } @@ -298,7 +319,7 @@ fn load_directory_recursive( } Err(error) => { if error.kind() == std::io::ErrorKind::InvalidData { - // Skip non UTF-8 files + // Skip binary files. return Ok(()); } return Err(error) @@ -322,20 +343,16 @@ fn load_directory_recursive( })?; let file_name = entry.file_name(); - let source_child = source_path.join(&file_name); - let dest_child = dest_path.join(file_name.to_string_lossy().as_ref()); + let file_name = file_name.to_str().context("Expected UTF8 path")?; + let source_child = source_path.join(file_name); + let dest_child = dest_path.join(file_name); // Skip hidden files and common non-Python directories - let file_name_str = file_name.to_string_lossy(); - if file_name != ".venv" && file_name_str.starts_with('.') - || file_name_str == "__pycache__" - || file_name_str == "node_modules" - || file_name_str == ".git" - { + if file_name != ".venv" && (file_name.starts_with('.') || matches!(file_name, ".git")) { continue; } - load_directory_recursive(fs, &source_child, &dest_child)?; + copy_directory_recursive(fs, &source_child, &dest_child)?; } } From 219d13f95d2d44dd1bbf7061ce50b27f976fa402 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 13:47:32 +0200 Subject: [PATCH 07/28] Speedup git clone --- crates/ruff_benchmark/benches/ty_project.rs | 69 +++++++++-------- 
.../ruff_benchmark/src/real_world_projects.rs | 75 +++++++++++++++---- 2 files changed, 96 insertions(+), 48 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty_project.rs b/crates/ruff_benchmark/benches/ty_project.rs index 6b796b627dc17..c4f1f42818ab9 100644 --- a/crates/ruff_benchmark/benches/ty_project.rs +++ b/crates/ruff_benchmark/benches/ty_project.rs @@ -15,6 +15,36 @@ use ty_project::{Db, ProjectDatabase, ProjectMetadata}; #[track_caller] #[allow(clippy::print_stderr)] fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagnostics: usize) { + fn setup( + metadata: &ProjectMetadata, + system: &TestSystem, + check_paths: &[&SystemPath], + ) -> ProjectDatabase { + // Create new database instance and collect files for this instance + let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); + + db.project().set_included_paths( + &mut db, + check_paths.iter().map(|path| path.to_path_buf()).collect(), + ); + db + } + + fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { + let result = db.check(); + // Don't assert specific diagnostic count for real-world projects + // as they may have legitimate type issues + let diagnostics = result.len(); + + assert!( + diagnostics > 1 && diagnostics <= max_diagnostics, + "Expected between {} and {} diagnostics but got {}", + 1, + max_diagnostics, + diagnostics + ); + } + setup_rayon(); let start = Instant::now(); @@ -40,36 +70,9 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn let check_paths = setup_project.check_paths(); - fn setup( - metadata: &ProjectMetadata, - system: &TestSystem, - check_paths: &[&SystemPath], - ) -> ProjectDatabase { - // Create new database instance and collect files for this instance - let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); - - db.project().set_included_paths( - &mut db, - check_paths - .into_iter() - .map(|path| path.to_path_buf()) - .collect(), - ); - db - } 
- - fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { - let result = db.check(); - // Don't assert specific diagnostic count for real-world projects - // as they may have legitimate type issues - let diagnostics = result.len(); - - assert!(diagnostics > 1 && diagnostics <= max_diagnostics); - } - - criterion.bench_function(&setup_project.config.name, |b| { + criterion.bench_function(setup_project.config.name, |b| { b.iter_batched_ref( - || setup(&metadata, &system, &check_paths), + || setup(&metadata, &system, check_paths), |db| check_project(db, max_diagnostics), BatchSize::SmallInput, ); @@ -141,7 +144,7 @@ fn freqtrade(criterion: &mut Criterion) { python_version: PythonVersion::PY312, }; - bench_project(project, criterion, 10000); + bench_project(project, criterion, 400); } fn hydra(criterion: &mut Criterion) { @@ -156,7 +159,7 @@ fn hydra(criterion: &mut Criterion) { python_version: PythonVersion::PY313, }; - bench_project(project, criterion, 100000); + bench_project(project, criterion, 100); } fn attrs(criterion: &mut Criterion) { @@ -171,7 +174,7 @@ fn attrs(criterion: &mut Criterion) { python_version: PythonVersion::PY313, }; - bench_project(project, criterion, 100000); + bench_project(project, criterion, 100); } fn anyio(criterion: &mut Criterion) { @@ -186,7 +189,7 @@ fn anyio(criterion: &mut Criterion) { python_version: PythonVersion::PY313, }; - bench_project(project, criterion, 100000); + bench_project(project, criterion, 100); } static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs index dcb63534c4d98..24d1cf877b975 100644 --- a/crates/ruff_benchmark/src/real_world_projects.rs +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -115,7 +115,7 @@ impl<'a> Checkout<'a> { pub struct SetupProject<'a> { /// Path to the cloned project pub path: PathBuf, - /// Memory filesystem containing the checked out 
project directory and the virtual environemnt + /// Memory filesystem containing the checked out project directory and the virtual environment /// (only if the project has dependencies). pub memory_fs: MemoryFileSystem, /// Project configuration @@ -160,32 +160,56 @@ fn get_project_cache_dir(project_name: &str) -> Result { /// Update an existing repository fn update_repository(project_root: &Path, commit: &str) -> Result<()> { - // Fetch latest changes + // Check if we already have the specific commit let output = Command::new("git") - .args(["fetch", "origin"]) + .args(["cat-file", "-e", commit]) .current_dir(project_root) .output() - .context("Failed to execute git fetch command")?; + .context("Failed to check if commit exists")?; + // If commit doesn't exist locally, fetch it if !output.status.success() { - anyhow::bail!( - "Git fetch failed: {}", - String::from_utf8_lossy(&output.stderr) - ); + let output = Command::new("git") + .args(["fetch", "origin", commit]) + .current_dir(project_root) + .output() + .context("Failed to execute git fetch command")?; + + if !output.status.success() { + anyhow::bail!( + "Git fetch of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); + } + } + + // Check if we're already on the correct commit + let output = Command::new("git") + .args(["rev-parse", "HEAD"]) + .current_dir(project_root) + .output() + .context("Failed to get current commit")?; + + if output.status.success() { + let current_commit = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if current_commit == commit { + // Already on the correct commit, skip checkout + return Ok(()); + } } // Checkout specific commit - let target = commit; let output = Command::new("git") - .args(["reset", "--hard", target]) + .args(["checkout", commit]) .current_dir(project_root) .output() - .context("Failed to execute git reset command")?; + .context("Failed to execute git checkout command")?; if !output.status.success() { anyhow::bail!( "Git 
checkout of commit {} failed: {}", - target, + commit, String::from_utf8_lossy(&output.stderr) ); } @@ -200,9 +224,15 @@ fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<( std::fs::create_dir_all(parent).context("Failed to create parent directory for clone")?; } - // Clone the repository + // Clone with minimal depth and fetch only the specific commit let output = Command::new("git") - .args(["clone", repo_url, target_dir.to_str().unwrap()]) + .args([ + "clone", + "--filter=blob:none", // Don't download large files initially + "--no-checkout", // Don't checkout files yet + repo_url, + target_dir.to_str().unwrap(), + ]) .output() .context("Failed to execute git clone command")?; @@ -213,7 +243,22 @@ fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<( ); } - // Checkout specific commit + // Fetch the specific commit + let output = Command::new("git") + .args(["fetch", "origin", commit]) + .current_dir(target_dir) + .output() + .context("Failed to execute git fetch command")?; + + if !output.status.success() { + anyhow::bail!( + "Git fetch of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); + } + + // Checkout the specific commit let output = Command::new("git") .args(["checkout", commit]) .current_dir(target_dir) From a78c6093342916efc813e2739de243fcc6ae1bce Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 14:02:57 +0200 Subject: [PATCH 08/28] Add colour_science --- crates/ruff_benchmark/Cargo.toml | 4 ++++ crates/ruff_benchmark/benches/ty_project.rs | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index e9a652a653676..ce8d0ae30934b 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -38,6 +38,7 @@ harness = false [[bench]] name = "ty_project" +required-features = ["walltime"] harness = false [dependencies] @@ -63,6 +64,9 @@ workspace = 
true [features] codspeed = ["codspeed-criterion-compat"] +# Enables benchmark that should only run with codspeed's walltime runner. +# May disable benchmarks that only run on instrument runners. +walltime = [] [target.'cfg(target_os = "windows")'.dev-dependencies] mimalloc = { workspace = true } diff --git a/crates/ruff_benchmark/benches/ty_project.rs b/crates/ruff_benchmark/benches/ty_project.rs index c4f1f42818ab9..5d5ddb8faf47d 100644 --- a/crates/ruff_benchmark/benches/ty_project.rs +++ b/crates/ruff_benchmark/benches/ty_project.rs @@ -79,7 +79,7 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn }); } -#[cfg(not(feature = "codspeed"))] +// #[cfg(not(feature = "codspeed"))] fn colour_science(criterion: &mut Criterion) { // Setup the colour-science project (expensive, done once) let project = RealWorldProject { @@ -209,7 +209,7 @@ fn setup_rayon() { } #[cfg(feature = "codspeed")] -criterion_group!(real_world, anyio, attrs, pydantic, hydra); +criterion_group!(real_world, anyio, attrs, colour_science, pydantic, hydra); #[cfg(not(feature = "codspeed"))] criterion_group!( real_world, From 7cdfea4562662c5c37a6895849f132a39a9ea032 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 14:18:00 +0200 Subject: [PATCH 09/28] Use walltime runner --- .github/workflows/ci.yaml | 35 ++++++++++++++++++++++++++++++-- crates/ruff_benchmark/Cargo.toml | 9 +++++++- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f73aca898e961..fd9343868974a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -893,7 +893,7 @@ jobs: run: npm run fmt:check working-directory: playground - benchmarks: + benchmarks-instrumented: runs-on: ubuntu-24.04 needs: determine_changes if: ${{ github.repository == 'astral-sh/ruff' && !contains(github.event.pull_request.labels.*.name, 'no-test') && (needs.determine_changes.outputs.code == 'true' || github.ref == 
'refs/heads/main') }} @@ -916,7 +916,38 @@ jobs: tool: cargo-codspeed - name: "Build benchmarks" - run: cargo codspeed build --features codspeed -p ruff_benchmark + run: cargo codspeed build --features "codspeed,instrumented" --no-default-features -p ruff_benchmark + + - name: "Run benchmarks" + uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0 + with: + run: cargo codspeed run + token: ${{ secrets.CODSPEED_TOKEN }} + + benchmarks-walltime: + runs-on: codspeed-macro + needs: determine_changes + if: ${{ github.repository == 'astral-sh/ruff' && !contains(github.event.pull_request.labels.*.name, 'no-test') && (needs.determine_changes.outputs.ty == 'true' || github.ref == 'refs/heads/main') }} + timeout-minutes: 20 + steps: + - name: "Checkout Branch" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + + - uses: Swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2.7.8 + - uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1.0 + + - name: "Install Rust toolchain" + run: rustup show + + - name: "Install codspeed" + uses: taiki-e/install-action@735e5933943122c5ac182670a935f54a949265c1 # v2.52.4 + with: + tool: cargo-codspeed + + - name: "Build benchmarks" + run: cargo codspeed build --features "codspeed,walltime" --no-default-features -p ruff_benchmark - name: "Run benchmarks" uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0 diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index ce8d0ae30934b..af198b214b8d7 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -19,27 +19,32 @@ doctest = false [[bench]] name = "linter" harness = false +required-features = ["instrumented"] [[bench]] name = "lexer" harness = false +required-features = ["instrumented"] [[bench]] name = "parser" harness = false +required-features = ["instrumented"] [[bench]] name = "formatter" harness = 
false +required-features = ["instrumented"] [[bench]] name = "ty" harness = false +required-features = ["instrumented"] [[bench]] name = "ty_project" -required-features = ["walltime"] harness = false +required-features = ["walltime"] [dependencies] codspeed-criterion-compat = { workspace = true, default-features = false, optional = true } @@ -63,9 +68,11 @@ ty_project = { workspace = true } workspace = true [features] +default = ["instrumented", "walltime"] codspeed = ["codspeed-criterion-compat"] # Enables benchmark that should only run with codspeed's walltime runner. # May disable benchmarks that only run on instrument runners. +instrumented = [] walltime = [] [target.'cfg(target_os = "windows")'.dev-dependencies] From a669a2aefefc8beaf2ba3928db8ed1fc5e341ea6 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 14:59:05 +0200 Subject: [PATCH 10/28] More projects --- .github/actionlint.yaml | 1 + crates/ruff_benchmark/Cargo.toml | 37 +++-- crates/ruff_benchmark/benches/ty.rs | 118 +++++++++++++- .../benches/{ty_project.rs => ty_walltime.rs} | 149 +++++++----------- .../ruff_benchmark/src/real_world_projects.rs | 38 ++--- 5 files changed, 214 insertions(+), 129 deletions(-) rename crates/ruff_benchmark/benches/{ty_project.rs => ty_walltime.rs} (52%) diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index c3464e3992f21..81969ccb17244 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -9,3 +9,4 @@ self-hosted-runner: - depot-ubuntu-22.04-32 - github-windows-2025-x86_64-8 - github-windows-2025-x86_64-16 + - codspeed-macro diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index af198b214b8d7..d5bee3d44c520 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -42,27 +42,26 @@ harness = false required-features = ["instrumented"] [[bench]] -name = "ty_project" +name = "ty_walltime" harness = false required-features = ["walltime"] [dependencies] +ruff_db = { 
workspace = true } +ruff_python_ast = { workspace = true } +ruff_linter = { workspace = true, optional = true } +ruff_python_formatter = { workspace = true, optional = true } +ruff_python_parser = { workspace = true, optional = true } +ruff_python_trivia = { workspace = true, optional = true } +ty_project = { workspace = true, optional = true } + +anyhow = { workspace = true } codspeed-criterion-compat = { workspace = true, default-features = false, optional = true } criterion = { workspace = true, default-features = false } -serde = { workspace = true } -serde_json = { workspace = true } rayon = { workspace = true } rustc-hash = { workspace = true } -ruff_db = { workspace = true } -ruff_python_ast = { workspace = true } -anyhow = { workspace = true } - -[dev-dependencies] -ruff_linter = { workspace = true } -ruff_python_formatter = { workspace = true } -ruff_python_parser = { workspace = true } -ruff_python_trivia = { workspace = true } -ty_project = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } [lints] workspace = true @@ -70,10 +69,16 @@ workspace = true [features] default = ["instrumented", "walltime"] codspeed = ["codspeed-criterion-compat"] +# Enables the benchmark that should only run with codspeed's instrumented runner. +instrumented = [ + "ruff_linter", + "ruff_python_formatter", + "ruff_python_parser", + "ruff_python_trivia", + "ty_project", +] # Enables benchmark that should only run with codspeed's walltime runner. -# May disable benchmarks that only run on instrument runners. 
-instrumented = [] -walltime = [] +walltime = ["ruff_db/os", "ty_project"] [target.'cfg(target_os = "windows")'.dev-dependencies] mimalloc = { workspace = true } diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index 49399b7db9e47..1fef291abb22c 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -1,5 +1,6 @@ #![allow(clippy::disallowed_names)] use ruff_benchmark::criterion; +use ruff_benchmark::real_world_projects::RealWorldProject; use std::ops::Range; @@ -11,10 +12,10 @@ use ruff_benchmark::TestFile; use ruff_db::diagnostic::{Diagnostic, DiagnosticId, Severity}; use ruff_db::files::{File, system_path_to_file}; use ruff_db::source::source_text; -use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; +use ruff_db::system::{InMemorySystem, MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem}; use ruff_python_ast::PythonVersion; use ty_project::metadata::options::{EnvironmentOptions, Options}; -use ty_project::metadata::value::RangedValue; +use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::watch::{ChangeEvent, ChangedKind}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; @@ -347,10 +348,121 @@ fn benchmark_many_tuple_assignments(criterion: &mut Criterion) { }); } +#[track_caller] +fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagnostics: usize) { + fn setup( + metadata: &ProjectMetadata, + system: &TestSystem, + check_paths: &[&SystemPath], + ) -> ProjectDatabase { + // Create new database instance and collect files for this instance + let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); + + db.project().set_included_paths( + &mut db, + check_paths.iter().map(|path| path.to_path_buf()).collect(), + ); + db + } + + fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { + let result = db.check(); + // Don't assert specific diagnostic count for real-world 
projects + // as they may have legitimate type issues + let diagnostics = result.len(); + + assert!( + diagnostics > 1 && diagnostics <= max_diagnostics, + "Expected between {} and {} diagnostics but got {}", + 1, + max_diagnostics, + diagnostics + ); + } + + setup_rayon(); + + let setup_project = project.setup().expect("Failed to setup project"); + + let fs = setup_project + .copy_to_memory_fs() + .expect("Failed to copy project to memory fs"); + let system = TestSystem::new(InMemorySystem::from_memory_fs(fs)); + + let src_root = SystemPath::new("/"); + let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap(); + + metadata.apply_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(setup_project.config.python_version)), + python: (!setup_project.config().dependencies.is_empty()) + .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + let check_paths = setup_project.check_paths(); + + criterion.bench_function(&format!("project[{}]", setup_project.config.name), |b| { + b.iter_batched_ref( + || setup(&metadata, &system, check_paths), + |db| check_project(db, max_diagnostics), + BatchSize::SmallInput, + ); + }); +} + +fn hydra(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "hydra-zen", + repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", + commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", + paths: &[SystemPath::new("src")], + dependencies: &["pydantic", "beartype", "hydra-core"], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }; + + bench_project(project, criterion, 100); +} + +fn attrs(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "attrs", + repository: "https://github.com/python-attrs/attrs", + commit: 
"a6ae894aad9bc09edc7cdad8c416898784ceec9b", + paths: &[SystemPath::new("src")], + dependencies: &[], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }; + + bench_project(project, criterion, 100); +} + +fn anyio(criterion: &mut Criterion) { + // Setup the colour-science project (expensive, done once) + let project = RealWorldProject { + name: "anyio", + repository: "https://github.com/agronholm/anyio", + commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", + paths: &[SystemPath::new("src")], + dependencies: &[], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }; + + bench_project(project, criterion, 100); +} + criterion_group!(check_file, benchmark_cold, benchmark_incremental); criterion_group!( micro, benchmark_many_string_assignments, benchmark_many_tuple_assignments, ); -criterion_main!(check_file, micro); +criterion_group!(project, anyio, attrs, hydra); +criterion_main!(check_file, micro, project); diff --git a/crates/ruff_benchmark/benches/ty_project.rs b/crates/ruff_benchmark/benches/ty_walltime.rs similarity index 52% rename from crates/ruff_benchmark/benches/ty_project.rs rename to crates/ruff_benchmark/benches/ty_walltime.rs index 5d5ddb8faf47d..4713536a513b4 100644 --- a/crates/ruff_benchmark/benches/ty_project.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -1,23 +1,31 @@ -#![allow(clippy::disallowed_names)] -use std::time::Instant; - -use rayon::ThreadPoolBuilder; +use ::criterion::SamplingMode; use ruff_benchmark::criterion; use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use ruff_benchmark::real_world_projects::RealWorldProject; -use ruff_db::system::{InMemorySystem, SystemPath, TestSystem}; +use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf}; use ruff_python_ast::PythonVersion; use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, 
ProjectMetadata}; +#[derive(Copy, Clone)] +enum Size { + Small, + Medium, + Large, +} + #[track_caller] -#[allow(clippy::print_stderr)] -fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagnostics: usize) { +fn bench_project( + project: RealWorldProject, + criterion: &mut Criterion, + max_diagnostics: usize, + size: Size, +) { fn setup( metadata: &ProjectMetadata, - system: &TestSystem, + system: &OsSystem, check_paths: &[&SystemPath], ) -> ProjectDatabase { // Create new database instance and collect files for this instance @@ -25,7 +33,10 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn db.project().set_included_paths( &mut db, - check_paths.iter().map(|path| path.to_path_buf()).collect(), + check_paths + .iter() + .map(|path| SystemPath::absolute(path, system.current_directory())) + .collect(), ); db } @@ -45,18 +56,12 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn ); } - setup_rayon(); - - let start = Instant::now(); - eprintln!("Setting up project {}", project.name); let setup_project = project.setup().expect("Failed to setup project"); - eprintln!("Project setup took: {:.2}s", start.elapsed().as_secs_f64()); - let fs = setup_project.memory_fs().clone(); - let system = TestSystem::new(InMemorySystem::from_memory_fs(fs)); + let root = SystemPathBuf::from_path_buf(setup_project.path.clone()).unwrap(); + let system = OsSystem::new(&root); - let src_root = SystemPath::new("/"); - let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap(); + let mut metadata = ProjectMetadata::discover(&root, &system).unwrap(); metadata.apply_options(Options { environment: Some(EnvironmentOptions { @@ -70,7 +75,15 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn let check_paths = setup_project.check_paths(); - criterion.bench_function(setup_project.config.name, |b| { + let mut group = criterion.benchmark_group("project"); + 
group.sampling_mode(SamplingMode::Flat); + group.sample_size(match size { + Size::Small => 30, + Size::Medium => 20, + Size::Large => 10, + }); + + group.bench_function(setup_project.config.name, |b| { b.iter_batched_ref( || setup(&metadata, &system, check_paths), |db| check_project(db, max_diagnostics), @@ -79,9 +92,7 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn }); } -// #[cfg(not(feature = "codspeed"))] fn colour_science(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "colour-science", repository: "https://github.com/colour-science/colour", @@ -98,11 +109,10 @@ fn colour_science(criterion: &mut Criterion) { python_version: PythonVersion::PY310, }; - bench_project(project, criterion, 477); + bench_project(project, criterion, 477, Size::Medium); } fn pydantic(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "pydantic", repository: "https://github.com/pydantic/pydantic", @@ -118,12 +128,10 @@ fn pydantic(criterion: &mut Criterion) { python_version: PythonVersion::PY39, }; - bench_project(project, criterion, 1000); + bench_project(project, criterion, 1000, Size::Small); } -#[cfg(not(feature = "codspeed"))] fn freqtrade(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "freqtrade", repository: "https://github.com/freqtrade/freqtrade", @@ -144,80 +152,43 @@ fn freqtrade(criterion: &mut Criterion) { python_version: PythonVersion::PY312, }; - bench_project(project, criterion, 400); -} - -fn hydra(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) - let project = RealWorldProject { - name: "hydra-zen", - repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", - commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", - paths: &[SystemPath::new("src")], - 
dependencies: &["pydantic", "beartype", "hydra-core"], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY313, - }; - - bench_project(project, criterion, 100); + bench_project(project, criterion, 400, Size::Small); } -fn attrs(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) +fn pandas(criterion: &mut Criterion) { let project = RealWorldProject { - name: "attrs", - repository: "https://github.com/python-attrs/attrs", - commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b", - paths: &[SystemPath::new("src")], - dependencies: &[], + name: "pandas", + repository: "https://github.com/pandas-dev/pandas", + commit: "5909621e2267eb67943a95ef5e895e8484c53432", + paths: &[SystemPath::new("pandas")], + dependencies: &[ + "numpy", + "types-python-dateutil", + "types-pytz", + "types-PyMySQL", + "types-setuptools", + "pytest", + ], max_dep_date: "2025-06-17", - python_version: PythonVersion::PY313, + python_version: PythonVersion::PY312, }; - bench_project(project, criterion, 100); + bench_project(project, criterion, 3000, Size::Large); } -fn anyio(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) +fn sympy(criterion: &mut Criterion) { let project = RealWorldProject { - name: "anyio", - repository: "https://github.com/agronholm/anyio", - commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", - paths: &[SystemPath::new("src")], - dependencies: &[], + name: "sympy", + repository: "https://github.com/sympy/sympy", + commit: "22fc107a94eaabc4f6eb31470b39db65abb7a394", + paths: &[SystemPath::new("sympy")], + dependencies: &["mpmath"], max_dep_date: "2025-06-17", - python_version: PythonVersion::PY313, + python_version: PythonVersion::PY312, }; - bench_project(project, criterion, 100); -} - -static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); - -fn setup_rayon() { - // Initialize the rayon thread pool outside the benchmark because it has a significant cost. 
- // We limit the thread pool to only one (the current thread) because we're focused on - // where ty spends time and less about how well the code runs concurrently. - // We might want to add a benchmark focusing on concurrency to detect congestion in the future. - RAYON_INITIALIZED.call_once(|| { - ThreadPoolBuilder::new() - .num_threads(1) - .use_current_thread() - .build_global() - .unwrap(); - }); + bench_project(project, criterion, 13000, Size::Large); } -#[cfg(feature = "codspeed")] -criterion_group!(real_world, anyio, attrs, colour_science, pydantic, hydra); -#[cfg(not(feature = "codspeed"))] -criterion_group!( - real_world, - anyio, - attrs, - colour_science, - freqtrade, - hydra, - pydantic -); -criterion_main!(real_world); +criterion_group!(project, colour_science, freqtrade, pandas, pydantic, sympy); +criterion_main!(project); diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs index 24d1cf877b975..59aeb2035a689 100644 --- a/crates/ruff_benchmark/src/real_world_projects.rs +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -8,12 +8,13 @@ //! The basic steps for a project are: //! 1. Clone or update the project into a directory inside `./target`. The commits are pinnted to prevent flaky benchmark results due to new commits. //! 2. For projects with dependencies, run uv to create a virtual environment and install the dependencies. -//! 3. Read the entire project structure into a memory file system to reduce the IO noise in benchmarks. +//! 3. (optionally) Copy the entire project structure into a memory file system to reduce the IO noise in benchmarks. //! 4. (not in this module) Create a `ProjectDatabase` and run the benchmark. 
use std::ffi::OsStr; use std::path::{Path, PathBuf}; use std::process::Command; +use std::time::Instant; use anyhow::{Context, Result}; use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf}; @@ -42,6 +43,9 @@ pub struct RealWorldProject<'a> { impl<'a> RealWorldProject<'a> { /// Setup a real-world project for benchmarking pub fn setup(self) -> Result> { + let start = Instant::now(); + eprintln!("Setting up project {}", self.name); + // Create project directory in cargo target let project_root = get_project_cache_dir(self.name)?; @@ -84,12 +88,10 @@ impl<'a> RealWorldProject<'a> { ); } - // Load files into memory filesystem - let memory_fs = copy_into_memory_fs(&checkout.path)?; + eprintln!("Project setup took: {:.2}s", start.elapsed().as_secs_f64()); Ok(SetupProject { path: checkout.path, - memory_fs, config: checkout.project, }) } @@ -115,19 +117,11 @@ impl<'a> Checkout<'a> { pub struct SetupProject<'a> { /// Path to the cloned project pub path: PathBuf, - /// Memory filesystem containing the checked out project directory and the virtual environment - /// (only if the project has dependencies). - pub memory_fs: MemoryFileSystem, /// Project configuration pub config: RealWorldProject<'a>, } impl<'a> SetupProject<'a> { - /// Get the memory filesystem for benchmarking - pub fn memory_fs(&self) -> &MemoryFileSystem { - &self.memory_fs - } - /// Get the project configuration pub fn config(&self) -> &RealWorldProject<'a> { &self.config @@ -142,6 +136,15 @@ impl<'a> SetupProject<'a> { pub fn venv_path(&self) -> PathBuf { self.path.join(".venv") } + + /// Copies the entire project to a memory file system. 
+ pub fn copy_to_memory_fs(&self) -> anyhow::Result { + let fs = MemoryFileSystem::new(); + + copy_directory_recursive(&fs, &self.path, &SystemPathBuf::from("/"))?; + + Ok(fs) + } } /// Get the cache directory for a project in the cargo target directory @@ -149,6 +152,8 @@ fn get_project_cache_dir(project_name: &str) -> Result { let target_dir = cargo_target_directory() .cloned() .unwrap_or_else(|| PathBuf::from("target")); + let target_dir = + std::path::absolute(target_dir).context("Failed to construct an absolute path")?; let cache_dir = target_dir.join("benchmark_cache").join(project_name); if let Some(parent) = cache_dir.parent() { @@ -334,15 +339,6 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { Ok(()) } -/// Copy the repositroy content and the virtual environment into a `MemoryFileSystem` -fn copy_into_memory_fs(path: &Path) -> Result { - let fs = MemoryFileSystem::new(); - - copy_directory_recursive(&fs, path, &SystemPathBuf::from("/"))?; - - Ok(fs) -} - /// Recursively load a directory into the memory filesystem fn copy_directory_recursive( fs: &MemoryFileSystem, From bf826ab6bd0e77cbd4afb088fe42119b6b26097f Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 16:38:15 +0200 Subject: [PATCH 11/28] Reduce iterations for codspeed, disable multithreading --- crates/ruff_benchmark/benches/ty_walltime.rs | 34 +++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 4713536a513b4..f45293c3fc340 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -1,4 +1,5 @@ use ::criterion::SamplingMode; +use rayon::ThreadPoolBuilder; use ruff_benchmark::criterion; use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; @@ -56,6 +57,8 @@ fn bench_project( ); } + setup_rayon(); + let setup_project = project.setup().expect("Failed to setup project"); let 
root = SystemPathBuf::from_path_buf(setup_project.path.clone()).unwrap(); @@ -77,11 +80,16 @@ fn bench_project( let mut group = criterion.benchmark_group("project"); group.sampling_mode(SamplingMode::Flat); - group.sample_size(match size { - Size::Small => 30, - Size::Medium => 20, - Size::Large => 10, - }); + + if cfg!(feature = "codspeed") { + group.sample_size(10); + } else { + group.sample_size(match size { + Size::Small => 30, + Size::Medium => 20, + Size::Large => 10, + }); + } group.bench_function(setup_project.config.name, |b| { b.iter_batched_ref( @@ -190,5 +198,21 @@ fn sympy(criterion: &mut Criterion) { bench_project(project, criterion, 13000, Size::Large); } +static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); + +fn setup_rayon() { + // Initialize the rayon thread pool outside the benchmark because it has a significant cost. + // We limit the thread pool to only one (the current thread) because we're focused on + // where ty spends time and less about how well the code runs concurrently. + // We might want to add a benchmark focusing on concurrency to detect congestion in the future. 
+ RAYON_INITIALIZED.call_once(|| { + ThreadPoolBuilder::new() + .num_threads(1) + .use_current_thread() + .build_global() + .unwrap(); + }); +} + criterion_group!(project, colour_science, freqtrade, pandas, pydantic, sympy); criterion_main!(project); From dac6ba043803ba5bde0badff7b3a47f926ee7ca0 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 16:47:25 +0200 Subject: [PATCH 12/28] Use --quick --- .github/workflows/ci.yaml | 2 +- crates/ruff_benchmark/benches/ty_walltime.rs | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fd9343868974a..2d1cb35778065 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -947,7 +947,7 @@ jobs: tool: cargo-codspeed - name: "Build benchmarks" - run: cargo codspeed build --features "codspeed,walltime" --no-default-features -p ruff_benchmark + run: cargo codspeed build --features "codspeed,walltime" --no-default-features -p ruff_benchmark -- --quick - name: "Run benchmarks" uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0 diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index f45293c3fc340..b3d1e5890fc34 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -57,8 +57,6 @@ fn bench_project( ); } - setup_rayon(); - let setup_project = project.setup().expect("Failed to setup project"); let root = SystemPathBuf::from_path_buf(setup_project.path.clone()).unwrap(); @@ -198,21 +196,5 @@ fn sympy(criterion: &mut Criterion) { bench_project(project, criterion, 13000, Size::Large); } -static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); - -fn setup_rayon() { - // Initialize the rayon thread pool outside the benchmark because it has a significant cost. 
- // We limit the thread pool to only one (the current thread) because we're focused on - // where ty spends time and less about how well the code runs concurrently. - // We might want to add a benchmark focusing on concurrency to detect congestion in the future. - RAYON_INITIALIZED.call_once(|| { - ThreadPoolBuilder::new() - .num_threads(1) - .use_current_thread() - .build_global() - .unwrap(); - }); -} - criterion_group!(project, colour_science, freqtrade, pandas, pydantic, sympy); criterion_main!(project); From 7d132083087bbeea70c66955f8b434427cd4306d Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 17:24:00 +0200 Subject: [PATCH 13/28] Try devan --- .github/workflows/ci.yaml | 2 +- Cargo.lock | 73 ++++++++++++++++++++ Cargo.toml | 13 ++-- crates/ruff_benchmark/Cargo.toml | 11 +-- crates/ruff_benchmark/benches/ty_walltime.rs | 72 ++++++------------- crates/ruff_benchmark/src/lib.rs | 1 + 6 files changed, 109 insertions(+), 63 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2d1cb35778065..fd9343868974a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -947,7 +947,7 @@ jobs: tool: cargo-codspeed - name: "Build benchmarks" - run: cargo codspeed build --features "codspeed,walltime" --no-default-features -p ruff_benchmark -- --quick + run: cargo codspeed build --features "codspeed,walltime" --no-default-features -p ruff_benchmark - name: "Run benchmarks" uses: CodSpeedHQ/action@0010eb0ca6e89b80c88e8edaaa07cfe5f3e6664d # v3.5.0 diff --git a/Cargo.lock b/Cargo.lock index 87e7aaba379d9..e0b57a4fc9174 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -479,6 +479,46 @@ dependencies = [ "walkdir", ] +[[package]] +name = "codspeed-divan-compat" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8620a09dfaf37b3c45f982c4b65bd8f9b0203944da3ffa705c0fcae6b84655ff" +dependencies = [ + "codspeed", + "codspeed-divan-compat-macros", + 
"codspeed-divan-compat-walltime", +] + +[[package]] +name = "codspeed-divan-compat-macros" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30fe872bc4214626b35d3a1706a905d0243503bb6ba3bb7be2fc59083d5d680c" +dependencies = [ + "divan-macros", + "itertools 0.14.0", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "codspeed-divan-compat-walltime" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "104caa97b36d4092d89e24e4b103b40ede1edab03c0372d19e14a33f9393132b" +dependencies = [ + "cfg-if", + "clap", + "codspeed", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + [[package]] name = "colorchoice" version = "1.0.3" @@ -519,6 +559,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + [[package]] name = "console" version = "0.15.11" @@ -837,6 +883,17 @@ dependencies = [ "syn", ] +[[package]] +name = "divan-macros" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8dc51d98e636f5e3b0759a39257458b22619cac7e96d932da6eeb052891bb67c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -2272,6 +2329,15 @@ dependencies = [ "yansi", ] +[[package]] +name = "proc-macro-crate" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +dependencies = [ + "toml_edit", +] + [[package]] name = "proc-macro2" version = "1.0.95" @@ -2485,6 +2551,12 @@ dependencies = [ "regex-syntax 0.8.5", ] +[[package]] +name = "regex-lite" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" + [[package]] name = "regex-syntax" version = "0.6.29" @@ -2588,6 +2660,7 @@ version = "0.0.0" dependencies = [ "anyhow", "codspeed-criterion-compat", + "codspeed-divan-compat", "criterion", "mimalloc", "rayon", diff --git a/Cargo.toml b/Cargo.toml index 1da4d9bdff913..60b73e6feccb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,7 @@ camino = { version = "1.1.7" } clap = { version = "4.5.3", features = ["derive"] } clap_complete_command = { version = "0.6.0" } clearscreen = { version = "4.0.0" } +divan = { package = "codspeed-divan-compat", version = "2.10.1" } codspeed-criterion-compat = { version = "2.6.0", default-features = false } colored = { version = "3.0.0" } console_error_panic_hook = { version = "0.1.7" } @@ -167,7 +168,7 @@ tracing-subscriber = { version = "0.3.18", default-features = false, features = "env-filter", "fmt", "ansi", - "smallvec" + "smallvec", ] } tryfn = { version = "0.2.1" } typed-arena = { version = "2.0.2" } @@ -177,11 +178,7 @@ unicode-width = { version = "0.2.0" } unicode_names2 = { version = "1.2.2" } unicode-normalization = { version = "0.1.23" } url = { version = "2.5.0" } -uuid = { version = "1.6.1", features = [ - "v4", - "fast-rng", - "macro-diagnostics", -] } +uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics"] } walkdir = { version = "2.3.2" } wasm-bindgen = { version = "0.2.92" } wasm-bindgen-test = { version = "0.3.42" } @@ -216,8 +213,8 @@ must_use_candidate = "allow" similar_names = "allow" single_match_else = "allow" too_many_lines = "allow" -needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block. -unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often. 
+needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block. +unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often. # Without the hashes we run into a `rustfmt` bug in some snapshot tests, see #13250 needless_raw_string_hashes = "allow" # Disallowed restriction lints diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index d5bee3d44c520..92b9b2b458396 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -55,9 +55,10 @@ ruff_python_parser = { workspace = true, optional = true } ruff_python_trivia = { workspace = true, optional = true } ty_project = { workspace = true, optional = true } +divan = { workspace = true, optional = true } anyhow = { workspace = true } codspeed-criterion-compat = { workspace = true, default-features = false, optional = true } -criterion = { workspace = true, default-features = false } +criterion = { workspace = true, default-features = false, optional = true } rayon = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } @@ -68,17 +69,17 @@ workspace = true [features] default = ["instrumented", "walltime"] -codspeed = ["codspeed-criterion-compat"] -# Enables the benchmark that should only run with codspeed's instrumented runner. instrumented = [ + "criterion", "ruff_linter", "ruff_python_formatter", "ruff_python_parser", "ruff_python_trivia", "ty_project", -] +] # Enables the benchmark that should only run with codspeed's instrumented runner.default = ["instrumented", "walltime"] +codspeed = ["codspeed-criterion-compat"] # Enables benchmark that should only run with codspeed's walltime runner. 
-walltime = ["ruff_db/os", "ty_project"] +walltime = ["ruff_db/os", "ty_project", "divan"] [target.'cfg(target_os = "windows")'.dev-dependencies] mimalloc = { workspace = true } diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index b3d1e5890fc34..99934c19b694d 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -1,8 +1,5 @@ -use ::criterion::SamplingMode; -use rayon::ThreadPoolBuilder; -use ruff_benchmark::criterion; +use divan::{Bencher, bench}; -use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use ruff_benchmark::real_world_projects::RealWorldProject; use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf}; use ruff_python_ast::PythonVersion; @@ -10,20 +7,8 @@ use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; -#[derive(Copy, Clone)] -enum Size { - Small, - Medium, - Large, -} - #[track_caller] -fn bench_project( - project: RealWorldProject, - criterion: &mut Criterion, - max_diagnostics: usize, - size: Size, -) { +fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: usize) { fn setup( metadata: &ProjectMetadata, system: &OsSystem, @@ -76,29 +61,13 @@ fn bench_project( let check_paths = setup_project.check_paths(); - let mut group = criterion.benchmark_group("project"); - group.sampling_mode(SamplingMode::Flat); - - if cfg!(feature = "codspeed") { - group.sample_size(10); - } else { - group.sample_size(match size { - Size::Small => 30, - Size::Medium => 20, - Size::Large => 10, - }); - } - - group.bench_function(setup_project.config.name, |b| { - b.iter_batched_ref( - || setup(&metadata, &system, check_paths), - |db| check_project(db, max_diagnostics), - BatchSize::SmallInput, - ); - }); + bencher + .with_inputs(|| setup(&metadata, &system, check_paths)) + 
.bench_local_refs(|db| check_project(db, max_diagnostics)); } -fn colour_science(criterion: &mut Criterion) { +#[bench(max_time = 20)] +fn colour_science(bencher: Bencher) { let project = RealWorldProject { name: "colour-science", repository: "https://github.com/colour-science/colour", @@ -115,10 +84,11 @@ fn colour_science(criterion: &mut Criterion) { python_version: PythonVersion::PY310, }; - bench_project(project, criterion, 477, Size::Medium); + bench_project(bencher, project, 477); } -fn pydantic(criterion: &mut Criterion) { +#[bench(max_time = 10)] +fn pydantic(bencher: Bencher) { let project = RealWorldProject { name: "pydantic", repository: "https://github.com/pydantic/pydantic", @@ -134,10 +104,11 @@ fn pydantic(criterion: &mut Criterion) { python_version: PythonVersion::PY39, }; - bench_project(project, criterion, 1000, Size::Small); + bench_project(bencher, project, 1000); } -fn freqtrade(criterion: &mut Criterion) { +#[bench(max_time = 10)] +fn freqtrade(bencher: Bencher) { let project = RealWorldProject { name: "freqtrade", repository: "https://github.com/freqtrade/freqtrade", @@ -158,10 +129,11 @@ fn freqtrade(criterion: &mut Criterion) { python_version: PythonVersion::PY312, }; - bench_project(project, criterion, 400, Size::Small); + bench_project(bencher, project, 400); } -fn pandas(criterion: &mut Criterion) { +#[bench(max_time = 80)] +fn pandas(bencher: Bencher) { let project = RealWorldProject { name: "pandas", repository: "https://github.com/pandas-dev/pandas", @@ -179,10 +151,11 @@ fn pandas(criterion: &mut Criterion) { python_version: PythonVersion::PY312, }; - bench_project(project, criterion, 3000, Size::Large); + bench_project(bencher, project, 3000); } -fn sympy(criterion: &mut Criterion) { +#[bench(max_time = 120)] +fn sympy(bencher: Bencher) { let project = RealWorldProject { name: "sympy", repository: "https://github.com/sympy/sympy", @@ -193,8 +166,9 @@ fn sympy(criterion: &mut Criterion) { python_version: PythonVersion::PY312, }; - 
bench_project(project, criterion, 13000, Size::Large); + bench_project(bencher, project, 13000); } -criterion_group!(project, colour_science, freqtrade, pandas, pydantic, sympy); -criterion_main!(project); +fn main() { + divan::main(); +} diff --git a/crates/ruff_benchmark/src/lib.rs b/crates/ruff_benchmark/src/lib.rs index 8b23f832c9903..34ba0d63640de 100644 --- a/crates/ruff_benchmark/src/lib.rs +++ b/crates/ruff_benchmark/src/lib.rs @@ -1,5 +1,6 @@ use std::path::PathBuf; +#[cfg(feature = "instrumented")] pub mod criterion; pub mod real_world_projects; From 78c9f4cba4998ab393ace566c3fdcc05116c004c Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 17:50:46 +0200 Subject: [PATCH 14/28] Prewarm rayon --- crates/ruff_benchmark/benches/ty_walltime.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 99934c19b694d..34767b6f2133c 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -1,5 +1,6 @@ use divan::{Bencher, bench}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; use ruff_benchmark::real_world_projects::RealWorldProject; use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf}; use ruff_python_ast::PythonVersion; @@ -7,6 +8,15 @@ use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; +fn prewarm_rayon() { + let result = (0..100) + .into_par_iter() + .map(|number| std::hint::black_box(number)) + .collect::>(); + + std::hint::black_box(result); +} + #[track_caller] fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: usize) { fn setup( @@ -42,6 +52,8 @@ fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: u ); } + prewarm_rayon(); + let setup_project = 
project.setup().expect("Failed to setup project"); let root = SystemPathBuf::from_path_buf(setup_project.path.clone()).unwrap(); @@ -155,6 +167,7 @@ fn pandas(bencher: Bencher) { } #[bench(max_time = 120)] +#[ignore = "Ignored by default because it takes one minute to run. We may be able to run it in the future once we emit fewer diagnostics."] fn sympy(bencher: Bencher) { let project = RealWorldProject { name: "sympy", From 463284fbdc152cac61b4bde3759f72d0bd647dd1 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 18:17:02 +0200 Subject: [PATCH 15/28] Disable multithreading again --- crates/ruff_benchmark/benches/ty_walltime.rs | 31 +++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 34767b6f2133c..9feb87c65124a 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -1,6 +1,6 @@ use divan::{Bencher, bench}; -use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use rayon::ThreadPoolBuilder; use ruff_benchmark::real_world_projects::RealWorldProject; use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf}; use ruff_python_ast::PythonVersion; @@ -8,13 +8,24 @@ use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; -fn prewarm_rayon() { - let result = (0..100) - .into_par_iter() - .map(|number| std::hint::black_box(number)) - .collect::>(); - - std::hint::black_box(result); +static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); + +fn setup_rayon() { + // Initialize the rayon thread pool outside the benchmark because it has a significant cost. 
+ // Ideally, we wouldn't have to do this but there's a significant variance + // if we run the benchmarks multi threaded: + // ``` + // ty_walltime fastest │ slowest │ median │ mean │ samples │ iters + // ╰─ colour_science 153.7 ms │ 2.177 s │ 2.106 s │ 1.921 s │ 10 │ 10 + // + // Probably something worth looking into in the future. + RAYON_INITIALIZED.call_once(|| { + ThreadPoolBuilder::new() + .num_threads(1) + .use_current_thread() + .build_global() + .unwrap(); + }); } #[track_caller] @@ -52,7 +63,7 @@ fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: u ); } - prewarm_rayon(); + setup_rayon(); let setup_project = project.setup().expect("Failed to setup project"); @@ -167,7 +178,7 @@ fn pandas(bencher: Bencher) { } #[bench(max_time = 120)] -#[ignore = "Ignored by default because it takes one minute to run. We may be able to run it in the future once we emit fewer diagnostics."] +// #[ignore = "Ignored by default because it takes one minute to run. We may be able to run it in the future once we emit fewer diagnostics."] fn sympy(bencher: Bencher) { let project = RealWorldProject { name: "sympy", From 9012cb71377f8eac48b304e21f8c1fa3af17f4d7 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 18:30:21 +0200 Subject: [PATCH 16/28] Unignore sympy --- crates/ruff_benchmark/benches/ty_walltime.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 9feb87c65124a..07bfd628ba769 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -178,7 +178,6 @@ fn pandas(bencher: Bencher) { } #[bench(max_time = 120)] -// #[ignore = "Ignored by default because it takes one minute to run. 
We may be able to run it in the future once we emit fewer diagnostics."] fn sympy(bencher: Bencher) { let project = RealWorldProject { name: "sympy", From 62040f03b26881b893daaf37070d6b5adbed1c60 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 18:36:11 +0200 Subject: [PATCH 17/28] Specify sample count instead of duration --- crates/ruff_benchmark/benches/ty_walltime.rs | 208 ++++++++++--------- 1 file changed, 109 insertions(+), 99 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 07bfd628ba769..ac70d03f1d728 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -1,9 +1,9 @@ -use divan::{Bencher, bench}; +use divan::{Bencher, bench_group}; use rayon::ThreadPoolBuilder; use ruff_benchmark::real_world_projects::RealWorldProject; use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf}; -use ruff_python_ast::PythonVersion; + use ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; @@ -89,107 +89,117 @@ fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: u .bench_local_refs(|db| check_project(db, max_diagnostics)); } -#[bench(max_time = 20)] -fn colour_science(bencher: Bencher) { - let project = RealWorldProject { - name: "colour-science", - repository: "https://github.com/colour-science/colour", - commit: "a17e2335c29e7b6f08080aa4c93cfa9b61f84757", - paths: &[SystemPath::new("colour")], - dependencies: &[ - "matplotlib", - "numpy", - "pandas-stubs", - "pytest", - "scipy-stubs", - ], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY310, - }; - - bench_project(bencher, project, 477); -} +#[bench_group(sample_size = 2, sample_count = 2)] +mod benches { + use divan::{Bencher, bench}; + use ruff_benchmark::real_world_projects::RealWorldProject; 
+ use ruff_db::system::SystemPath; + use ruff_python_ast::PythonVersion; + + use crate::bench_project; + + #[bench] + fn colour_science(bencher: Bencher) { + let project = RealWorldProject { + name: "colour-science", + repository: "https://github.com/colour-science/colour", + commit: "a17e2335c29e7b6f08080aa4c93cfa9b61f84757", + paths: &[SystemPath::new("colour")], + dependencies: &[ + "matplotlib", + "numpy", + "pandas-stubs", + "pytest", + "scipy-stubs", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY310, + }; + + bench_project(bencher, project, 477); + } -#[bench(max_time = 10)] -fn pydantic(bencher: Bencher) { - let project = RealWorldProject { - name: "pydantic", - repository: "https://github.com/pydantic/pydantic", - commit: "0c4a22b64b23dfad27387750cf07487efc45eb05", - paths: &[SystemPath::new("pydantic")], - dependencies: &[ - "annotated-types", - "pydantic-core", - "typing-extensions", - "typing-inspection", - ], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY39, - }; - - bench_project(bencher, project, 1000); -} + #[bench] + fn pydantic(bencher: Bencher) { + let project = RealWorldProject { + name: "pydantic", + repository: "https://github.com/pydantic/pydantic", + commit: "0c4a22b64b23dfad27387750cf07487efc45eb05", + paths: &[SystemPath::new("pydantic")], + dependencies: &[ + "annotated-types", + "pydantic-core", + "typing-extensions", + "typing-inspection", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY39, + }; + + bench_project(bencher, project, 1000); + } -#[bench(max_time = 10)] -fn freqtrade(bencher: Bencher) { - let project = RealWorldProject { - name: "freqtrade", - repository: "https://github.com/freqtrade/freqtrade", - commit: "2d842ea129e56575852ee0c45383c8c3f706be19", - paths: &[SystemPath::new("freqtrade")], - dependencies: &[ - "numpy", - "pandas-stubs", - "pydantic", - "sqlalchemy", - "types-cachetools", - "types-filelock", - "types-python-dateutil", - "types-requests", - 
"types-tabulate", - ], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY312, - }; - - bench_project(bencher, project, 400); -} + #[bench] + fn freqtrade(bencher: Bencher) { + let project = RealWorldProject { + name: "freqtrade", + repository: "https://github.com/freqtrade/freqtrade", + commit: "2d842ea129e56575852ee0c45383c8c3f706be19", + paths: &[SystemPath::new("freqtrade")], + dependencies: &[ + "numpy", + "pandas-stubs", + "pydantic", + "sqlalchemy", + "types-cachetools", + "types-filelock", + "types-python-dateutil", + "types-requests", + "types-tabulate", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }; + + bench_project(bencher, project, 400); + } -#[bench(max_time = 80)] -fn pandas(bencher: Bencher) { - let project = RealWorldProject { - name: "pandas", - repository: "https://github.com/pandas-dev/pandas", - commit: "5909621e2267eb67943a95ef5e895e8484c53432", - paths: &[SystemPath::new("pandas")], - dependencies: &[ - "numpy", - "types-python-dateutil", - "types-pytz", - "types-PyMySQL", - "types-setuptools", - "pytest", - ], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY312, - }; - - bench_project(bencher, project, 3000); -} + #[bench] + fn pandas(bencher: Bencher) { + let project = RealWorldProject { + name: "pandas", + repository: "https://github.com/pandas-dev/pandas", + commit: "5909621e2267eb67943a95ef5e895e8484c53432", + paths: &[SystemPath::new("pandas")], + dependencies: &[ + "numpy", + "types-python-dateutil", + "types-pytz", + "types-PyMySQL", + "types-setuptools", + "pytest", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }; + + bench_project(bencher, project, 3000); + } -#[bench(max_time = 120)] -fn sympy(bencher: Bencher) { - let project = RealWorldProject { - name: "sympy", - repository: "https://github.com/sympy/sympy", - commit: "22fc107a94eaabc4f6eb31470b39db65abb7a394", - paths: &[SystemPath::new("sympy")], - dependencies: &["mpmath"], - 
max_dep_date: "2025-06-17", - python_version: PythonVersion::PY312, - }; - - bench_project(bencher, project, 13000); + #[bench] + fn sympy(bencher: Bencher) { + let project = RealWorldProject { + name: "sympy", + repository: "https://github.com/sympy/sympy", + commit: "22fc107a94eaabc4f6eb31470b39db65abb7a394", + paths: &[SystemPath::new("sympy")], + dependencies: &["mpmath"], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }; + + bench_project(bencher, project, 13000); + } } fn main() { From a805bf00e92df7ba5a4fa18125ce61a96887e9f4 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 18:41:58 +0200 Subject: [PATCH 18/28] Fix sample size --- crates/ruff_benchmark/benches/ty_walltime.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index ac70d03f1d728..1cd31edc3255e 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -89,7 +89,7 @@ fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: u .bench_local_refs(|db| check_project(db, max_diagnostics)); } -#[bench_group(sample_size = 2, sample_count = 2)] +#[bench_group(sample_size = 1, sample_count = 3)] mod benches { use divan::{Bencher, bench}; use ruff_benchmark::real_world_projects::RealWorldProject; From 5c7950f45ce21e32cc56a5a93cb65e4a8eba7a8b Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 18:59:06 +0200 Subject: [PATCH 19/28] Update crates/ruff_benchmark/Cargo.toml --- crates/ruff_benchmark/Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index 92b9b2b458396..98899375b789e 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -69,6 +69,7 @@ workspace = true [features] default = ["instrumented", "walltime"] +# Enables the benchmark that 
should only run with codspeed's instrumented runner instrumented = [ "criterion", "ruff_linter", @@ -76,7 +77,7 @@ instrumented = [ "ruff_python_parser", "ruff_python_trivia", "ty_project", -] # Enables the benchmark that should only run with codspeed's instrumented runner.default = ["instrumented", "walltime"] +] codspeed = ["codspeed-criterion-compat"] # Enables benchmark that should only run with codspeed's walltime runner. walltime = ["ruff_db/os", "ty_project", "divan"] From 860d3b872ec2cda9bda3d20214418a338aeae74e Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 18:59:33 +0200 Subject: [PATCH 20/28] Update crates/ruff_benchmark/benches/ty.rs --- crates/ruff_benchmark/benches/ty.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index 1fef291abb22c..346d4b8ffee5e 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -367,8 +367,6 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { let result = db.check(); - // Don't assert specific diagnostic count for real-world projects - // as they may have legitimate type issues let diagnostics = result.len(); assert!( From 53948d3b23f2ea59bd81e556db6954dc9ca23e39 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 19:02:25 +0200 Subject: [PATCH 21/28] Apply suggestions from code review --- crates/ruff_benchmark/benches/ty.rs | 4 ---- crates/ruff_benchmark/benches/ty_walltime.rs | 5 +---- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index 346d4b8ffee5e..ab6e5d4d3f698 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -355,7 +355,6 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn system: &TestSystem, check_paths: 
&[&SystemPath], ) -> ProjectDatabase { - // Create new database instance and collect files for this instance let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); db.project().set_included_paths( @@ -412,7 +411,6 @@ fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn } fn hydra(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "hydra-zen", repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", @@ -427,7 +425,6 @@ fn hydra(criterion: &mut Criterion) { } fn attrs(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "attrs", repository: "https://github.com/python-attrs/attrs", @@ -442,7 +439,6 @@ fn attrs(criterion: &mut Criterion) { } fn anyio(criterion: &mut Criterion) { - // Setup the colour-science project (expensive, done once) let project = RealWorldProject { name: "anyio", repository: "https://github.com/agronholm/anyio", diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 1cd31edc3255e..45823065c8fb8 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -16,7 +16,7 @@ fn setup_rayon() { // if we run the benchmarks multi threaded: // ``` // ty_walltime fastest │ slowest │ median │ mean │ samples │ iters - // ╰─ colour_science 153.7 ms │ 2.177 s │ 2.106 s │ 1.921 s │ 10 │ 10 + // ╰─ colour_science 153.7 ms │ 2.177 s │ 2.106 s │ 1.921 s │ 10 │ 10 // // Probably something worth looking into in the future. 
RAYON_INITIALIZED.call_once(|| { @@ -35,7 +35,6 @@ fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: u system: &OsSystem, check_paths: &[&SystemPath], ) -> ProjectDatabase { - // Create new database instance and collect files for this instance let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); db.project().set_included_paths( @@ -50,8 +49,6 @@ fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: u fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { let result = db.check(); - // Don't assert specific diagnostic count for real-world projects - // as they may have legitimate type issues let diagnostics = result.len(); assert!( From d3ac9ba79839d0f616b12f6dc8b619944c11a034 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 17 Jun 2025 19:21:46 +0200 Subject: [PATCH 22/28] Add altair --- crates/ruff_benchmark/benches/ty_walltime.rs | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 45823065c8fb8..ea30d62ca7d49 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -14,9 +14,11 @@ fn setup_rayon() { // Initialize the rayon thread pool outside the benchmark because it has a significant cost. // Ideally, we wouldn't have to do this but there's a significant variance // if we run the benchmarks multi threaded: + // // ``` // ty_walltime fastest │ slowest │ median │ mean │ samples │ iters // ╰─ colour_science 153.7 ms │ 2.177 s │ 2.106 s │ 1.921 s │ 10 │ 10 + // ``` // // Probably something worth looking into in the future. 
RAYON_INITIALIZED.call_once(|| { @@ -197,6 +199,31 @@ mod benches { bench_project(bencher, project, 13000); } + + #[bench] + fn altair(bencher: Bencher) { + let project = RealWorldProject { + name: "altair", + repository: "https://github.com/vega/altair", + commit: "d1f4a1ef89006e5f6752ef1f6df4b7a509336fba", + paths: &[SystemPath::new("altair")], + dependencies: &[ + "jinja2", + "narwhals", + "numpy", + "packaging", + "pandas-stubs", + "pyarrow-stubs", + "pytest", + "scipy-stubs", + "types-jsonschema", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }; + + bench_project(bencher, project, 13000); + } } fn main() { From ece64c25653e05ee970803957717643de9839deb Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 18 Jun 2025 09:30:57 +0200 Subject: [PATCH 23/28] Enable multithreading --- crates/ruff_benchmark/Cargo.toml | 3 +- crates/ruff_benchmark/benches/ty.rs | 12 +- crates/ruff_benchmark/benches/ty_walltime.rs | 326 +++++++++--------- .../ruff_benchmark/src/real_world_projects.rs | 26 +- 4 files changed, 186 insertions(+), 181 deletions(-) diff --git a/crates/ruff_benchmark/Cargo.toml b/crates/ruff_benchmark/Cargo.toml index 98899375b789e..b6b4b40de2b33 100644 --- a/crates/ruff_benchmark/Cargo.toml +++ b/crates/ruff_benchmark/Cargo.toml @@ -47,7 +47,7 @@ harness = false required-features = ["walltime"] [dependencies] -ruff_db = { workspace = true } +ruff_db = { workspace = true, features = ["testing"] } ruff_python_ast = { workspace = true } ruff_linter = { workspace = true, optional = true } ruff_python_formatter = { workspace = true, optional = true } @@ -63,6 +63,7 @@ rayon = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +tracing = { workspace = true } [lints] workspace = true diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index ab6e5d4d3f698..7b61c5d9be860 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ 
b/crates/ruff_benchmark/benches/ty.rs @@ -415,8 +415,8 @@ fn hydra(criterion: &mut Criterion) { name: "hydra-zen", repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", - paths: &[SystemPath::new("src")], - dependencies: &["pydantic", "beartype", "hydra-core"], + paths: vec![SystemPath::new("src")], + dependencies: vec!["pydantic", "beartype", "hydra-core"], max_dep_date: "2025-06-17", python_version: PythonVersion::PY313, }; @@ -429,8 +429,8 @@ fn attrs(criterion: &mut Criterion) { name: "attrs", repository: "https://github.com/python-attrs/attrs", commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b", - paths: &[SystemPath::new("src")], - dependencies: &[], + paths: vec![SystemPath::new("src")], + dependencies: vec![], max_dep_date: "2025-06-17", python_version: PythonVersion::PY313, }; @@ -443,8 +443,8 @@ fn anyio(criterion: &mut Criterion) { name: "anyio", repository: "https://github.com/agronholm/anyio", commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", - paths: &[SystemPath::new("src")], - dependencies: &[], + paths: vec![SystemPath::new("src")], + dependencies: vec![], max_dep_date: "2025-06-17", python_version: PythonVersion::PY313, }; diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index ea30d62ca7d49..586534dde44c9 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -1,110 +1,113 @@ -use divan::{Bencher, bench_group}; +use std::fmt::{Display, Formatter}; -use rayon::ThreadPoolBuilder; -use ruff_benchmark::real_world_projects::RealWorldProject; -use ruff_db::system::{OsSystem, System, SystemPath, SystemPathBuf}; +use divan::{Bencher, bench}; +use ruff_benchmark::real_world_projects::{RealWorldProject, SetupProject}; +use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf}; + +use ruff_db::testing::setup_logging_with_filter; +use ruff_python_ast::PythonVersion; use 
ty_project::metadata::options::{EnvironmentOptions, Options}; use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; -static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new(); - -fn setup_rayon() { - // Initialize the rayon thread pool outside the benchmark because it has a significant cost. - // Ideally, we wouldn't have to do this but there's a significant variance - // if we run the benchmarks multi threaded: - // - // ``` - // ty_walltime fastest │ slowest │ median │ mean │ samples │ iters - // ╰─ colour_science 153.7 ms │ 2.177 s │ 2.106 s │ 1.921 s │ 10 │ 10 - // ``` - // - // Probably something worth looking into in the future. - RAYON_INITIALIZED.call_once(|| { - ThreadPoolBuilder::new() - .num_threads(1) - .use_current_thread() - .build_global() - .unwrap(); - }); +struct Benchmark<'a> { + project: SetupProject<'a>, + max_diagnostics: usize, } -#[track_caller] -fn bench_project(bencher: Bencher, project: RealWorldProject, max_diagnostics: usize) { - fn setup( - metadata: &ProjectMetadata, - system: &OsSystem, - check_paths: &[&SystemPath], - ) -> ProjectDatabase { - let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); +impl<'a> Benchmark<'a> { + fn new(project: RealWorldProject<'a>, max_diagnostics: usize) -> Self { + let setup_project = project.setup().expect("Failed to setup project"); + + Self { + project: setup_project, + max_diagnostics, + } + } + + fn setup_iteration(&self) -> ProjectDatabase { + let root = SystemPathBuf::from_path_buf(self.project.path.clone()).unwrap(); + let system = OsSystem::new(&root); + + let mut metadata = ProjectMetadata::discover(&root, &system).unwrap(); + + metadata.apply_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(self.project.config.python_version)), + python: (!self.project.config().dependencies.is_empty()) + 
.then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + + let mut db = ProjectDatabase::new(metadata.clone(), system).unwrap(); db.project().set_included_paths( &mut db, - check_paths + self.project + .check_paths() .iter() - .map(|path| SystemPath::absolute(path, system.current_directory())) + .map(|path| SystemPath::absolute(path, &root)) .collect(), ); db } +} - fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { - let result = db.check(); - let diagnostics = result.len(); - - assert!( - diagnostics > 1 && diagnostics <= max_diagnostics, - "Expected between {} and {} diagnostics but got {}", - 1, - max_diagnostics, - diagnostics - ); +impl Display for Benchmark<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(self.project.config.name) } - - setup_rayon(); - - let setup_project = project.setup().expect("Failed to setup project"); - - let root = SystemPathBuf::from_path_buf(setup_project.path.clone()).unwrap(); - let system = OsSystem::new(&root); - - let mut metadata = ProjectMetadata::discover(&root, &system).unwrap(); - - metadata.apply_options(Options { - environment: Some(EnvironmentOptions { - python_version: Some(RangedValue::cli(setup_project.config.python_version)), - python: (!setup_project.config().dependencies.is_empty()) - .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), - ..EnvironmentOptions::default() - }), - ..Options::default() - }); - - let check_paths = setup_project.check_paths(); - - bencher - .with_inputs(|| setup(&metadata, &system, check_paths)) - .bench_local_refs(|db| check_project(db, max_diagnostics)); } -#[bench_group(sample_size = 1, sample_count = 3)] -mod benches { - use divan::{Bencher, bench}; - use ruff_benchmark::real_world_projects::RealWorldProject; - use ruff_db::system::SystemPath; - use ruff_python_ast::PythonVersion; - - use crate::bench_project; +fn check_project(db: &ProjectDatabase, 
max_diagnostics: usize) { + let result = db.check(); + let diagnostics = result.len(); + + assert!( + diagnostics > 1 && diagnostics <= max_diagnostics, + "Expected between {} and {} diagnostics but got {}", + 1, + max_diagnostics, + diagnostics + ); +} - #[bench] - fn colour_science(bencher: Bencher) { - let project = RealWorldProject { +static ALTAIR: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "altair", + repository: "https://github.com/vega/altair", + commit: "d1f4a1ef89006e5f6752ef1f6df4b7a509336fba", + paths: vec![SystemPath::new("altair")], + dependencies: vec![ + "jinja2", + "narwhals", + "numpy", + "packaging", + "pandas-stubs", + "pyarrow-stubs", + "pytest", + "scipy-stubs", + "types-jsonschema", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY312, + }, + 13000, + ) +}); + +static COLOUR_SCIENCE: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { name: "colour-science", repository: "https://github.com/colour-science/colour", commit: "a17e2335c29e7b6f08080aa4c93cfa9b61f84757", - paths: &[SystemPath::new("colour")], - dependencies: &[ + paths: vec![SystemPath::new("colour")], + dependencies: vec![ "matplotlib", "numpy", "pandas-stubs", @@ -113,39 +116,19 @@ mod benches { ], max_dep_date: "2025-06-17", python_version: PythonVersion::PY310, - }; - - bench_project(bencher, project, 477); - } - - #[bench] - fn pydantic(bencher: Bencher) { - let project = RealWorldProject { - name: "pydantic", - repository: "https://github.com/pydantic/pydantic", - commit: "0c4a22b64b23dfad27387750cf07487efc45eb05", - paths: &[SystemPath::new("pydantic")], - dependencies: &[ - "annotated-types", - "pydantic-core", - "typing-extensions", - "typing-inspection", - ], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY39, - }; - - bench_project(bencher, project, 1000); - } - - #[bench] - fn freqtrade(bencher: Bencher) { - let project = RealWorldProject 
{ + }, + 477, + ) +}); + +static FREQTRADE: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { name: "freqtrade", repository: "https://github.com/freqtrade/freqtrade", commit: "2d842ea129e56575852ee0c45383c8c3f706be19", - paths: &[SystemPath::new("freqtrade")], - dependencies: &[ + paths: vec![SystemPath::new("freqtrade")], + dependencies: vec![ "numpy", "pandas-stubs", "pydantic", @@ -158,19 +141,19 @@ mod benches { ], max_dep_date: "2025-06-17", python_version: PythonVersion::PY312, - }; - - bench_project(bencher, project, 400); - } - - #[bench] - fn pandas(bencher: Bencher) { - let project = RealWorldProject { + }, + 400, + ) +}); + +static PANDAS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { name: "pandas", repository: "https://github.com/pandas-dev/pandas", commit: "5909621e2267eb67943a95ef5e895e8484c53432", - paths: &[SystemPath::new("pandas")], - dependencies: &[ + paths: vec![SystemPath::new("pandas")], + dependencies: vec![ "numpy", "types-python-dateutil", "types-pytz", @@ -180,52 +163,73 @@ mod benches { ], max_dep_date: "2025-06-17", python_version: PythonVersion::PY312, - }; - - bench_project(bencher, project, 3000); - } - - #[bench] - fn sympy(bencher: Bencher) { - let project = RealWorldProject { + }, + 3000, + ) +}); + +static PYDANTIC: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { + name: "pydantic", + repository: "https://github.com/pydantic/pydantic", + commit: "0c4a22b64b23dfad27387750cf07487efc45eb05", + paths: vec![SystemPath::new("pydantic")], + dependencies: vec![ + "annotated-types", + "pydantic-core", + "typing-extensions", + "typing-inspection", + ], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY39, + }, + 1000, + ) +}); + +static SYMPY: std::sync::LazyLock> = std::sync::LazyLock::new(|| { + Benchmark::new( + RealWorldProject { name: "sympy", repository: "https://github.com/sympy/sympy", 
commit: "22fc107a94eaabc4f6eb31470b39db65abb7a394", - paths: &[SystemPath::new("sympy")], - dependencies: &["mpmath"], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY312, - }; - - bench_project(bencher, project, 13000); - } - - #[bench] - fn altair(bencher: Bencher) { - let project = RealWorldProject { - name: "altair", - repository: "https://github.com/vega/altair", - commit: "d1f4a1ef89006e5f6752ef1f6df4b7a509336fba", - paths: &[SystemPath::new("altair")], - dependencies: &[ - "jinja2", - "narwhals", - "numpy", - "packaging", - "pandas-stubs", - "pyarrow-stubs", - "pytest", - "scipy-stubs", - "types-jsonschema", - ], + paths: vec![SystemPath::new("sympy")], + dependencies: vec!["mpmath"], max_dep_date: "2025-06-17", python_version: PythonVersion::PY312, - }; + }, + 13000, + ) +}); - bench_project(bencher, project, 13000); - } +#[bench(args=[&*ALTAIR, &*COLOUR_SCIENCE, &*FREQTRADE, &*PANDAS, &*PYDANTIC, &*SYMPY], sample_size=1, sample_count=3)] +fn bench_project(bencher: Bencher, benchmark: &Benchmark) { + bencher + .with_inputs(|| benchmark.setup_iteration()) + .bench_local_refs(|db| { + check_project(db, benchmark.max_diagnostics); + }); } fn main() { + let filter = + std::env::var("TY_LOG").unwrap_or("ty_walltime=info,ruff_benchmark=info".to_string()); + + let _logging = setup_logging_with_filter(&filter).expect("Filter to be valid"); + + // Pre-warm, (or "pre-cold"?) the salsa DB. Salsa has a fast-path + // for looking up ingredients but it only works if there's a single db. + // The slow-path is much slower, close to a 10x slowdown. Create + // a first db here, so that all further runs are not the first db, + // so that all hit the slow path (reduces noise). 
+ // https://github.com/salsa-rs/salsa/issues/918 + tracing::info!( + "Pre-warm Salsa running Altair, see https://github.com/salsa-rs/salsa/issues/918" + ); + let altair_db = ALTAIR.setup_iteration(); + check_project(&altair_db, 1000); + tracing::info!("Pre-warm completed"); + divan::main(); } diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs index 59aeb2035a689..c7294aa2a2814 100644 --- a/crates/ruff_benchmark/src/real_world_projects.rs +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -30,9 +30,9 @@ pub struct RealWorldProject<'a> { /// Specific commit hash to checkout pub commit: &'a str, /// List of paths within the project to check (`ty check `) - pub paths: &'a [&'a SystemPath], + pub paths: Vec<&'a SystemPath>, /// Dependencies to install via uv - pub dependencies: &'a [&'a str], + pub dependencies: Vec<&'a str>, /// Limit candidate packages to those that were uploaded prior to a given point in time (ISO 8601 format). /// Maps to uv's `exclude-newer`. 
pub max_dep_date: &'a str, @@ -44,25 +44,25 @@ impl<'a> RealWorldProject<'a> { /// Setup a real-world project for benchmarking pub fn setup(self) -> Result> { let start = Instant::now(); - eprintln!("Setting up project {}", self.name); + tracing::debug!("Setting up project {}", self.name); // Create project directory in cargo target let project_root = get_project_cache_dir(self.name)?; // Clone the repository if it doesn't exist, or update if it does if project_root.exists() { - eprintln!("Updating repository for project '{}'...", self.name); + tracing::debug!("Updating repository for project '{}'...", self.name); let start = std::time::Instant::now(); update_repository(&project_root, self.commit)?; - eprintln!( + tracing::debug!( "Repository update completed in {:.2}s", start.elapsed().as_secs_f64() ); } else { - eprintln!("Cloning repository for project '{}'...", self.name); + tracing::debug!("Cloning repository for project '{}'...", self.name); let start = std::time::Instant::now(); clone_repository(self.repository, &project_root, self.commit)?; - eprintln!( + tracing::debug!( "Repository clone completed in {:.2}s", start.elapsed().as_secs_f64() ); @@ -75,20 +75,20 @@ impl<'a> RealWorldProject<'a> { // Install dependencies if specified if !checkout.project().dependencies.is_empty() { - eprintln!( + tracing::debug!( "Installing {} dependencies for project '{}'...", checkout.project().dependencies.len(), checkout.project().name ); let start = std::time::Instant::now(); install_dependencies(&checkout)?; - eprintln!( + tracing::debug!( "Dependency installation completed in {:.2}s", start.elapsed().as_secs_f64() ); } - eprintln!("Project setup took: {:.2}s", start.elapsed().as_secs_f64()); + tracing::debug!("Project setup took: {:.2}s", start.elapsed().as_secs_f64()); Ok(SetupProject { path: checkout.path, @@ -128,8 +128,8 @@ impl<'a> SetupProject<'a> { } /// Get the benchmark paths as `SystemPathBuf` - pub fn check_paths(&self) -> &'a [&SystemPath] { - 
self.config.paths + pub fn check_paths(&self) -> &[&SystemPath] { + &self.config.paths } /// Get the virtual environment path @@ -323,7 +323,7 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { "--exclude-newer", checkout.project().max_dep_date, ]) - .args(checkout.project().dependencies); + .args(&checkout.project().dependencies); let output = cmd .output() From 0cf08814732d9543c44c976ba3f91ef53b1d0ba0 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 18 Jun 2025 09:55:24 +0200 Subject: [PATCH 24/28] Split small/medium/large benchmarks --- .github/workflows/ci.yaml | 2 + Cargo.lock | 1 + crates/ruff_benchmark/benches/ty.rs | 164 +++++++++++-------- crates/ruff_benchmark/benches/ty_walltime.rs | 24 ++- crates/ty_project/src/lib.rs | 13 +- 5 files changed, 131 insertions(+), 73 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index fd9343868974a..a00c1ba721b5a 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -929,6 +929,8 @@ jobs: needs: determine_changes if: ${{ github.repository == 'astral-sh/ruff' && !contains(github.event.pull_request.labels.*.name, 'no-test') && (needs.determine_changes.outputs.ty == 'true' || github.ref == 'refs/heads/main') }} timeout-minutes: 20 + env: + TY_LOG: ruff_benchmark=debug steps: - name: "Checkout Branch" uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/Cargo.lock b/Cargo.lock index e0b57a4fc9174..7c17d84a6f2e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2674,6 +2674,7 @@ dependencies = [ "serde", "serde_json", "tikv-jemallocator", + "tracing", "ty_project", ] diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index 7b61c5d9be860..cbc6f98a15a30 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -1,6 +1,6 @@ #![allow(clippy::disallowed_names)] use ruff_benchmark::criterion; -use ruff_benchmark::real_world_projects::RealWorldProject; +use
ruff_benchmark::real_world_projects::{RealWorldProject, SetupProject}; use std::ops::Range; @@ -348,21 +348,60 @@ fn benchmark_many_tuple_assignments(criterion: &mut Criterion) { }); } -#[track_caller] -fn bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagnostics: usize) { - fn setup( - metadata: &ProjectMetadata, - system: &TestSystem, - check_paths: &[&SystemPath], - ) -> ProjectDatabase { +struct ProjectBenchmark<'a> { + project: SetupProject<'a>, + fs: MemoryFileSystem, + max_diagnostics: usize, +} + +impl<'a> ProjectBenchmark<'a> { + fn new(project: RealWorldProject<'a>, max_diagnostics: usize) -> Self { + let setup_project = project.setup().expect("Failed to setup project"); + let fs = setup_project + .copy_to_memory_fs() + .expect("Failed to copy project to memory fs"); + + Self { + project: setup_project, + fs, + max_diagnostics, + } + } + + fn setup_iteration(&self) -> ProjectDatabase { + let system = TestSystem::new(InMemorySystem::from_memory_fs(self.fs.clone())); + + let src_root = SystemPath::new("/"); + let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap(); + + metadata.apply_options(Options { + environment: Some(EnvironmentOptions { + python_version: Some(RangedValue::cli(self.project.config.python_version)), + python: (!self.project.config().dependencies.is_empty()) + .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), + ..EnvironmentOptions::default() + }), + ..Options::default() + }); + let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); db.project().set_included_paths( &mut db, - check_paths.iter().map(|path| path.to_path_buf()).collect(), + self.project + .check_paths() + .iter() + .map(|path| path.to_path_buf()) + .collect(), ); + db } +} + +#[track_caller] +fn bench_project(benchmark: &ProjectBenchmark, criterion: &mut Criterion) { + setup_rayon(); fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { let result = db.check(); @@ -377,79 +416,66 @@ fn 
bench_project(project: RealWorldProject, criterion: &mut Criterion, max_diagn ); } - setup_rayon(); - - let setup_project = project.setup().expect("Failed to setup project"); - - let fs = setup_project - .copy_to_memory_fs() - .expect("Failed to copy project to memory fs"); - let system = TestSystem::new(InMemorySystem::from_memory_fs(fs)); - - let src_root = SystemPath::new("/"); - let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap(); - - metadata.apply_options(Options { - environment: Some(EnvironmentOptions { - python_version: Some(RangedValue::cli(setup_project.config.python_version)), - python: (!setup_project.config().dependencies.is_empty()) - .then_some(RelativePathBuf::cli(SystemPath::new(".venv"))), - ..EnvironmentOptions::default() - }), - ..Options::default() - }); - - let check_paths = setup_project.check_paths(); - - criterion.bench_function(&format!("project[{}]", setup_project.config.name), |b| { + let mut group = criterion.benchmark_group("project"); + group.sampling_mode(criterion::SamplingMode::Flat); + group.bench_function(benchmark.project.config.name, |b| { b.iter_batched_ref( - || setup(&metadata, &system, check_paths), - |db| check_project(db, max_diagnostics), + || benchmark.setup_iteration(), + |db| check_project(db, benchmark.max_diagnostics), BatchSize::SmallInput, ); }); } fn hydra(criterion: &mut Criterion) { - let project = RealWorldProject { - name: "hydra-zen", - repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", - commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", - paths: vec![SystemPath::new("src")], - dependencies: vec!["pydantic", "beartype", "hydra-core"], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY313, - }; - - bench_project(project, criterion, 100); + let benchmark = ProjectBenchmark::new( + RealWorldProject { + name: "hydra-zen", + repository: "https://github.com/mit-ll-responsible-ai/hydra-zen", + commit: "dd2b50a9614c6f8c46c5866f283c8f7e7a960aa8", + paths: 
vec![SystemPath::new("src")], + dependencies: vec!["pydantic", "beartype", "hydra-core"], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }, + 100, + ); + + bench_project(&benchmark, criterion); } fn attrs(criterion: &mut Criterion) { - let project = RealWorldProject { - name: "attrs", - repository: "https://github.com/python-attrs/attrs", - commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b", - paths: vec![SystemPath::new("src")], - dependencies: vec![], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY313, - }; - - bench_project(project, criterion, 100); + let benchmark = ProjectBenchmark::new( + RealWorldProject { + name: "attrs", + repository: "https://github.com/python-attrs/attrs", + commit: "a6ae894aad9bc09edc7cdad8c416898784ceec9b", + paths: vec![SystemPath::new("src")], + dependencies: vec![], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }, + 100, + ); + + bench_project(&benchmark, criterion); } fn anyio(criterion: &mut Criterion) { - let project = RealWorldProject { - name: "anyio", - repository: "https://github.com/agronholm/anyio", - commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", - paths: vec![SystemPath::new("src")], - dependencies: vec![], - max_dep_date: "2025-06-17", - python_version: PythonVersion::PY313, - }; - - bench_project(project, criterion, 100); + let benchmark = ProjectBenchmark::new( + RealWorldProject { + name: "anyio", + repository: "https://github.com/agronholm/anyio", + commit: "561d81270a12f7c6bbafb5bc5fad99a2a13f96be", + paths: vec![SystemPath::new("src")], + dependencies: vec![], + max_dep_date: "2025-06-17", + python_version: PythonVersion::PY313, + }, + 100, + ); + + bench_project(&benchmark, criterion); } criterion_group!(check_file, benchmark_cold, benchmark_incremental); diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 586534dde44c9..641842862ad1f 100644 --- 
a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -96,7 +96,7 @@ static ALTAIR: std::sync::LazyLock> = std::sync::LazyLock::ne max_dep_date: "2025-06-17", python_version: PythonVersion::PY312, }, - 13000, + 1000, ) }); @@ -203,8 +203,26 @@ static SYMPY: std::sync::LazyLock> = std::sync::LazyLock::new ) }); -#[bench(args=[&*ALTAIR, &*COLOUR_SCIENCE, &*FREQTRADE, &*PANDAS, &*PYDANTIC, &*SYMPY], sample_size=1, sample_count=3)] -fn bench_project(bencher: Bencher, benchmark: &Benchmark) { +#[bench(args=[&*ALTAIR, &*FREQTRADE, &*PYDANTIC], sample_size=2, sample_count=3)] +fn small(bencher: Bencher, benchmark: &Benchmark) { + bencher + .with_inputs(|| benchmark.setup_iteration()) + .bench_local_refs(|db| { + check_project(db, benchmark.max_diagnostics); + }); +} + +#[bench(args=[&*COLOUR_SCIENCE, &*PANDAS], sample_size=1, sample_count=3)] +fn medium(bencher: Bencher, benchmark: &Benchmark) { + bencher + .with_inputs(|| benchmark.setup_iteration()) + .bench_local_refs(|db| { + check_project(db, benchmark.max_diagnostics); + }); +} + +#[bench(args=[&*SYMPY], sample_size=1, sample_count=2)] +fn large(bencher: Bencher, benchmark: &Benchmark) { bencher .with_inputs(|| benchmark.setup_iteration()) .bench_local_refs(|db| { diff --git a/crates/ty_project/src/lib.rs b/crates/ty_project/src/lib.rs index 924785ca2da0a..9a32d1293ff71 100644 --- a/crates/ty_project/src/lib.rs +++ b/crates/ty_project/src/lib.rs @@ -237,6 +237,7 @@ impl Project { .map(IOErrorDiagnostic::to_diagnostic), ); + let check_start = std::time::Instant::now(); let file_diagnostics = std::sync::Mutex::new(vec![]); { @@ -262,6 +263,11 @@ impl Project { }); } + tracing::debug!( + "Checking all files took {:.3}s", + check_start.elapsed().as_secs_f64(), + ); + let mut file_diagnostics = file_diagnostics.into_inner().unwrap(); file_diagnostics.sort_by(|left, right| { left.rendering_sort_key(db) @@ -442,11 +448,16 @@ impl Project { let _entered = 
tracing::debug_span!("Project::index_files", project = %self.name(db)) .entered(); + let start = std::time::Instant::now(); let walker = ProjectFilesWalker::new(db); let (files, diagnostics) = walker.collect_set(db); - tracing::info!("Indexed {} file(s)", files.len()); + tracing::info!( + "Indexed {} file(s) in {:.3}s", + files.len(), + start.elapsed().as_secs_f64() + ); vacant.set(files, diagnostics) } Index::Indexed(indexed) => indexed, From 1bf86f31bd9923b7506d00f556b1c5f91bf664f6 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 18 Jun 2025 09:57:04 +0200 Subject: [PATCH 25/28] Remove unnecessary clone --- crates/ruff_benchmark/benches/ty.rs | 2 +- crates/ruff_benchmark/benches/ty_walltime.rs | 2 +- crates/ty_project/src/metadata.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index cbc6f98a15a30..4a4568a967b86 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -384,7 +384,7 @@ impl<'a> ProjectBenchmark<'a> { ..Options::default() }); - let mut db = ProjectDatabase::new(metadata.clone(), system.clone()).unwrap(); + let mut db = ProjectDatabase::new(metadata, system.clone()).unwrap(); db.project().set_included_paths( &mut db, diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 641842862ad1f..4a25c3d637e18 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -42,7 +42,7 @@ impl<'a> Benchmark<'a> { ..Options::default() }); - let mut db = ProjectDatabase::new(metadata.clone(), system).unwrap(); + let mut db = ProjectDatabase::new(metadata, system).unwrap(); db.project().set_included_paths( &mut db, diff --git a/crates/ty_project/src/metadata.rs b/crates/ty_project/src/metadata.rs index 2c280a15a5b1a..47896e503113d 100644 --- a/crates/ty_project/src/metadata.rs +++ b/crates/ty_project/src/metadata.rs @@ 
-17,7 +17,7 @@ pub mod pyproject; pub mod settings; pub mod value; -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] #[cfg_attr(test, derive(serde::Serialize))] pub struct ProjectMetadata { pub(super) name: Name, From 7e82e5ad5261a02eb44d494c6e6ae88d061e3ce8 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 18 Jun 2025 10:13:31 +0200 Subject: [PATCH 26/28] Disable multithreading once more, because it's slower over all --- crates/ruff_benchmark/benches/ty.rs | 6 +++--- crates/ruff_benchmark/benches/ty_walltime.rs | 21 +++++++++++--------- crates/ruff_db/src/lib.rs | 6 ++++++ crates/ty_project/src/lib.rs | 4 ++-- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index 4a4568a967b86..bc34f8579ea5c 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -384,7 +384,7 @@ impl<'a> ProjectBenchmark<'a> { ..Options::default() }); - let mut db = ProjectDatabase::new(metadata, system.clone()).unwrap(); + let mut db = ProjectDatabase::new(metadata, system).unwrap(); db.project().set_included_paths( &mut db, @@ -401,8 +401,6 @@ impl<'a> ProjectBenchmark<'a> { #[track_caller] fn bench_project(benchmark: &ProjectBenchmark, criterion: &mut Criterion) { - setup_rayon(); - fn check_project(db: &mut ProjectDatabase, max_diagnostics: usize) { let result = db.check(); let diagnostics = result.len(); @@ -416,6 +414,8 @@ fn bench_project(benchmark: &ProjectBenchmark, criterion: &mut Criterion) { ); } + setup_rayon(); + let mut group = criterion.benchmark_group("project"); group.sampling_mode(criterion::SamplingMode::Flat); group.bench_function(benchmark.project.config.name, |b| { diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 4a25c3d637e18..d0c1ed18f08b9 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -2,6 
+2,7 @@ use std::fmt::{Display, Formatter}; use divan::{Bencher, bench}; +use rayon::ThreadPoolBuilder; use ruff_benchmark::real_world_projects::{RealWorldProject, SetupProject}; use ruff_db::system::{OsSystem, SystemPath, SystemPathBuf}; @@ -236,18 +237,20 @@ fn main() { let _logging = setup_logging_with_filter(&filter).expect("Filter to be valid"); - // Pre-warm, (or "pre-cold"?) the salsa DB. Salsa has a fast-path - // for looking up ingredients but it only works if there's a single db. - // The slow-path is much slower, close to a 10x slowdown. Create - // a first db here, so that all further runs are not the first db, - // so that all hit the slow path (reduces noise). - // https://github.com/salsa-rs/salsa/issues/918 + // Disable multithreading for now due to + // https://github.com/salsa-rs/salsa/issues/918. + // + // Salsa has a fast-path for the first db when looking up ingredients. + // It seems that this fast-path becomes extremelly slow for all db's other + // than the first one, especially when using multithreading (10x slower than the first run). 
tracing::info!( "Pre-warm Salsa running Altair, see https://github.com/salsa-rs/salsa/issues/918" ); - let altair_db = ALTAIR.setup_iteration(); - check_project(&altair_db, 1000); - tracing::info!("Pre-warm completed"); + ThreadPoolBuilder::new() + .num_threads(1) + .use_current_thread() + .build_global() + .unwrap(); divan::main(); } diff --git a/crates/ruff_db/src/lib.rs b/crates/ruff_db/src/lib.rs index dec4500c5d3e0..6172e1f484a12 100644 --- a/crates/ruff_db/src/lib.rs +++ b/crates/ruff_db/src/lib.rs @@ -18,6 +18,12 @@ pub mod system; pub mod testing; pub mod vendored; +#[cfg(not(target_arch = "wasm32"))] +pub use std::time::{Instant, SystemTime, SystemTimeError}; + +#[cfg(target_arch = "wasm32")] +pub use web_time::{Instant, SystemTime, SystemTimeError}; + pub type FxDashMap = dashmap::DashMap>; pub type FxDashSet = dashmap::DashSet>; diff --git a/crates/ty_project/src/lib.rs b/crates/ty_project/src/lib.rs index 9a32d1293ff71..f869b06bab290 100644 --- a/crates/ty_project/src/lib.rs +++ b/crates/ty_project/src/lib.rs @@ -237,7 +237,7 @@ impl Project { .map(IOErrorDiagnostic::to_diagnostic), ); - let check_start = std::time::Instant::now(); + let check_start = ruff_db::Instant::now(); let file_diagnostics = std::sync::Mutex::new(vec![]); { @@ -448,7 +448,7 @@ impl Project { let _entered = tracing::debug_span!("Project::index_files", project = %self.name(db)) .entered(); - let start = std::time::Instant::now(); + let start = ruff_db::Instant::now(); let walker = ProjectFilesWalker::new(db); let (files, diagnostics) = walker.collect_set(db); From 7327336cf55b283de026fe432f09110b4cd0aaf7 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 18 Jun 2025 10:24:55 +0200 Subject: [PATCH 27/28] Pre-commit --- Cargo.toml | 12 ++++++++---- crates/ruff_benchmark/benches/ty_walltime.rs | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 60b73e6feccb8..32804e36ba13a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -168,7 +168,7 @@ 
tracing-subscriber = { version = "0.3.18", default-features = false, features = "env-filter", "fmt", "ansi", - "smallvec", + "smallvec" ] } tryfn = { version = "0.2.1" } typed-arena = { version = "2.0.2" } @@ -178,7 +178,11 @@ unicode-width = { version = "0.2.0" } unicode_names2 = { version = "1.2.2" } unicode-normalization = { version = "0.1.23" } url = { version = "2.5.0" } -uuid = { version = "1.6.1", features = ["v4", "fast-rng", "macro-diagnostics"] } +uuid = { version = "1.6.1", features = [ + "v4", + "fast-rng", + "macro-diagnostics", +] } walkdir = { version = "2.3.2" } wasm-bindgen = { version = "0.2.92" } wasm-bindgen-test = { version = "0.3.42" } @@ -213,8 +217,8 @@ must_use_candidate = "allow" similar_names = "allow" single_match_else = "allow" too_many_lines = "allow" -needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block. -unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often. +needless_continue = "allow" # An explicit continue can be more readable, especially if the alternative is an empty block. +unnecessary_debug_formatting = "allow" # too many instances, the display also doesn't quote the path which is often desired in logs where we use them the most often. # Without the hashes we run into a `rustfmt` bug in some snapshot tests, see #13250 needless_raw_string_hashes = "allow" # Disallowed restriction lints diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index d0c1ed18f08b9..1b18091fbe12d 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -241,7 +241,7 @@ fn main() { // https://github.com/salsa-rs/salsa/issues/918. // // Salsa has a fast-path for the first db when looking up ingredients. 
- // It seems that this fast-path becomes extremelly slow for all db's other + // It seems that this fast-path becomes extremely slow for all db's other // than the first one, especially when using multithreading (10x slower than the first run). tracing::info!( "Pre-warm Salsa running Altair, see https://github.com/salsa-rs/salsa/issues/918" From 5cee3f0f38d95f64d409c18310bbf1b2c93f975b Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Wed, 18 Jun 2025 13:26:25 +0200 Subject: [PATCH 28/28] Code review feedback --- crates/ruff_benchmark/benches/ty.rs | 4 +- crates/ruff_benchmark/benches/ty_walltime.rs | 7 +- .../ruff_benchmark/src/real_world_projects.rs | 141 +++++++----------- 3 files changed, 58 insertions(+), 94 deletions(-) diff --git a/crates/ruff_benchmark/benches/ty.rs b/crates/ruff_benchmark/benches/ty.rs index bc34f8579ea5c..bdef551be75fe 100644 --- a/crates/ruff_benchmark/benches/ty.rs +++ b/crates/ruff_benchmark/benches/ty.rs @@ -1,6 +1,6 @@ #![allow(clippy::disallowed_names)] use ruff_benchmark::criterion; -use ruff_benchmark::real_world_projects::{RealWorldProject, SetupProject}; +use ruff_benchmark::real_world_projects::{InstalledProject, RealWorldProject}; use std::ops::Range; @@ -349,7 +349,7 @@ fn benchmark_many_tuple_assignments(criterion: &mut Criterion) { } struct ProjectBenchmark<'a> { - project: SetupProject<'a>, + project: InstalledProject<'a>, fs: MemoryFileSystem, max_diagnostics: usize, } diff --git a/crates/ruff_benchmark/benches/ty_walltime.rs b/crates/ruff_benchmark/benches/ty_walltime.rs index 1b18091fbe12d..39029f450516d 100644 --- a/crates/ruff_benchmark/benches/ty_walltime.rs +++ b/crates/ruff_benchmark/benches/ty_walltime.rs @@ -3,7 +3,7 @@ use std::fmt::{Display, Formatter}; use divan::{Bencher, bench}; use rayon::ThreadPoolBuilder; -use ruff_benchmark::real_world_projects::{RealWorldProject, SetupProject}; +use ruff_benchmark::real_world_projects::{InstalledProject, RealWorldProject}; use ruff_db::system::{OsSystem, SystemPath, 
SystemPathBuf}; use ruff_db::testing::setup_logging_with_filter; @@ -13,7 +13,7 @@ use ty_project::metadata::value::{RangedValue, RelativePathBuf}; use ty_project::{Db, ProjectDatabase, ProjectMetadata}; struct Benchmark<'a> { - project: SetupProject<'a>, + project: InstalledProject<'a>, max_diagnostics: usize, } @@ -243,9 +243,6 @@ fn main() { // Salsa has a fast-path for the first db when looking up ingredients. // It seems that this fast-path becomes extremely slow for all db's other // than the first one, especially when using multithreading (10x slower than the first run). - tracing::info!( - "Pre-warm Salsa running Altair, see https://github.com/salsa-rs/salsa/issues/918" - ); ThreadPoolBuilder::new() .num_threads(1) .use_current_thread() diff --git a/crates/ruff_benchmark/src/real_world_projects.rs b/crates/ruff_benchmark/src/real_world_projects.rs index c7294aa2a2814..fd8536889a047 100644 --- a/crates/ruff_benchmark/src/real_world_projects.rs +++ b/crates/ruff_benchmark/src/real_world_projects.rs @@ -42,7 +42,7 @@ pub struct RealWorldProject<'a> { impl<'a> RealWorldProject<'a> { /// Setup a real-world project for benchmarking - pub fn setup(self) -> Result> { + pub fn setup(self) -> Result> { let start = Instant::now(); tracing::debug!("Setting up project {}", self.name); @@ -90,7 +90,7 @@ impl<'a> RealWorldProject<'a> { tracing::debug!("Project setup took: {:.2}s", start.elapsed().as_secs_f64()); - Ok(SetupProject { + Ok(InstalledProject { path: checkout.path, config: checkout.project, }) @@ -113,15 +113,15 @@ impl<'a> Checkout<'a> { } } -/// A setup real-world project ready for benchmarking -pub struct SetupProject<'a> { +/// Checked out project with its dependencies installed. 
+pub struct InstalledProject<'a> { /// Path to the cloned project pub path: PathBuf, /// Project configuration pub config: RealWorldProject<'a>, } -impl<'a> SetupProject<'a> { +impl<'a> InstalledProject<'a> { /// Get the project configuration pub fn config(&self) -> &RealWorldProject<'a> { &self.config @@ -165,43 +165,18 @@ fn get_project_cache_dir(project_name: &str) -> Result { /// Update an existing repository fn update_repository(project_root: &Path, commit: &str) -> Result<()> { - // Check if we already have the specific commit let output = Command::new("git") - .args(["cat-file", "-e", commit]) + .args(["fetch", "origin", commit]) .current_dir(project_root) .output() - .context("Failed to check if commit exists")?; + .context("Failed to execute git fetch command")?; - // If commit doesn't exist locally, fetch it if !output.status.success() { - let output = Command::new("git") - .args(["fetch", "origin", commit]) - .current_dir(project_root) - .output() - .context("Failed to execute git fetch command")?; - - if !output.status.success() { - anyhow::bail!( - "Git fetch of commit {} failed: {}", - commit, - String::from_utf8_lossy(&output.stderr) - ); - } - } - - // Check if we're already on the correct commit - let output = Command::new("git") - .args(["rev-parse", "HEAD"]) - .current_dir(project_root) - .output() - .context("Failed to get current commit")?; - - if output.status.success() { - let current_commit = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if current_commit == commit { - // Already on the correct commit, skip checkout - return Ok(()); - } + anyhow::bail!( + "Git fetch of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); } // Checkout specific commit @@ -211,13 +186,12 @@ fn update_repository(project_root: &Path, commit: &str) -> Result<()> { .output() .context("Failed to execute git checkout command")?; - if !output.status.success() { - anyhow::bail!( - "Git checkout of commit {} failed: {}", - 
commit, - String::from_utf8_lossy(&output.stderr) - ); - } + anyhow::ensure!( + output.status.success(), + "Git checkout of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); Ok(()) } @@ -241,12 +215,11 @@ fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<( .output() .context("Failed to execute git clone command")?; - if !output.status.success() { - anyhow::bail!( - "Git clone failed: {}", - String::from_utf8_lossy(&output.stderr) - ); - } + anyhow::ensure!( + output.status.success(), + "Git clone failed: {}", + String::from_utf8_lossy(&output.stderr) + ); // Fetch the specific commit let output = Command::new("git") @@ -255,13 +228,12 @@ fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<( .output() .context("Failed to execute git fetch command")?; - if !output.status.success() { - anyhow::bail!( - "Git fetch of commit {} failed: {}", - commit, - String::from_utf8_lossy(&output.stderr) - ); - } + anyhow::ensure!( + output.status.success(), + "Git fetch of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); // Checkout the specific commit let output = Command::new("git") @@ -270,13 +242,12 @@ fn clone_repository(repo_url: &str, target_dir: &Path, commit: &str) -> Result<( .output() .context("Failed to execute git checkout command")?; - if !output.status.success() { - anyhow::bail!( - "Git checkout of commit {} failed: {}", - commit, - String::from_utf8_lossy(&output.stderr) - ); - } + anyhow::ensure!( + output.status.success(), + "Git checkout of commit {} failed: {}", + commit, + String::from_utf8_lossy(&output.stderr) + ); Ok(()) } @@ -290,28 +261,25 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { .context("Failed to execute uv version check.")?; if !uv_check.status.success() { - anyhow::bail!("uv is not installed or not found in PATH"); + anyhow::bail!( + "uv is not installed or not found in PATH. 
If you need to install it, follow the instructions at https://docs.astral.sh/uv/getting-started/installation/" + ); } - // Create an isolated virtual environment to avoid picking up ruff's pyproject.toml let venv_path = checkout.venv_path(); let python_version_str = checkout.project().python_version.to_string(); - // Only create venv if it doesn't exist - if !venv_path.exists() { - let output = Command::new("uv") - .args(["venv", "--python", &python_version_str]) - .arg(&venv_path) - .output() - .context("Failed to execute uv venv command")?; - - if !output.status.success() { - anyhow::bail!( - "Failed to create virtual environment: {}", - String::from_utf8_lossy(&output.stderr) - ); - } - } + let output = Command::new("uv") + .args(["venv", "--python", &python_version_str, "--allow-existing"]) + .arg(&venv_path) + .output() + .context("Failed to execute uv venv command")?; + + anyhow::ensure!( + output.status.success(), + "Failed to create virtual environment: {}", + String::from_utf8_lossy(&output.stderr) + ); // Install dependencies with date constraint in the isolated environment let mut cmd = Command::new("uv"); @@ -329,12 +297,11 @@ fn install_dependencies(checkout: &Checkout) -> Result<()> { .output() .context("Failed to execute uv pip install command")?; - if !output.status.success() { - anyhow::bail!( - "Dependency installation failed: {}", - String::from_utf8_lossy(&output.stderr) - ); - } + anyhow::ensure!( + output.status.success(), + "Dependency installation failed: {}", + String::from_utf8_lossy(&output.stderr) + ); Ok(()) }