From f633a9006917e18f370fdb285cab41f4162b8bd6 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 23 Jul 2024 13:15:19 -0400 Subject: [PATCH 1/2] Add --extracted to cache prune --- crates/uv-cache/src/lib.rs | 30 ++++++++++++++- crates/uv-cli/src/lib.rs | 16 +++++++- crates/uv/src/commands/cache_prune.rs | 8 +++- crates/uv/src/lib.rs | 7 +++- crates/uv/tests/cache_prune.rs | 54 ++++++++++++++++++++++++++- 5 files changed, 108 insertions(+), 7 deletions(-) diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index dbc742f1485f..bf22963dda72 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -340,7 +340,7 @@ impl Cache { } /// Run the garbage collector on the cache, removing any dangling entries. - pub fn prune(&self) -> Result { + pub fn prune(&self, all_unzipped: bool) -> Result { let mut summary = Removal::default(); // First, remove any top-level directories that are unused. These typically represent @@ -386,6 +386,34 @@ impl Cache { Err(err) => return Err(err), } + // Third, if enabled, remove all unzipped wheels, leaving only the wheel archives. + if all_unzipped { + // Remove the entire pre-built wheel cache, since every entry is an unzipped wheel. + match fs::read_dir(self.bucket(CacheBucket::Wheels)) { + Ok(entries) => { + for entry in entries { + let entry = entry?; + let path = fs_err::canonicalize(entry.path())?; + if path.is_dir() { + debug!("Removing unzipped wheel entry: {}", path.display()); + summary += rm_rf(path)?; + } + } + } + Err(err) if err.kind() == io::ErrorKind::NotFound => (), + Err(err) => return Err(err), + } + + // Remove any unzipped wheels (i.e., symlinks) from the built wheels cache. 
+ for entry in walkdir::WalkDir::new(self.bucket(CacheBucket::SourceDistributions)) { + let entry = entry?; + if entry.file_type().is_symlink() { + debug!("Removing unzipped wheel entry: {}", entry.path().display()); + summary += rm_rf(entry.path())?; + } + } + } + // Third, remove any unused archives (by searching for archives that are not symlinked). // TODO(charlie): Remove any unused source distributions. This requires introspecting the // cache contents, e.g., reading and deserializing the manifests. diff --git a/crates/uv-cli/src/lib.rs b/crates/uv-cli/src/lib.rs index aff0af791c3e..50be1b8e4d60 100644 --- a/crates/uv-cli/src/lib.rs +++ b/crates/uv-cli/src/lib.rs @@ -279,7 +279,7 @@ pub enum CacheCommand { /// Clear the cache, removing all entries or those linked to specific packages. Clean(CleanArgs), /// Prune all unreachable objects from the cache. - Prune, + Prune(PruneArgs), /// Show the cache directory. Dir, } @@ -291,6 +291,20 @@ pub struct CleanArgs { pub package: Vec, } +#[derive(Args, Debug)] +#[allow(clippy::struct_excessive_bools)] +pub struct PruneArgs { + /// Whether to remove unzipped wheels from the cache, leaving only zipped wheel entries. + /// + /// By default, uv stores unzipped wheels in the cache, which enables high-performance package + /// installation. In some scenarios, though, persisting unzipped wheels may be undesirable. For + /// example, in GitHub Actions or other CI environments, uploading unzipped wheels to a remote + /// cache may have a negative impact on cache performance. Pruning unzipped wheels will leave + /// the cache with any built wheels in their zipped form. 
+ #[arg(long)] + pub all_unzipped: bool, +} + #[derive(Args)] #[allow(clippy::struct_excessive_bools)] pub struct PipNamespace { diff --git a/crates/uv/src/commands/cache_prune.rs b/crates/uv/src/commands/cache_prune.rs index c5c4b60d91fd..3de1eef3c83d 100644 --- a/crates/uv/src/commands/cache_prune.rs +++ b/crates/uv/src/commands/cache_prune.rs @@ -10,7 +10,11 @@ use crate::commands::{human_readable_bytes, ExitStatus}; use crate::printer::Printer; /// Prune all unreachable objects from the cache. -pub(crate) fn cache_prune(cache: &Cache, printer: Printer) -> Result { +pub(crate) fn cache_prune( + all_unzipped: bool, + cache: &Cache, + printer: Printer, +) -> Result { if !cache.root().exists() { writeln!( printer.stderr(), @@ -27,7 +31,7 @@ pub(crate) fn cache_prune(cache: &Cache, printer: Printer) -> Result )?; let summary = cache - .prune() + .prune(all_unzipped) .with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?; // Write a summary of the number of files and directories removed. 
diff --git a/crates/uv/src/lib.rs b/crates/uv/src/lib.rs index f35ab03f949b..9e6e953bf574 100644 --- a/crates/uv/src/lib.rs +++ b/crates/uv/src/lib.rs @@ -538,8 +538,11 @@ async fn run(cli: Cli) -> Result { commands::cache_clean(&args.package, &cache, printer) } Commands::Cache(CacheNamespace { - command: CacheCommand::Prune, - }) => commands::cache_prune(&cache, printer), + command: CacheCommand::Prune(args), + }) => { + show_settings!(args); + commands::cache_prune(args.all_unzipped, &cache, printer) + } Commands::Cache(CacheNamespace { command: CacheCommand::Dir, }) => { diff --git a/crates/uv/tests/cache_prune.rs b/crates/uv/tests/cache_prune.rs index 81166b013dbf..d121cdafa345 100644 --- a/crates/uv/tests/cache_prune.rs +++ b/crates/uv/tests/cache_prune.rs @@ -3,8 +3,8 @@ use anyhow::Result; use assert_cmd::prelude::*; use assert_fs::prelude::*; - use common::uv_snapshot; +use indoc::indoc; use crate::common::TestContext; @@ -171,3 +171,55 @@ fn prune_stale_symlink() -> Result<()> { Ok(()) } + +/// `cache prune --all-unzips` should remove all unzipped archives. +#[test] +fn prune_unzipped() -> Result<()> { + let context = TestContext::new("3.12"); + + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(indoc! { r" + source-distribution==0.0.1 + " })?; + + // Install a requirement, to populate the cache. 
+ uv_snapshot!(context.filters(), context.pip_sync().env_remove("UV_EXCLUDE_NEWER").arg("requirements.txt").arg("--reinstall"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Prepared 1 package in [TIME] + Installed 1 package in [TIME] + + source-distribution==0.0.1 + "###); + + uv_snapshot!(context.filters(), context.prune().arg("--all-unzipped"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Pruning cache at: [CACHE_DIR]/ + Removed 151 files ([SIZE]) + "###); + + // Reinstalling the source distribution should not require re-downloading the source + // distribution. + uv_snapshot!(context.filters(), context.pip_sync().env_remove("UV_EXCLUDE_NEWER").arg("requirements.txt").arg("--reinstall").arg("--offline"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Prepared 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - source-distribution==0.0.1 + + source-distribution==0.0.1 + "###); + + Ok(()) +} From 7ba49f2463057e2ec2f040b9cccbd113a63ad779 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 23 Jul 2024 23:03:20 -0400 Subject: [PATCH 2/2] Junction --- crates/uv-cache/src/lib.rs | 4 ++-- crates/uv-cache/src/removal.rs | 9 +++++++-- crates/uv-cli/src/lib.rs | 22 ++++++++++++++-------- crates/uv/src/commands/cache_prune.rs | 8 ++------ crates/uv/src/lib.rs | 2 +- crates/uv/tests/cache_prune.rs | 4 ++-- 6 files changed, 28 insertions(+), 21 deletions(-) diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index bf22963dda72..457a0513ee13 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -340,7 +340,7 @@ impl Cache { } /// Run the garbage collector on the cache, removing any dangling entries. 
- pub fn prune(&self, all_unzipped: bool) -> Result { + pub fn prune(&self, ci: bool) -> Result { let mut summary = Removal::default(); // First, remove any top-level directories that are unused. These typically represent @@ -387,7 +387,7 @@ impl Cache { } // Third, if enabled, remove all unzipped wheels, leaving only the wheel archives. - if all_unzipped { + if ci { // Remove the entire pre-built wheel cache, since every entry is an unzipped wheel. match fs::read_dir(self.bucket(CacheBucket::Wheels)) { Ok(entries) => { diff --git a/crates/uv-cache/src/removal.rs b/crates/uv-cache/src/removal.rs index c54ce81b5aef..156f48a3832b 100644 --- a/crates/uv-cache/src/removal.rs +++ b/crates/uv-cache/src/removal.rs @@ -40,7 +40,12 @@ impl Removal { // Remove the file. self.total_bytes += metadata.len(); - remove_file(path)?; + if cfg!(windows) && metadata.is_symlink() { + // Remove the junction. + remove_dir(path)?; + } else { + remove_file(path)?; + } return Ok(()); } @@ -64,7 +69,7 @@ impl Removal { let entry = entry?; if cfg!(windows) && entry.file_type().is_symlink() { - // In this branch, we try to handle junction removal. + // Remove the junction. self.num_files += 1; remove_dir(entry.path())?; } else if entry.file_type().is_dir() { diff --git a/crates/uv-cli/src/lib.rs b/crates/uv-cli/src/lib.rs index 50be1b8e4d60..a79e747535ff 100644 --- a/crates/uv-cli/src/lib.rs +++ b/crates/uv-cli/src/lib.rs @@ -294,15 +294,21 @@ pub struct CleanArgs { #[derive(Args, Debug)] #[allow(clippy::struct_excessive_bools)] pub struct PruneArgs { - /// Whether to remove unzipped wheels from the cache, leaving only zipped wheel entries. - /// - /// By default, uv stores unzipped wheels in the cache, which enables high-performance package - /// installation. In some scenarios, though, persisting unzipped wheels may be undesirable. For - /// example, in GitHub Actions or other CI environments, uploading unzipped wheels to a remote - /// cache may have a negative impact on cache performance. 
Pruning unzipped wheels will leave - /// the cache with any built wheels in their zipped form. + /// Optimize the cache for persistence in a continuous integration environment, like GitHub + /// Actions. + /// + /// By default, uv caches both the wheels that it builds from source and the pre-built wheels + /// that it downloads directly, to enable high-performance package installation. In some + /// scenarios, though, persisting pre-built wheels may be undesirable. For example, in GitHub + /// Actions, it's faster to omit pre-built wheels from the cache and instead re-download + /// them on each run. However, it typically _is_ faster to cache wheels that are built from + /// source, since the wheel building process can be expensive, especially for extension + /// modules. + /// + /// In `--ci` mode, uv will prune any pre-built wheels from the cache, but retain any wheels + /// that were built from source. #[arg(long)] - pub all_unzipped: bool, + pub ci: bool, } #[derive(Args)] diff --git a/crates/uv/src/commands/cache_prune.rs b/crates/uv/src/commands/cache_prune.rs index 3de1eef3c83d..8a21b2cb7000 100644 --- a/crates/uv/src/commands/cache_prune.rs +++ b/crates/uv/src/commands/cache_prune.rs @@ -10,11 +10,7 @@ use crate::commands::{human_readable_bytes, ExitStatus}; use crate::printer::Printer; /// Prune all unreachable objects from the cache. -pub(crate) fn cache_prune( - all_unzipped: bool, - cache: &Cache, - printer: Printer, -) -> Result { +pub(crate) fn cache_prune(ci: bool, cache: &Cache, printer: Printer) -> Result { if !cache.root().exists() { writeln!( printer.stderr(), @@ -31,7 +27,7 @@ pub(crate) fn cache_prune( )?; let summary = cache - .prune(all_unzipped) + .prune(ci) .with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?; // Write a summary of the number of files and directories removed. 
diff --git a/crates/uv/src/lib.rs b/crates/uv/src/lib.rs index 9e6e953bf574..dc7c06a535e6 100644 --- a/crates/uv/src/lib.rs +++ b/crates/uv/src/lib.rs @@ -541,7 +541,7 @@ async fn run(cli: Cli) -> Result { command: CacheCommand::Prune(args), }) => { show_settings!(args); - commands::cache_prune(args.all_unzipped, &cache, printer) + commands::cache_prune(args.ci, &cache, printer) } Commands::Cache(CacheNamespace { command: CacheCommand::Dir, diff --git a/crates/uv/tests/cache_prune.rs b/crates/uv/tests/cache_prune.rs index d121cdafa345..24471a806cf9 100644 --- a/crates/uv/tests/cache_prune.rs +++ b/crates/uv/tests/cache_prune.rs @@ -172,7 +172,7 @@ fn prune_stale_symlink() -> Result<()> { Ok(()) } -/// `cache prune --all-unzips` should remove all unzipped archives. +/// `cache prune --ci` should remove all unzipped archives. #[test] fn prune_unzipped() -> Result<()> { let context = TestContext::new("3.12"); @@ -195,7 +195,7 @@ fn prune_unzipped() -> Result<()> { + source-distribution==0.0.1 "###); - uv_snapshot!(context.filters(), context.prune().arg("--all-unzipped"), @r###" + uv_snapshot!(context.filters(), context.prune().arg("--ci"), @r###" success: true exit_code: 0 ----- stdout -----