Skip to content

Commit

Permalink
Add --ci mode to uv cache prune (#5391)
Browse files Browse the repository at this point in the history
## Summary

Users can now run `uv cache prune --ci` (open to feedback on the name of
that flag) to remove all pre-built wheels from the cache, leaving behind
zipped, built wheels (which tend to be the most expensive assets to
re-create). This should greatly increase cache performance in CI
environments, since uploading unzipped wheels can actually hurt
performance if you're persisting the uv cache.

Closes #5282.
  • Loading branch information
charliermarsh authored Jul 24, 2024
1 parent fff3a7d commit 4bc04f9
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 9 deletions.
30 changes: 29 additions & 1 deletion crates/uv-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ impl Cache {
}

/// Run the garbage collector on the cache, removing any dangling entries.
pub fn prune(&self) -> Result<Removal, io::Error> {
pub fn prune(&self, ci: bool) -> Result<Removal, io::Error> {
let mut summary = Removal::default();

// First, remove any top-level directories that are unused. These typically represent
Expand Down Expand Up @@ -386,6 +386,34 @@ impl Cache {
Err(err) => return Err(err),
}

// Third, if enabled, remove all unzipped wheels, leaving only the wheel archives.
if ci {
// Remove the entire pre-built wheel cache, since every entry is an unzipped wheel.
match fs::read_dir(self.bucket(CacheBucket::Wheels)) {
Ok(entries) => {
for entry in entries {
let entry = entry?;
let path = fs_err::canonicalize(entry.path())?;
if path.is_dir() {
debug!("Removing unzipped wheel entry: {}", path.display());
summary += rm_rf(path)?;
}
}
}
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
Err(err) => return Err(err),
}

// Remove any unzipped wheels (i.e., symlinks) from the built wheels cache.
for entry in walkdir::WalkDir::new(self.bucket(CacheBucket::SourceDistributions)) {
let entry = entry?;
if entry.file_type().is_symlink() {
debug!("Removing unzipped wheel entry: {}", entry.path().display());
summary += rm_rf(entry.path())?;
}
}
}

// Fourth, remove any unused archives (by searching for archives that are not symlinked).
// TODO(charlie): Remove any unused source distributions. This requires introspecting the
// cache contents, e.g., reading and deserializing the manifests.
Expand Down
9 changes: 7 additions & 2 deletions crates/uv-cache/src/removal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ impl Removal {

// Remove the file.
self.total_bytes += metadata.len();
remove_file(path)?;
if cfg!(windows) && metadata.is_symlink() {
// Remove the junction.
remove_dir(path)?;
} else {
remove_file(path)?;
}

return Ok(());
}
Expand All @@ -64,7 +69,7 @@ impl Removal {

let entry = entry?;
if cfg!(windows) && entry.file_type().is_symlink() {
// In this branch, we try to handle junction removal.
// Remove the junction.
self.num_files += 1;
remove_dir(entry.path())?;
} else if entry.file_type().is_dir() {
Expand Down
22 changes: 21 additions & 1 deletion crates/uv-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ pub enum CacheCommand {
/// Clear the cache, removing all entries or those linked to specific packages.
Clean(CleanArgs),
/// Prune all unreachable objects from the cache.
Prune,
Prune(PruneArgs),
/// Show the cache directory.
Dir,
}
Expand All @@ -291,6 +291,26 @@ pub struct CleanArgs {
pub package: Vec<PackageName>,
}

#[derive(Args, Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct PruneArgs {
    /// Optimize the cache for persistence in a continuous integration environment, like GitHub
    /// Actions.
    ///
    /// By default, uv caches both the wheels that it builds from source and the pre-built wheels
    /// that it downloads directly, to enable high-performance package installation. In some
    /// scenarios, though, persisting pre-built wheels may be undesirable. For example, in GitHub
    /// Actions, it's faster to omit pre-built wheels from the cache and instead re-download them
    /// on each run. However, it typically _is_ faster to cache wheels that are built from
    /// source, since the wheel building process can be expensive, especially for extension
    /// modules.
    ///
    /// In `--ci` mode, uv will prune any pre-built wheels from the cache, but retain any wheels
    /// that were built from source.
    #[arg(long)]
    pub ci: bool,
}

#[derive(Args)]
#[allow(clippy::struct_excessive_bools)]
pub struct PipNamespace {
Expand Down
4 changes: 2 additions & 2 deletions crates/uv/src/commands/cache_prune.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::commands::{human_readable_bytes, ExitStatus};
use crate::printer::Printer;

/// Prune all unreachable objects from the cache.
pub(crate) fn cache_prune(cache: &Cache, printer: Printer) -> Result<ExitStatus> {
pub(crate) fn cache_prune(ci: bool, cache: &Cache, printer: Printer) -> Result<ExitStatus> {
if !cache.root().exists() {
writeln!(
printer.stderr(),
Expand All @@ -27,7 +27,7 @@ pub(crate) fn cache_prune(cache: &Cache, printer: Printer) -> Result<ExitStatus>
)?;

let summary = cache
.prune()
.prune(ci)
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;

// Write a summary of the number of files and directories removed.
Expand Down
7 changes: 5 additions & 2 deletions crates/uv/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,8 +538,11 @@ async fn run(cli: Cli) -> Result<ExitStatus> {
commands::cache_clean(&args.package, &cache, printer)
}
Commands::Cache(CacheNamespace {
command: CacheCommand::Prune,
}) => commands::cache_prune(&cache, printer),
command: CacheCommand::Prune(args),
}) => {
show_settings!(args);
commands::cache_prune(args.ci, &cache, printer)
}
Commands::Cache(CacheNamespace {
command: CacheCommand::Dir,
}) => {
Expand Down
54 changes: 53 additions & 1 deletion crates/uv/tests/cache_prune.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
use anyhow::Result;
use assert_cmd::prelude::*;
use assert_fs::prelude::*;

use common::uv_snapshot;
use indoc::indoc;

use crate::common::TestContext;

Expand Down Expand Up @@ -171,3 +171,55 @@ fn prune_stale_symlink() -> Result<()> {

Ok(())
}

/// `cache prune --ci` should remove all unzipped archives.
///
/// The test runs in three steps: install `source-distribution==0.0.1` to populate the cache,
/// run `cache prune --ci`, then reinstall the same requirement with `--offline` and expect the
/// install to succeed from whatever the prune left behind.
#[test]
fn prune_unzipped() -> Result<()> {
let context = TestContext::new("3.12");

let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt.write_str(indoc! { r"
source-distribution==0.0.1
" })?;

// Install a requirement, to populate the cache.
uv_snapshot!(context.filters(), context.pip_sync().env_remove("UV_EXCLUDE_NEWER").arg("requirements.txt").arg("--reinstall"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Installed 1 package in [TIME]
+ source-distribution==0.0.1
"###);

// Prune the cache in `--ci` mode. The snapshot pins the exact number of removed files, so it
// needs to be refreshed if the cache contents for this package change.
uv_snapshot!(context.filters(), context.prune().arg("--ci"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Pruning cache at: [CACHE_DIR]/
Removed 151 files ([SIZE])
"###);

// Reinstalling the source distribution should not require re-downloading the source
// distribution. `--offline` is passed here, so the install is expected to succeed using only
// what remains in the pruned cache (presumably the built wheel archive).
uv_snapshot!(context.filters(), context.pip_sync().env_remove("UV_EXCLUDE_NEWER").arg("requirements.txt").arg("--reinstall").arg("--offline"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
- source-distribution==0.0.1
+ source-distribution==0.0.1
"###);

Ok(())
}

0 comments on commit 4bc04f9

Please sign in to comment.