Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ http = "1.2.0"
http-cache-reqwest = "0.15.1"
human_bytes = "0.4.3"
humantime = "2.1.0"
ignore = "0.4.23"
indexmap = "2.7.1"
indicatif = "0.17.11"
insta = "1.42.1"
Expand Down Expand Up @@ -240,7 +239,6 @@ fs_extra = { workspace = true }
futures = { workspace = true }
human_bytes = { workspace = true }
humantime = { workspace = true }
ignore = { workspace = true }
indexmap = { workspace = true, features = ["serde"] }
indicatif = { workspace = true }
is_executable = { workspace = true }
Expand Down Expand Up @@ -298,6 +296,7 @@ pixi_uv_conversions = { workspace = true }
pypi_mapping = { workspace = true }
pypi_modifiers = { workspace = true }
rattler_virtual_packages = { workspace = true }
rayon = "1.10.0"
regex = { workspace = true }
reqwest = { workspace = true, features = [
"http2",
Expand Down
8 changes: 7 additions & 1 deletion crates/pixi_glob/src/glob_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ use itertools::{Either, Itertools};
use thiserror::Error;
use wax::{Glob, WalkEntry};

pub(crate) struct GlobSet<'t> {
/// A set of globs to include and exclude from a directory.
pub struct GlobSet<'t> {
/// The globs to include in the filter.
pub include: Vec<Glob<'t>>,
/// The globs to exclude from the filter.
pub exclude: Vec<Glob<'t>>,
}

#[derive(Error, Debug)]
#[allow(missing_docs)]
pub enum GlobSetError {
#[error("failed to access {}", .0.display())]
Io(PathBuf, #[source] io::Error),
Expand All @@ -30,6 +32,10 @@ pub enum GlobSetError {
}

impl<'t> GlobSet<'t> {
/// Create a new `GlobSet` from a list of globs.
///
/// The globs are split into inclusion and exclusion globs based on whether they
/// start with `!`.
pub fn create(globs: impl IntoIterator<Item = &'t str>) -> Result<GlobSet<'t>, GlobSetError> {
// Split the globs into inclusion and exclusion globs based on whether they
// start with `!`.
Expand Down
1 change: 1 addition & 0 deletions crates/pixi_glob/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ mod glob_set;
pub use glob_hash::{GlobHash, GlobHashError};
pub use glob_hash_cache::{GlobHashCache, GlobHashKey};
pub use glob_mtime::{GlobModificationTime, GlobModificationTimeError};
pub use glob_set::{GlobSet, GlobSetError};
2 changes: 1 addition & 1 deletion pixi_docs/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

97 changes: 43 additions & 54 deletions src/task/file_hashes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,29 @@
//! The main entry-point to compute the hashes of all files in a directory is the
//! [`FileHashes::from_files`] method.

use ignore::{overrides::OverrideBuilder, WalkBuilder};
use itertools::Itertools;
use std::hash::Hash;
use pixi_glob::{GlobSet, GlobSetError};
use rayon::prelude::*;
use std::sync::LazyLock;
use std::{
clone::Clone,
collections::HashMap,
fmt::Debug,
fs::File,
hash::Hasher,
hash::{Hash, Hasher},
io::{BufRead, BufReader},
path::{Path, PathBuf},
sync::Arc,
};
use thiserror::Error;
use tokio::task::JoinError;
use uv_configuration::RAYON_INITIALIZE;
use xxhash_rust::xxh3::Xxh3;

#[derive(Debug, Error)]
pub enum FileHashesError {
#[error(transparent)]
WalkError(#[from] ignore::Error),
WalkError(#[from] GlobSetError),

#[error("I/O error while reading file {0}")]
IoError(PathBuf, #[source] std::io::Error),
Expand Down Expand Up @@ -72,75 +77,59 @@ impl FileHashes {
}

// Construct the custom filter
let mut ignore_builder = OverrideBuilder::new(root);
let mut ignore_builder = vec![];
for ignore_line in filters {
let path = root.join(ignore_line.as_ref());
let mut pat = if ignore_line.as_ref().ends_with('/') {
let pat = if ignore_line.as_ref().ends_with('/') {
format!("{}**", ignore_line.as_ref())
} else if path.exists() && path.is_dir() {
format!("{}/**", ignore_line.as_ref())
} else {
ignore_line.as_ref().to_owned()
};

if pat.starts_with('!') && !pat.starts_with("!/") {
// make sure there is a `/` at the 2nd place so that the pattern reads
// `!/**/lib.rs` instead of `!**/lib.rs`
pat.insert(1, '/');
} else {
// Same for the others, make sure they start in the right folder
if !pat.starts_with('/') {
pat.insert(0, '/');
}
}
ignore_builder.add(&pat)?;
ignore_builder.push(pat);
}

let filter = ignore_builder.build()?;
let glob = GlobSet::create(ignore_builder.iter().map(|s| s.as_str()))?;

// Spawn a thread that will collect the results from a channel.
let (tx, rx) = crossbeam_channel::bounded(100);
let collect_handle =
tokio::task::spawn_blocking(move || rx.iter().collect::<Result<HashMap<_, _>, _>>());

// Iterate over all entries in parallel and send them over a channel to the collection thread.
let collect_root = root.to_owned();
WalkBuilder::new(root)
.overrides(filter)
.hidden(false)
.git_ignore(false)
.git_global(false)
.git_exclude(false)
// Turn this back off as it can cause issues with symlinks:
// https://github.com/prefix-dev/pixi/issues/2196
// TODO: The current idea is to completely reimplement this without the `ignore` crate.
// .follow_links(true)
.build_parallel()
.run(|| {
let tx = tx.clone();
let collect_root = collect_root.clone();
Box::new(move |entry| {
let result = match entry {
Ok(entry) if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) => {
return ignore::WalkState::Continue;
}
Ok(entry) => compute_file_hash(entry.path()).map(|hash| {
let path = entry
.path()
.strip_prefix(&collect_root)
.expect("path is not prefixed by the root");
tracing::info!("Added hash for file: {:?}", path);
(path.to_owned(), hash)
}),
Err(e) => Err(FileHashesError::from(e)),
};
match (result.is_err(), tx.send(result)) {
(true, _) => ignore::WalkState::Quit,
(_, Err(_)) => ignore::WalkState::Quit,
_ => ignore::WalkState::Continue,
}
let collect_root = Arc::new(root.to_owned());

// Collect all entries first to avoid holding lock during iteration
let entries: Vec<_> = glob.filter_directory(root).collect::<Result<Vec<_>, _>>()?;

// Force the initialization of the rayon thread pool to avoid implicit creation
// by the Installer.
LazyLock::force(&RAYON_INITIALIZE);

// Process entries in parallel using rayon
entries.into_par_iter().for_each(|entry| {
let tx = tx.clone();
let collect_root = Arc::clone(&collect_root);

let result: Result<(PathBuf, String), FileHashesError> = if entry.file_type().is_dir() {
// Skip directories
return;
} else {
compute_file_hash(entry.path()).map(|hash| {
let path = entry
.path()
.strip_prefix(&*collect_root)
.expect("path is not prefixed by the root");
tracing::info!("Added hash for file: {:?}", path);
(path.to_owned(), hash)
})
});
};

// Send result to channel - if it fails, we just continue with the next item
let _ = tx.send(result);
});

// Drop the local handle to the channel. This will close the channel which in turn will
// cause the collection thread to finish which allows us to join without deadlocking.
Expand Down
Loading