Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 50 additions & 19 deletions crates/uv-build-backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,12 +278,18 @@ mod tests {
use indoc::indoc;
use insta::assert_snapshot;
use itertools::Itertools;
use sha2::Digest;
use std::io::{BufReader, Read};
use tempfile::TempDir;
use uv_fs::{copy_dir_all, relative_to};

/// Test that source tree -> source dist -> wheel includes the right files and is stable and
/// deterministic in dependent of the build path.
/// Tests that builds are stable and include the right files.
///
/// Tests that both source tree -> source dist -> wheel and source tree -> wheel include the
/// right files. Also checks that the resulting archives are byte-by-byte identical
/// independent of the build path or platform, with the caveat that we cannot serialize an
/// executable bit on Windows. This ensures reproducible builds and best-effort
/// platform-independent deterministic builds.
#[test]
fn built_by_uv_building() {
let built_by_uv = Path::new("../../scripts/packages/built-by-uv");
Expand All @@ -309,6 +315,20 @@ mod tests {
fs_err::copy(built_by_uv.join(dir), src.path().join(dir)).unwrap();
}

// Clear executable bit on Unix to build the same archive between Unix and Windows.
// This is a caveat to the determinism of the uv build backend: When a file has the
// executable bit set in the source repository, it only has the executable bit on Unix,
// as Windows does not have the concept of the executable bit.
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let path = src.path().join("scripts").join("whoami.sh");
let metadata = fs_err::metadata(&path).unwrap();
let mut perms = metadata.permissions();
perms.set_mode(perms.mode() & !0o111);
fs_err::set_permissions(&path, perms).unwrap();
}

// Add some files to be excluded
let module_root = src.path().join("src").join("built_by_uv");
fs_err::create_dir_all(module_root.join("__pycache__")).unwrap();
Expand Down Expand Up @@ -336,10 +356,15 @@ mod tests {
let source_dist_dir = TempDir::new().unwrap();
let (_name, source_dist_list_files) = list_source_dist(src.path(), "1.0.0+test").unwrap();
build_source_dist(src.path(), source_dist_dir.path(), "1.0.0+test").unwrap();
let source_dist_path = source_dist_dir.path().join("built_by_uv-0.1.0.tar.gz");
// Check that the source dist is reproducible across platforms.
assert_snapshot!(
format!("{:x}", sha2::Sha256::digest(fs_err::read(&source_dist_path).unwrap())),
@"dab46bcc4d66960a11cfdc19604512a8e1a3241a67536f7e962166760e9c575c"
);

// Build a wheel from the source dist
let sdist_tree = TempDir::new().unwrap();
let source_dist_path = source_dist_dir.path().join("built_by_uv-0.1.0.tar.gz");
let sdist_reader = BufReader::new(File::open(&source_dist_path).unwrap());
let mut source_dist = tar::Archive::new(GzDecoder::new(sdist_reader));
let mut source_dist_contents: Vec<_> = source_dist
Expand Down Expand Up @@ -419,10 +444,10 @@ mod tests {
built_by_uv-0.1.0/third-party-licenses
built_by_uv-0.1.0/third-party-licenses/PEP-401.txt
"###);
assert_snapshot!(format_file_list(source_dist_list_files), @r###"
assert_snapshot!(format_file_list(source_dist_list_files), @r"
built_by_uv-0.1.0/PKG-INFO (generated)
built_by_uv-0.1.0/LICENSE-APACHE (LICENSE-APACHE)
built_by_uv-0.1.0/LICENSE-MIT (LICENSE-MIT)
built_by_uv-0.1.0/PKG-INFO (generated)
built_by_uv-0.1.0/README.md (README.md)
built_by_uv-0.1.0/assets/data.csv (assets/data.csv)
built_by_uv-0.1.0/header/built_by_uv.h (header/built_by_uv.h)
Expand All @@ -435,7 +460,7 @@ mod tests {
built_by_uv-0.1.0/src/built_by_uv/build-only.h (src/built_by_uv/build-only.h)
built_by_uv-0.1.0/src/built_by_uv/cli.py (src/built_by_uv/cli.py)
built_by_uv-0.1.0/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt)
"###);
");

assert_snapshot!(indirect_wheel_contents.iter().map(|path| path.replace('\\', "/")).join("\n"), @r###"
built_by_uv-0.1.0.data/data/
Expand Down Expand Up @@ -463,28 +488,34 @@ mod tests {
built_by_uv/cli.py
"###);

assert_snapshot!(format_file_list(wheel_list_files), @r###"
built_by_uv-0.1.0.data/data/data.csv (assets/data.csv)
built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h)
built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh)
built_by_uv-0.1.0.dist-info/METADATA (generated)
built_by_uv-0.1.0.dist-info/WHEEL (generated)
built_by_uv-0.1.0.dist-info/entry_points.txt (generated)
built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE)
built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT)
built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt)
assert_snapshot!(format_file_list(wheel_list_files), @r"
built_by_uv/__init__.py (src/built_by_uv/__init__.py)
built_by_uv/arithmetic/__init__.py (src/built_by_uv/arithmetic/__init__.py)
built_by_uv/arithmetic/circle.py (src/built_by_uv/arithmetic/circle.py)
built_by_uv/arithmetic/pi.txt (src/built_by_uv/arithmetic/pi.txt)
built_by_uv/cli.py (src/built_by_uv/cli.py)
"###);
built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE)
built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT)
built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt)
built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h)
built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh)
built_by_uv-0.1.0.data/data/data.csv (assets/data.csv)
built_by_uv-0.1.0.dist-info/WHEEL (generated)
built_by_uv-0.1.0.dist-info/entry_points.txt (generated)
built_by_uv-0.1.0.dist-info/METADATA (generated)
");

// Check that we write deterministic wheels.
// Check that the wheel is the same for both build paths and reproducible across platforms.
let wheel_filename = "built_by_uv-0.1.0-py3-none-any.whl";
let index_wheel_contents =
fs_err::read(indirect_output_dir.path().join(wheel_filename)).unwrap();
assert_eq!(
fs_err::read(direct_output_dir.path().join(wheel_filename)).unwrap(),
fs_err::read(indirect_output_dir.path().join(wheel_filename)).unwrap()
index_wheel_contents
);
assert_snapshot!(
format!("{:x}", sha2::Sha256::digest(&index_wheel_contents)),
@"ac3f68ac448023bca26de689d80401bff57f764396ae802bf4666234740ffbe3"
);
}

Expand Down
20 changes: 12 additions & 8 deletions crates/uv-build-backend/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,14 +410,18 @@ impl PyProjectToml {
}
})?;

for entry in WalkDir::new(root).into_iter().filter_entry(|entry| {
license_globs.match_directory(
entry
.path()
.strip_prefix(root)
.expect("walkdir starts with root"),
)
}) {
for entry in WalkDir::new(root)
.sort_by_file_name()
.into_iter()
.filter_entry(|entry| {
license_globs.match_directory(
entry
.path()
.strip_prefix(root)
.expect("walkdir starts with root"),
)
})
{
let entry = entry.map_err(|err| Error::WalkDir {
root: root.to_path_buf(),
err,
Expand Down
55 changes: 32 additions & 23 deletions crates/uv-build-backend/src/source_dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ pub fn list_source_dist(
let mut files = FileList::new();
let writer = ListWriter::new(&mut files);
write_source_dist(source_tree, writer, uv_version)?;
// Ensure a deterministic order even when file walking changes
files.sort_unstable();
Ok((filename, files))
}

Expand Down Expand Up @@ -206,21 +204,25 @@ fn write_source_dist(
let (include_matcher, exclude_matcher) = source_dist_matcher(&pyproject_toml, settings)?;

let mut files_visited = 0;
for entry in WalkDir::new(source_tree).into_iter().filter_entry(|entry| {
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");
for entry in WalkDir::new(source_tree)
.sort_by_file_name()
.into_iter()
.filter_entry(|entry| {
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");

// Fast path: Don't descend into a directory that can't be included. This is the most
// important performance optimization, it avoids descending into directories such as
// `.venv`. While walkdir is generally cheap, we still avoid traversing large data
// directories that often exist on the top level of a project. This is especially noticeable
// on network file systems with high latencies per operation (while contiguous reading may
// still be fast).
include_matcher.match_directory(relative) && !exclude_matcher.is_match(relative)
}) {
// Fast path: Don't descend into a directory that can't be included. This is the most
// important performance optimization, it avoids descending into directories such as
// `.venv`. While walkdir is generally cheap, we still avoid traversing large data
// directories that often exist on the top level of a project. This is especially noticeable
// on network file systems with high latencies per operation (while contiguous reading may
// still be fast).
include_matcher.match_directory(relative) && !exclude_matcher.is_match(relative)
})
{
let entry = entry.map_err(|err| Error::WalkDir {
root: source_tree.to_path_buf(),
err,
Expand Down Expand Up @@ -305,15 +307,22 @@ impl DirectoryWriter for TarGzWriter {
fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error> {
let metadata = fs_err::metadata(file)?;
let mut header = Header::new_gnu();
// Preserve the executable bit, especially for scripts
#[cfg(unix)]
{
// Preserve for example an executable bit.
header.set_mode(std::os::unix::fs::MetadataExt::mode(&metadata));
}
let executable_bit = {
use std::os::unix::fs::PermissionsExt;
file.metadata()?.permissions().mode() & 0o111 != 0
};
// Windows has no executable bit
#[cfg(not(unix))]
{
// Reasonable default to avoid 0o000 permissions, the user's umask will be applied on
// unpacking.
let executable_bit = false;

// Set reasonable defaults to avoid 0o000 permissions, while avoiding adding the exact
// filesystem permissions to the archive for reproducibility. Where applicable, the
// operating system filters the stored permission by the user's umask when unpacking.
if executable_bit {
header.set_mode(0o755);
} else {
header.set_mode(0o644);
}
header.set_size(metadata.len());
Expand Down
28 changes: 16 additions & 12 deletions crates/uv-build-backend/src/wheel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ pub fn list_wheel(
let mut files = FileList::new();
let writer = ListWriter::new(&mut files);
write_wheel(source_tree, &pyproject_toml, &filename, uv_version, writer)?;
// Ensure a deterministic order even when file walking changes
files.sort_unstable();
Ok((filename, files))
}

Expand Down Expand Up @@ -136,6 +134,7 @@ fn write_wheel(

let mut files_visited = 0;
for entry in WalkDir::new(module_root)
.sort_by_file_name()
.into_iter()
.filter_entry(|entry| !exclude_matcher.is_match(entry.path()))
{
Expand Down Expand Up @@ -482,16 +481,20 @@ fn wheel_subdir_from_globs(

wheel_writer.write_directory(target)?;

for entry in WalkDir::new(src).into_iter().filter_entry(|entry| {
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(src)
.expect("walkdir starts with root");

// Fast path: Don't descend into a directory that can't be included.
matcher.match_directory(relative)
}) {
for entry in WalkDir::new(src)
.sort_by_file_name()
.into_iter()
.filter_entry(|entry| {
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(src)
.expect("walkdir starts with root");

// Fast path: Don't descend into a directory that can't be included.
matcher.match_directory(relative)
})
{
let entry = entry.map_err(|err| Error::WalkDir {
root: src.to_path_buf(),
err,
Expand Down Expand Up @@ -823,6 +826,7 @@ mod test {
metadata(built_by_uv, metadata_dir.path(), "1.0.0+test").unwrap();

let mut files: Vec<_> = WalkDir::new(metadata_dir.path())
.sort_by_file_name()
.into_iter()
.map(|entry| {
entry
Expand Down
5 changes: 3 additions & 2 deletions crates/uv-globfilter/src/glob_dir_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ mod tests {
let matcher = GlobDirFilter::from_globs(&patterns).unwrap();

// Test the prefix filtering
let mut visited: Vec<_> = WalkDir::new(dir.path())
let visited: Vec<_> = WalkDir::new(dir.path())
.sort_by_file_name()
.into_iter()
.filter_entry(|entry| {
let relative = entry
Expand All @@ -196,7 +197,6 @@ mod tests {
relative.replace(MAIN_SEPARATOR, "/")
})
.collect();
visited.sort();
assert_eq!(
visited,
[
Expand Down Expand Up @@ -234,6 +234,7 @@ mod tests {

let walkdir_root = dir.path();
let mut matches: Vec<_> = WalkDir::new(walkdir_root)
.sort_by_file_name()
.into_iter()
.filter_entry(|entry| {
// TODO(konsti): This should be prettier.
Expand Down
1 change: 1 addition & 0 deletions crates/uv-globfilter/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ fn main() {

let walkdir_root = args().next().unwrap();
for entry in WalkDir::new(&walkdir_root)
.sort_by_file_name()
.into_iter()
.filter_entry(|entry| {
// TODO(konsti): This should be prettier.
Expand Down
Loading
Loading