diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index 00f24aa5c5ba9..cbec1f0554c2e 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -278,12 +278,18 @@ mod tests { use indoc::indoc; use insta::assert_snapshot; use itertools::Itertools; + use sha2::Digest; use std::io::{BufReader, Read}; use tempfile::TempDir; use uv_fs::{copy_dir_all, relative_to}; - /// Test that source tree -> source dist -> wheel includes the right files and is stable and - /// deterministic in dependent of the build path. + /// Tests that builds are stable and include the right files. + /// + /// Tests that both source tree -> source dist -> wheel and source tree -> wheel include the + /// right files. Also checks that the resulting archives are byte-by-byte identical + /// independent of the build path or platform, with the caveat that we cannot serialize an + /// executable bit on Windows. This ensures reproducible builds and best-effort + /// platform-independent deterministic builds. #[test] fn built_by_uv_building() { let built_by_uv = Path::new("../../scripts/packages/built-by-uv"); @@ -309,6 +315,20 @@ mod tests { fs_err::copy(built_by_uv.join(dir), src.path().join(dir)).unwrap(); } + // Clear executable bit on Unix to build the same archive between Unix and Windows. + // This is a caveat to the determinism of the uv build backend: When a file has the + // executable bit in the source repository, it only has that bit on Unix, as Windows + // does not have the concept of the executable bit. 
+ #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let path = src.path().join("scripts").join("whoami.sh"); + let metadata = fs_err::metadata(&path).unwrap(); + let mut perms = metadata.permissions(); + perms.set_mode(perms.mode() & !0o111); + fs_err::set_permissions(&path, perms).unwrap(); + } + // Add some files to be excluded let module_root = src.path().join("src").join("built_by_uv"); fs_err::create_dir_all(module_root.join("__pycache__")).unwrap(); @@ -336,10 +356,15 @@ mod tests { let source_dist_dir = TempDir::new().unwrap(); let (_name, source_dist_list_files) = list_source_dist(src.path(), "1.0.0+test").unwrap(); build_source_dist(src.path(), source_dist_dir.path(), "1.0.0+test").unwrap(); + let source_dist_path = source_dist_dir.path().join("built_by_uv-0.1.0.tar.gz"); + // Check that the source dist is reproducible across platforms. + assert_snapshot!( + format!("{:x}", sha2::Sha256::digest(fs_err::read(&source_dist_path).unwrap())), + @"dab46bcc4d66960a11cfdc19604512a8e1a3241a67536f7e962166760e9c575c" + ); // Build a wheel from the source dist let sdist_tree = TempDir::new().unwrap(); - let source_dist_path = source_dist_dir.path().join("built_by_uv-0.1.0.tar.gz"); let sdist_reader = BufReader::new(File::open(&source_dist_path).unwrap()); let mut source_dist = tar::Archive::new(GzDecoder::new(sdist_reader)); let mut source_dist_contents: Vec<_> = source_dist @@ -419,10 +444,10 @@ mod tests { built_by_uv-0.1.0/third-party-licenses built_by_uv-0.1.0/third-party-licenses/PEP-401.txt "###); - assert_snapshot!(format_file_list(source_dist_list_files), @r###" + assert_snapshot!(format_file_list(source_dist_list_files), @r" + built_by_uv-0.1.0/PKG-INFO (generated) built_by_uv-0.1.0/LICENSE-APACHE (LICENSE-APACHE) built_by_uv-0.1.0/LICENSE-MIT (LICENSE-MIT) - built_by_uv-0.1.0/PKG-INFO (generated) built_by_uv-0.1.0/README.md (README.md) built_by_uv-0.1.0/assets/data.csv (assets/data.csv) built_by_uv-0.1.0/header/built_by_uv.h (header/built_by_uv.h) 
@@ -435,7 +460,7 @@ mod tests { built_by_uv-0.1.0/src/built_by_uv/build-only.h (src/built_by_uv/build-only.h) built_by_uv-0.1.0/src/built_by_uv/cli.py (src/built_by_uv/cli.py) built_by_uv-0.1.0/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) - "###); + "); assert_snapshot!(indirect_wheel_contents.iter().map(|path| path.replace('\\', "/")).join("\n"), @r###" built_by_uv-0.1.0.data/data/ @@ -463,28 +488,34 @@ mod tests { built_by_uv/cli.py "###); - assert_snapshot!(format_file_list(wheel_list_files), @r###" - built_by_uv-0.1.0.data/data/data.csv (assets/data.csv) - built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h) - built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh) - built_by_uv-0.1.0.dist-info/METADATA (generated) - built_by_uv-0.1.0.dist-info/WHEEL (generated) - built_by_uv-0.1.0.dist-info/entry_points.txt (generated) - built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE) - built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT) - built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) + assert_snapshot!(format_file_list(wheel_list_files), @r" built_by_uv/__init__.py (src/built_by_uv/__init__.py) built_by_uv/arithmetic/__init__.py (src/built_by_uv/arithmetic/__init__.py) built_by_uv/arithmetic/circle.py (src/built_by_uv/arithmetic/circle.py) built_by_uv/arithmetic/pi.txt (src/built_by_uv/arithmetic/pi.txt) built_by_uv/cli.py (src/built_by_uv/cli.py) - "###); + built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE) + built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT) + built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) + built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h) + built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh) + built_by_uv-0.1.0.data/data/data.csv (assets/data.csv) + built_by_uv-0.1.0.dist-info/WHEEL (generated) + 
built_by_uv-0.1.0.dist-info/entry_points.txt (generated) + built_by_uv-0.1.0.dist-info/METADATA (generated) + "); - // Check that we write deterministic wheels. + // Check that the wheel is the same for both build paths and reproducible across platforms. let wheel_filename = "built_by_uv-0.1.0-py3-none-any.whl"; + let index_wheel_contents = + fs_err::read(indirect_output_dir.path().join(wheel_filename)).unwrap(); assert_eq!( fs_err::read(direct_output_dir.path().join(wheel_filename)).unwrap(), - fs_err::read(indirect_output_dir.path().join(wheel_filename)).unwrap() + index_wheel_contents + ); + assert_snapshot!( + format!("{:x}", sha2::Sha256::digest(&index_wheel_contents)), + @"ac3f68ac448023bca26de689d80401bff57f764396ae802bf4666234740ffbe3" ); } diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs index 9436c5c19d8d3..b6c9f3c3eb630 100644 --- a/crates/uv-build-backend/src/metadata.rs +++ b/crates/uv-build-backend/src/metadata.rs @@ -410,14 +410,18 @@ impl PyProjectToml { } })?; - for entry in WalkDir::new(root).into_iter().filter_entry(|entry| { - license_globs.match_directory( - entry - .path() - .strip_prefix(root) - .expect("walkdir starts with root"), - ) - }) { + for entry in WalkDir::new(root) + .sort_by_file_name() + .into_iter() + .filter_entry(|entry| { + license_globs.match_directory( + entry + .path() + .strip_prefix(root) + .expect("walkdir starts with root"), + ) + }) + { let entry = entry.map_err(|err| Error::WalkDir { root: root.to_path_buf(), err, diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs index ba519e89b2845..5fc3f32998446 100644 --- a/crates/uv-build-backend/src/source_dist.rs +++ b/crates/uv-build-backend/src/source_dist.rs @@ -54,8 +54,6 @@ pub fn list_source_dist( let mut files = FileList::new(); let writer = ListWriter::new(&mut files); write_source_dist(source_tree, writer, uv_version)?; - // Ensure a deterministic order even when file 
walking changes - files.sort_unstable(); Ok((filename, files)) } @@ -206,21 +204,25 @@ fn write_source_dist( let (include_matcher, exclude_matcher) = source_dist_matcher(&pyproject_toml, settings)?; let mut files_visited = 0; - for entry in WalkDir::new(source_tree).into_iter().filter_entry(|entry| { - // TODO(konsti): This should be prettier. - let relative = entry - .path() - .strip_prefix(source_tree) - .expect("walkdir starts with root"); + for entry in WalkDir::new(source_tree) + .sort_by_file_name() + .into_iter() + .filter_entry(|entry| { + // TODO(konsti): This should be prettier. + let relative = entry + .path() + .strip_prefix(source_tree) + .expect("walkdir starts with root"); - // Fast path: Don't descend into a directory that can't be included. This is the most - // important performance optimization, it avoids descending into directories such as - // `.venv`. While walkdir is generally cheap, we still avoid traversing large data - // directories that often exist on the top level of a project. This is especially noticeable - // on network file systems with high latencies per operation (while contiguous reading may - // still be fast). - include_matcher.match_directory(relative) && !exclude_matcher.is_match(relative) - }) { + // Fast path: Don't descend into a directory that can't be included. This is the most + // important performance optimization, it avoids descending into directories such as + // `.venv`. While walkdir is generally cheap, we still avoid traversing large data + // directories that often exist on the top level of a project. This is especially noticeable + // on network file systems with high latencies per operation (while contiguous reading may + // still be fast). 
+ include_matcher.match_directory(relative) && !exclude_matcher.is_match(relative) + }) + { let entry = entry.map_err(|err| Error::WalkDir { root: source_tree.to_path_buf(), err, @@ -305,15 +307,22 @@ impl DirectoryWriter for TarGzWriter { fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error> { let metadata = fs_err::metadata(file)?; let mut header = Header::new_gnu(); + // Preserve the executable bit, especially for scripts #[cfg(unix)] - { - // Preserve for example an executable bit. - header.set_mode(std::os::unix::fs::MetadataExt::mode(&metadata)); - } + let executable_bit = { + use std::os::unix::fs::PermissionsExt; + file.metadata()?.permissions().mode() & 0o111 != 0 + }; + // Windows has no executable bit #[cfg(not(unix))] - { - // Reasonable default to avoid 0o000 permissions, the user's umask will be applied on - // unpacking. + let executable_bit = false; + + // Set reasonable defaults to avoid 0o000 permissions, while avoiding adding the exact + // filesystem permissions to the archive for reproducibility. Where applicable, the + // operating system filters the stored permission by the user's umask when unpacking. 
+ if executable_bit { + header.set_mode(0o755); + } else { header.set_mode(0o644); } header.set_size(metadata.len()); diff --git a/crates/uv-build-backend/src/wheel.rs b/crates/uv-build-backend/src/wheel.rs index 4fa0ca73cc4fa..3314694b4c9ec 100644 --- a/crates/uv-build-backend/src/wheel.rs +++ b/crates/uv-build-backend/src/wheel.rs @@ -87,8 +87,6 @@ pub fn list_wheel( let mut files = FileList::new(); let writer = ListWriter::new(&mut files); write_wheel(source_tree, &pyproject_toml, &filename, uv_version, writer)?; - // Ensure a deterministic order even when file walking changes - files.sort_unstable(); Ok((filename, files)) } @@ -136,6 +134,7 @@ fn write_wheel( let mut files_visited = 0; for entry in WalkDir::new(module_root) + .sort_by_file_name() .into_iter() .filter_entry(|entry| !exclude_matcher.is_match(entry.path())) { @@ -482,16 +481,20 @@ fn wheel_subdir_from_globs( wheel_writer.write_directory(target)?; - for entry in WalkDir::new(src).into_iter().filter_entry(|entry| { - // TODO(konsti): This should be prettier. - let relative = entry - .path() - .strip_prefix(src) - .expect("walkdir starts with root"); - - // Fast path: Don't descend into a directory that can't be included. - matcher.match_directory(relative) - }) { + for entry in WalkDir::new(src) + .sort_by_file_name() + .into_iter() + .filter_entry(|entry| { + // TODO(konsti): This should be prettier. + let relative = entry + .path() + .strip_prefix(src) + .expect("walkdir starts with root"); + + // Fast path: Don't descend into a directory that can't be included. 
+ matcher.match_directory(relative) + }) + { let entry = entry.map_err(|err| Error::WalkDir { root: src.to_path_buf(), err, @@ -823,6 +826,7 @@ mod test { metadata(built_by_uv, metadata_dir.path(), "1.0.0+test").unwrap(); let mut files: Vec<_> = WalkDir::new(metadata_dir.path()) + .sort_by_file_name() .into_iter() .map(|entry| { entry diff --git a/crates/uv-globfilter/src/glob_dir_filter.rs b/crates/uv-globfilter/src/glob_dir_filter.rs index 9ed022fafe7c9..ef59b3cb860fe 100644 --- a/crates/uv-globfilter/src/glob_dir_filter.rs +++ b/crates/uv-globfilter/src/glob_dir_filter.rs @@ -174,7 +174,8 @@ mod tests { let matcher = GlobDirFilter::from_globs(&patterns).unwrap(); // Test the prefix filtering - let mut visited: Vec<_> = WalkDir::new(dir.path()) + let visited: Vec<_> = WalkDir::new(dir.path()) + .sort_by_file_name() .into_iter() .filter_entry(|entry| { let relative = entry @@ -196,7 +197,6 @@ mod tests { relative.replace(MAIN_SEPARATOR, "/") }) .collect(); - visited.sort(); assert_eq!( visited, [ @@ -234,6 +234,7 @@ mod tests { let walkdir_root = dir.path(); let mut matches: Vec<_> = WalkDir::new(walkdir_root) + .sort_by_file_name() .into_iter() .filter_entry(|entry| { // TODO(konsti): This should be prettier. diff --git a/crates/uv-globfilter/src/main.rs b/crates/uv-globfilter/src/main.rs index 32174d7803969..808dfb25fe92b 100644 --- a/crates/uv-globfilter/src/main.rs +++ b/crates/uv-globfilter/src/main.rs @@ -33,6 +33,7 @@ fn main() { let walkdir_root = args().next().unwrap(); for entry in WalkDir::new(&walkdir_root) + .sort_by_file_name() .into_iter() .filter_entry(|entry| { // TODO(konsti): This should be prettier. 
diff --git a/crates/uv/tests/it/build.rs b/crates/uv/tests/it/build.rs index ca7b82dc1cff2..732966eccc01d 100644 --- a/crates/uv/tests/it/build.rs +++ b/crates/uv/tests/it/build.rs @@ -1553,14 +1553,14 @@ fn build_list_files() -> Result<()> { .arg(&built_by_uv) .arg("--out-dir") .arg(context.temp_dir.join("output1")) - .arg("--list"), @r###" + .arg("--list"), @r" success: true exit_code: 0 ----- stdout ----- Building built_by_uv-0.1.0.tar.gz will include the following files: + built_by_uv-0.1.0/PKG-INFO (generated) built_by_uv-0.1.0/LICENSE-APACHE (LICENSE-APACHE) built_by_uv-0.1.0/LICENSE-MIT (LICENSE-MIT) - built_by_uv-0.1.0/PKG-INFO (generated) built_by_uv-0.1.0/README.md (README.md) built_by_uv-0.1.0/assets/data.csv (assets/data.csv) built_by_uv-0.1.0/header/built_by_uv.h (header/built_by_uv.h) @@ -1574,25 +1574,25 @@ fn build_list_files() -> Result<()> { built_by_uv-0.1.0/src/built_by_uv/cli.py (src/built_by_uv/cli.py) built_by_uv-0.1.0/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) Building built_by_uv-0.1.0-py3-none-any.whl will include the following files: - built_by_uv-0.1.0.data/data/data.csv (assets/data.csv) - built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h) - built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh) - built_by_uv-0.1.0.dist-info/METADATA (generated) - built_by_uv-0.1.0.dist-info/WHEEL (generated) - built_by_uv-0.1.0.dist-info/entry_points.txt (generated) - built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE) - built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT) - built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) built_by_uv/__init__.py (src/built_by_uv/__init__.py) built_by_uv/arithmetic/__init__.py (src/built_by_uv/arithmetic/__init__.py) built_by_uv/arithmetic/circle.py (src/built_by_uv/arithmetic/circle.py) built_by_uv/arithmetic/pi.txt (src/built_by_uv/arithmetic/pi.txt) built_by_uv/cli.py 
(src/built_by_uv/cli.py) + built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE) + built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT) + built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) + built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h) + built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh) + built_by_uv-0.1.0.data/data/data.csv (assets/data.csv) + built_by_uv-0.1.0.dist-info/WHEEL (generated) + built_by_uv-0.1.0.dist-info/entry_points.txt (generated) + built_by_uv-0.1.0.dist-info/METADATA (generated) ----- stderr ----- Building source distribution (uv build backend)... Successfully built output1/built_by_uv-0.1.0.tar.gz - "###); + "); context .temp_dir .child("output1") @@ -1611,14 +1611,14 @@ fn build_list_files() -> Result<()> { .arg(context.temp_dir.join("output2")) .arg("--list") .arg("--sdist") - .arg("--wheel"), @r###" + .arg("--wheel"), @r" success: true exit_code: 0 ----- stdout ----- Building built_by_uv-0.1.0.tar.gz will include the following files: + built_by_uv-0.1.0/PKG-INFO (generated) built_by_uv-0.1.0/LICENSE-APACHE (LICENSE-APACHE) built_by_uv-0.1.0/LICENSE-MIT (LICENSE-MIT) - built_by_uv-0.1.0/PKG-INFO (generated) built_by_uv-0.1.0/README.md (README.md) built_by_uv-0.1.0/assets/data.csv (assets/data.csv) built_by_uv-0.1.0/header/built_by_uv.h (header/built_by_uv.h) @@ -1632,23 +1632,23 @@ fn build_list_files() -> Result<()> { built_by_uv-0.1.0/src/built_by_uv/cli.py (src/built_by_uv/cli.py) built_by_uv-0.1.0/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) Building built_by_uv-0.1.0-py3-none-any.whl will include the following files: - built_by_uv-0.1.0.data/data/data.csv (assets/data.csv) - built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h) - built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh) - built_by_uv-0.1.0.dist-info/METADATA (generated) - built_by_uv-0.1.0.dist-info/WHEEL (generated) - 
built_by_uv-0.1.0.dist-info/entry_points.txt (generated) - built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE) - built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT) - built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) built_by_uv/__init__.py (src/built_by_uv/__init__.py) built_by_uv/arithmetic/__init__.py (src/built_by_uv/arithmetic/__init__.py) built_by_uv/arithmetic/circle.py (src/built_by_uv/arithmetic/circle.py) built_by_uv/arithmetic/pi.txt (src/built_by_uv/arithmetic/pi.txt) built_by_uv/cli.py (src/built_by_uv/cli.py) + built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE (LICENSE-APACHE) + built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT (LICENSE-MIT) + built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt (third-party-licenses/PEP-401.txt) + built_by_uv-0.1.0.data/headers/built_by_uv.h (header/built_by_uv.h) + built_by_uv-0.1.0.data/scripts/whoami.sh (scripts/whoami.sh) + built_by_uv-0.1.0.data/data/data.csv (assets/data.csv) + built_by_uv-0.1.0.dist-info/WHEEL (generated) + built_by_uv-0.1.0.dist-info/entry_points.txt (generated) + built_by_uv-0.1.0.dist-info/METADATA (generated) ----- stderr ----- - "###); + "); context .temp_dir .child("output2")