From 69ba77b9a1a2bed49e956f6aa1869795608c3bbc Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 20 Jan 2026 14:55:01 +0100 Subject: [PATCH 1/5] Better detection for conflicting packages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the previous iteration, conflict detection was based on top level modules. This would work if all namespace packages correctly omitted the `__init__.py`, but e.g. the nvidia packages include an empty `nvida/__init__.py`. Instead, we track overlapping top level modules during installation and perform conflict analysis after installation, recursing only as far as necessary. Before: ``` $ uv venv -c -q && uv pip install --preview nvidia-nvjitlink-cu12==12.8.93 nvidia-nvtx-cu12==12.8.90 Resolved 2 packages in 0.99ms ░░░░░░░░░░░░░░░░░░░░ [0/2] Installing wheels... warning: The module `nvidia` is provided by more than one package, which causes an install race condition and can result in a broken module. Consider removing your dependency on either `nvidia-nvtx-cu12` (v12.8.90) or `nvidia-nvjitlink-cu12` (v12.8.93). Installed 2 packages in 3ms + nvidia-nvjitlink-cu12==12.8.93 + nvidia-nvtx-cu12==12.8.90 ``` After: ``` $ uv venv -c -q && cargo run -q pip install --preview nvidia-nvjitlink-cu12==12.8.93 nvidia-nvtx-cu12==12.8.90 Resolved 2 packages in 3ms Installed 2 packages in 4ms + nvidia-nvjitlink-cu12==12.8.93 + nvidia-nvtx-cu12==12.8.90 ``` Still detected true positive: ``` $ uv venv -c -q && cargo run -q pip install --no-progress opencv-python opencv-contrib-python --no-build --no-deps --preview Resolved 2 packages in 5ms warning: The file `cv2/__init__.pyi` is provided by more than one package, which causes an install race condition and can result in a broken module. Packages containing the file: * opencv-contrib-python (opencv_contrib_python-4.13.0.90-cp37-abi3-manylinux_2_28_x86_64.whl) * opencv-python (opencv_python-4.13.0.90-cp37-abi3-manylinux_2_28_x86_64.whl) Installed 2 packages in 6ms + opencv-contrib-python==4.13.0.90 + opencv-python==4.13.0.90 ``` --- Cargo.lock | 1 + crates/uv-install-wheel/Cargo.toml | 1 + crates/uv-install-wheel/src/linker.rs | 232 +++++++++++++++++++------- crates/uv-installer/src/installer.rs | 5 +- crates/uv/tests/it/pip_install.rs | 183 +++++++++++++++++++- 5 files changed, 364 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc5bd73aa832a..0fd11ba983797 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6443,6 +6443,7 @@ dependencies = [ "data-encoding", "fs-err", "indoc", + "itertools 0.14.0", "mailparse", "owo-colors", "pathdiff", diff --git a/crates/uv-install-wheel/Cargo.toml b/crates/uv-install-wheel/Cargo.toml index 4b58725a412c2..228a59754fd29 100644 --- a/crates/uv-install-wheel/Cargo.toml +++ b/crates/uv-install-wheel/Cargo.toml @@ -35,6 +35,7 @@ configparser = { workspace = true } csv = { workspace = true } data-encoding = { workspace = true } fs-err = { workspace = true } +itertools = { workspace = true } mailparse = { workspace = true } owo-colors = { workspace = true } pathdiff = { workspace = true } diff --git a/crates/uv-install-wheel/src/linker.rs b/crates/uv-install-wheel/src/linker.rs index a24a7e4fc4654..53b45a545a595 100644 --- a/crates/uv-install-wheel/src/linker.rs +++ b/crates/uv-install-wheel/src/linker.rs @@ -1,15 +1,18 @@ -use std::ffi::{OsStr, OsString}; +use std::collections::{BTreeMap, BTreeSet}; +use std::ffi::OsString; +use std::io; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use std::time::SystemTime; use fs_err as fs; use fs_err::DirEntry; +use itertools::Itertools; use reflink_copy as reflink; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use tempfile::tempdir_in; -use tracing::{debug, instrument, trace}; +use tracing::{debug, instrument, trace, warn}; use walkdir::WalkDir; use uv_distribution_filename::WheelFilename; @@ -19,13 +22,15 @@ use uv_warnings::{warn_user, warn_user_once}; use crate::Error; +/// Avoid and track conflicts between packages. #[expect(clippy::struct_field_names)] #[derive(Debug, Default)] pub struct Locks { /// The parent directory of a file in a synchronized copy copy_dir_locks: Mutex>>>, - /// Top level modules (excluding namespaces) we write to. - modules: Mutex>, + /// Top level modules and wheels they are from, as well as their directory in the unpacked + /// wheel. + modules: Mutex>>, /// Preview settings for feature flags. preview: Preview, } @@ -40,39 +45,169 @@ impl Locks { } } - /// Warn when a module exists in multiple packages. - fn warn_module_conflict(&self, module: &OsStr, wheel_a: &WheelFilename) { - if let Some(wheel_b) = self - .modules + /// Register which package contains which top level module, to later warn when different files + /// at the same path exist in multiple packages. + fn register_module_origin(&self, relative: &Path, absolute: &Path, wheel: &WheelFilename) { + debug_assert!(!relative.is_absolute()); + debug_assert!(absolute.is_absolute()); + + // Only register top level modules + if relative.components().count() != 1 { + return; + } + + self.modules .lock() .unwrap() - .insert(module.to_os_string(), wheel_a.clone()) + .entry(relative.as_os_str().to_os_string()) + .or_default() + .insert((wheel.clone(), absolute.to_path_buf())); + } + + /// Warn when a module exists in multiple packages. + /// + /// The intent is to detect different variants of the same package installed over each other, + /// or different packages using the same top-level module name, which cause non-deterministic + /// failures only surfacing at runtime. See for a + /// list of cases. + /// + /// The check has some false negatives. It is rather too lenient than too strict, and includes + /// support for namespace packages that include the same `__init__.py` file, e.g., gpu-a and + /// gpu-b both including the same `gpu/__init__.py`. + /// + /// We assume that all wheels of a package have the same module(s), so a conflict between + /// installing two unpacked wheels is a conflict between two packages. + /// + /// # Performance + /// + /// When there are no namespace packages, this method is a Mutex lock and a hash map iteration. + /// + /// When there are namespace packages, we only traverse into directories shared by at least two + /// packages. For example, for namespace packages gpu-a, gpu-b, and gpu-c with + /// `gpu/a/__init__.py`, `gpu/b/__init__.py`, and `gpu/c/__init__.py` respectively, we only + /// need to read the `gpu` directory. If there is a deeper shared directory, we only recurse + /// down to this directory. As packages without conflicts generally do not share many + /// directories, we do not recurse far. + /// + /// For each directory, we analyze all packages sharing the directory at the same time, reading + /// the directory in each unpacked wheel only once. Effectively, we perform a parallel directory + /// walk with early exit. + /// + /// We avoid reading the actual file contents and assume they are the same when their file + /// length matches. This also excludes the same empty `__init__.py` files being reported as + /// conflicting. + pub fn warn_package_conflicts(self) -> Result<(), io::Error> { + // This warning is currently in preview. + if !self + .preview + .is_enabled(PreviewFeatures::DETECT_MODULE_CONFLICTS) { - // Only warn if the preview feature is enabled - if !self - .preview - .is_enabled(PreviewFeatures::DETECT_MODULE_CONFLICTS) - { - return; + return Ok(()); + } + + for (top_level_module, wheels) in &*self.modules.lock().unwrap() { + // Fast path: Only one package is using this module name, no conflicts. + if wheels.len() == 1 { + continue; } - // Sort for consistent output, at least with two packages - let (wheel_a, wheel_b) = if wheel_b.name > wheel_a.name { - (&wheel_b, wheel_a) - } else { - (wheel_a, &wheel_b) + // Don't early return if the method returns true, so we show warnings for each top-level + // module + Self::warn_directory_conflict(Path::new(top_level_module), wheels)?; + } + + Ok(()) + } + + /// Analyze a directory for conflicts. + /// + /// If there are any non-identical files (checked by size) included in more than one wheels, + /// report this file and return. + /// + /// If there are any directories included in more than one wheel, recurse to analyze whether + /// the directories contain conflicting files. + /// + /// Returns `true` if a warning was emitted. + fn warn_directory_conflict( + directory: &Path, + wheels: &BTreeSet<(WheelFilename, PathBuf)>, + ) -> Result { + // The files in the directory, as paths relative to the site-packages, with their origin and + // size. + let mut files: BTreeMap> = BTreeMap::default(); + // The directories in the directory, as paths relative to the site-packages, with their + // origin and absolute path. + let mut subdirectories: BTreeMap> = + BTreeMap::default(); + + // Read the shared directory in each unpacked wheel. + for (wheel, absolute) in wheels { + let mut entries: Vec = + fs_err::read_dir(absolute)?.collect::, io::Error>>()?; + // Ensure we always warn for the same first file. + entries.sort_by_key(DirEntry::path); + for dir_entry in entries { + let relative = directory.join(dir_entry.file_name()); + let file_type = dir_entry.file_type()?; + if file_type.is_file() { + files + .entry(relative) + .or_default() + .insert((wheel.clone(), dir_entry.metadata()?.len())); + } else if file_type.is_dir() { + subdirectories + .entry(relative) + .or_default() + .insert((wheel.clone(), dir_entry.path().clone())); + } else { + // We don't expect any other file type, but it's ok if this check has false + // negatives. + } + } + } + + for (file, file_wheels) in files { + let Some((_, file_len)) = file_wheels.first() else { + debug_assert!(false, "Always at least one element"); + continue; }; - warn_user!( - "The module `{}` is provided by more than one package, \ - which causes an install race condition and can result in a broken module. \ - Consider removing your dependency on either `{}` ({}) or `{}` ({}).", - module.simplified_display().green(), - wheel_a.name.cyan(), - format!("v{}", wheel_a.version).cyan(), - wheel_b.name.cyan(), - format!("v{}", wheel_b.version).cyan() - ); + + if file_wheels + .iter() + .any(|(_, file_len_other)| file_len_other != file_len) + { + let packages = file_wheels + .iter() + .map(|(wheel_filename, _file_len)| { + format!("* {} ({})", wheel_filename.name, wheel_filename) + }) + .join("\n"); + warn_user!( + "The file `{}` is provided by more than one package, \ + which causes an install race condition and can result in a broken module. \ + Packages containing the file:\n{}", + file.user_display(), + packages + ); + // Assumption: There is generally two packages that have a conflict. The output is + // more helpful with a single message that calls out the packages + // rather than being comprehensive about the conflicting files. + return Ok(true); + } + } + + for (subdirectory, subdirectory_wheels) in subdirectories { + if subdirectory_wheels.len() == 1 { + continue; + } + // If there are directories shared between multiple wheels, recurse to check them + // for shared files. + if Self::warn_directory_conflict(&subdirectory, &subdirectory_wheels)? { + return Ok(true); + } } + + Ok(false) } } @@ -143,16 +278,14 @@ fn clone_wheel_files( for entry in fs::read_dir(wheel)? { let entry = entry?; - if entry.path().join("__init__.py").is_file() { - locks.warn_module_conflict( - entry - .path() - .strip_prefix(wheel) - .expect("wheel path starts with wheel root") - .as_os_str(), - filename, - ); - } + locks.register_module_origin( + entry + .path() + .strip_prefix(wheel) + .expect("wheel path starts with wheel root"), + &entry.path(), + filename, + ); clone_recursive(site_packages.as_ref(), wheel, locks, &entry, &mut attempt)?; count += 1; } @@ -329,8 +462,7 @@ fn copy_wheel_files( let path = entry.path(); let relative = path.strip_prefix(&wheel).expect("walkdir starts with root"); let out_path = site_packages.as_ref().join(relative); - - warn_module_conflict(locks, filename, relative); + locks.register_module_origin(relative, path, filename); if entry.file_type().is_dir() { fs::create_dir_all(&out_path)?; @@ -362,7 +494,7 @@ fn hardlink_wheel_files( let relative = path.strip_prefix(&wheel).expect("walkdir starts with root"); let out_path = site_packages.as_ref().join(relative); - warn_module_conflict(locks, filename, relative); + locks.register_module_origin(relative, path, filename); if entry.file_type().is_dir() { fs::create_dir_all(&out_path)?; @@ -462,7 +594,7 @@ fn symlink_wheel_files( let relative = path.strip_prefix(&wheel).unwrap(); let out_path = site_packages.as_ref().join(relative); - warn_module_conflict(locks, filename, relative); + locks.register_module_origin(relative, path, filename); if entry.file_type().is_dir() { fs::create_dir_all(&out_path)?; @@ -568,18 +700,6 @@ fn synchronized_copy(from: &Path, to: &Path, locks: &Locks) -> std::io::Result<( Ok(()) } -/// Warn when a module exists in multiple packages. -fn warn_module_conflict(locks: &Locks, filename: &WheelFilename, relative: &Path) { - // Check for `__init__.py` to account for namespace packages. - // TODO(konsti): We need to warn for overlapping namespace packages, too. - if relative.components().count() == 2 - && relative.components().next_back().unwrap().as_os_str() == "__init__.py" - { - // Modules must be UTF-8, but we can skip the conversion using OsStr. - locks.warn_module_conflict(relative.components().next().unwrap().as_os_str(), filename); - } -} - #[cfg(unix)] fn create_symlink, Q: AsRef>(original: P, link: Q) -> std::io::Result<()> { fs_err::os::unix::fs::symlink(original, link) diff --git a/crates/uv-installer/src/installer.rs b/crates/uv-installer/src/installer.rs index 48c4275a1c9a0..50c516e69ec7a 100644 --- a/crates/uv-installer/src/installer.rs +++ b/crates/uv-installer/src/installer.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, LazyLock}; use anyhow::{Context, Error, Result}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use tokio::sync::oneshot; -use tracing::instrument; +use tracing::{instrument, warn}; use uv_cache::Cache; use uv_configuration::RAYON_INITIALIZE; @@ -198,6 +198,9 @@ fn install( Ok::<(), Error>(()) })?; + if let Err(err) = locks.warn_package_conflicts() { + warn!("Checking for conflicts between packages failed: {err}"); + } Ok(wheels) } diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index 17b39ae7d50f6..959b9e4b23770 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -13160,7 +13160,9 @@ fn overlapping_packages_warning() -> Result<()> { ----- stderr ----- Resolved 2 packages in [TIME] Prepared 2 packages in [TIME] - warning: The module `built_by_uv` is provided by more than one package, which causes an install race condition and can result in a broken module. Consider removing your dependency on either `built-by-uv` (v0.1.0) or `also-built-by-uv` (v0.1.0). + warning: The file `built_by_uv/__init__.py` is provided by more than one package, which causes an install race condition and can result in a broken module. Consider removing conflicting packages. Packages containing the file: + * also-built-by-uv (also_built_by_uv-0.1.0-py3-none-any.whl) + * built-by-uv (built_by_uv-0.1.0-py3-none-any.whl) Installed 2 packages in [TIME] + also-built-by-uv==0.1.0 (from file://[TEMP_DIR]/also-built-by-uv) + built-by-uv==0.1.0 (from file://[WORKSPACE]/test/packages/built-by-uv) @@ -13243,6 +13245,185 @@ fn overlapping_packages_warning() -> Result<()> { Ok(()) } +/// Don't warn for improperly built namespace packages with overlapping empty `__init__.py`. +#[test] +fn overlapping_empty_init_py() -> Result<()> { + let context = TestContext::new("3.12"); + + let gpu_a = context.temp_dir.child("gpu-a"); + gpu_a.child("pyproject.toml").write_str( + r#" + [project] + name = "gpu-a" + version = "0.1.0" + requires-python = ">=3.12" + + [tool.uv.build-backend] + module-name = "gpu" + + [build-system] + requires = ["uv_build>=0.7,<10000"] + build-backend = "uv_build" + "#, + )?; + gpu_a + .child("src") + .child("gpu") + .child("__init__.py") + .touch()?; + + gpu_a + .child("src") + .child("gpu") + .child("a") + .child("__init__.py") + .write_str("print('a')")?; + + let gpu_b = context.temp_dir.child("gpu-b"); + gpu_b.child("pyproject.toml").write_str( + r#" + [project] + name = "gpu-b" + version = "0.1.0" + requires-python = ">=3.12" + + [tool.uv.build-backend] + module-name = "gpu" + + [build-system] + requires = ["uv_build>=0.7,<10000"] + build-backend = "uv_build" + "#, + )?; + gpu_b + .child("src") + .child("gpu") + .child("__init__.py") + .touch()?; + + gpu_b + .child("src") + .child("gpu") + .child("b") + .child("__init__.py") + .write_str("print('b')")?; + + // Check that overlapping packages don't show a warning by default + uv_snapshot!(context.filters(), context.pip_install() + .arg("--preview-features") + .arg("detect-module-conflicts") + .arg("./gpu-a") + .arg("./gpu-b"), @" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 2 packages in [TIME] + Prepared 2 packages in [TIME] + Installed 2 packages in [TIME] + + gpu-a==0.1.0 (from file://[TEMP_DIR]/gpu-a) + + gpu-b==0.1.0 (from file://[TEMP_DIR]/gpu-b) + " + ); + + Ok(()) +} + +/// Warn for conflicting files even nested in namespace packages. +#[test] +fn overlapping_nested_files() -> Result<()> { + let context = TestContext::new("3.12"); + + let gpu_a = context.temp_dir.child("gpu-a"); + gpu_a.child("pyproject.toml").write_str( + r#" + [project] + name = "gpu-a" + version = "0.1.0" + requires-python = ">=3.12" + + [tool.uv.build-backend] + module-name = "gpu" + namespace = true + + [build-system] + requires = ["uv_build>=0.7,<10000"] + build-backend = "uv_build" + "#, + )?; + gpu_a + .child("src") + .child("gpu") + .child("accelerator") + .child("matrix") + .child("product.py") + .write_str("print('Hi!')")?; + // This file collides to, but we always show the first file. + gpu_a + .child("src") + .child("gpu") + .child("bccelerator") + .child("sum.py") + .write_str("print('Hi!')")?; + + let gpu_b = context.temp_dir.child("gpu-b"); + gpu_b.child("pyproject.toml").write_str( + r#" + [project] + name = "gpu-b" + version = "0.1.0" + requires-python = ">=3.12" + + [tool.uv.build-backend] + module-name = "gpu" + namespace = true + + [build-system] + requires = ["uv_build>=0.7,<10000"] + build-backend = "uv_build" + "#, + )?; + gpu_b + .child("src") + .child("gpu") + .child("accelerator") + .child("matrix") + .child("product.py") + .write_str("print('Hello world')")?; + // This file collides to, but we always show the first file. + gpu_b + .child("src") + .child("gpu") + .child("bccelerator") + .child("sum.py") + .write_str("print('Hi!')")?; + + // Check that overlapping packages don't show a warning by default + uv_snapshot!(context.filters(), context.pip_install() + .arg("--preview-features") + .arg("detect-module-conflicts") + .arg("./gpu-a") + .arg("./gpu-b"), @" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 2 packages in [TIME] + Prepared 2 packages in [TIME] + warning: The file `gpu/accelerator/matrix/product.py` is provided by more than one package, which causes an install race condition and can result in a broken module. Consider removing conflicting packages. Packages containing the file: + * gpu-a (gpu_a-0.1.0-py3-none-any.whl) + * gpu-b (gpu_b-0.1.0-py3-none-any.whl) + Installed 2 packages in [TIME] + + gpu-a==0.1.0 (from file://[TEMP_DIR]/gpu-a) + + gpu-b==0.1.0 (from file://[TEMP_DIR]/gpu-b) + " + ); + + Ok(()) +} + /// See: #[test] fn transitive_dependency_config_settings_invalidation() -> Result<()> { From 302a327327c53c953e522040aa8acdb40b981171 Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 20 Jan 2026 18:09:44 +0100 Subject: [PATCH 2/5] Update snapshots --- crates/uv/tests/it/pip_install.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index 959b9e4b23770..fc552342c996a 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -13160,7 +13160,7 @@ fn overlapping_packages_warning() -> Result<()> { ----- stderr ----- Resolved 2 packages in [TIME] Prepared 2 packages in [TIME] - warning: The file `built_by_uv/__init__.py` is provided by more than one package, which causes an install race condition and can result in a broken module. Consider removing conflicting packages. Packages containing the file: + warning: The file `built_by_uv/__init__.py` is provided by more than one package, which causes an install race condition and can result in a broken module. Packages containing the file: * also-built-by-uv (also_built_by_uv-0.1.0-py3-none-any.whl) * built-by-uv (built_by_uv-0.1.0-py3-none-any.whl) Installed 2 packages in [TIME] @@ -13412,7 +13412,7 @@ fn overlapping_nested_files() -> Result<()> { ----- stderr ----- Resolved 2 packages in [TIME] Prepared 2 packages in [TIME] - warning: The file `gpu/accelerator/matrix/product.py` is provided by more than one package, which causes an install race condition and can result in a broken module. Consider removing conflicting packages. Packages containing the file: + warning: The file `gpu/accelerator/matrix/product.py` is provided by more than one package, which causes an install race condition and can result in a broken module. Packages containing the file: * gpu-a (gpu_a-0.1.0-py3-none-any.whl) * gpu-b (gpu_b-0.1.0-py3-none-any.whl) Installed 2 packages in [TIME] From 61fb58175519f35a90f6c9767faec7bb295e09cb Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 20 Jan 2026 18:25:00 +0100 Subject: [PATCH 3/5] Editing --- crates/uv-install-wheel/src/linker.rs | 2 +- crates/uv/tests/it/pip_install.rs | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/uv-install-wheel/src/linker.rs b/crates/uv-install-wheel/src/linker.rs index 53b45a545a595..6b5d8e211a4f8 100644 --- a/crates/uv-install-wheel/src/linker.rs +++ b/crates/uv-install-wheel/src/linker.rs @@ -121,7 +121,7 @@ impl Locks { /// Analyze a directory for conflicts. /// - /// If there are any non-identical files (checked by size) included in more than one wheels, + /// If there are any non-identical files (checked by size) included in more than one wheel, /// report this file and return. /// /// If there are any directories included in more than one wheel, recurse to analyze whether diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index fc552342c996a..b4c771dbb4bb3 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -13359,7 +13359,7 @@ fn overlapping_nested_files() -> Result<()> { .child("matrix") .child("product.py") .write_str("print('Hi!')")?; - // This file collides to, but we always show the first file. + // This file collides too, but we always show the first file. gpu_a .child("src") .child("gpu") @@ -13391,7 +13391,7 @@ fn overlapping_nested_files() -> Result<()> { .child("matrix") .child("product.py") .write_str("print('Hello world')")?; - // This file collides to, but we always show the first file. + // This file collides too, but we always show the first file. gpu_b .child("src") .child("gpu") @@ -13399,7 +13399,6 @@ fn overlapping_nested_files() -> Result<()> { .child("sum.py") .write_str("print('Hi!')")?; - // Check that overlapping packages don't show a warning by default uv_snapshot!(context.filters(), context.pip_install() .arg("--preview-features") .arg("detect-module-conflicts") From 6497c82d74e4fe176340d11e16b52d90120fb21b Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 21 Jan 2026 11:02:29 +0100 Subject: [PATCH 4/5] Review and check top level files --- crates/uv-install-wheel/src/linker.rs | 142 ++++++++++++++++---------- crates/uv/tests/it/pip_install.rs | 76 ++++++++++++++ 2 files changed, 164 insertions(+), 54 deletions(-) diff --git a/crates/uv-install-wheel/src/linker.rs b/crates/uv-install-wheel/src/linker.rs index 6b5d8e211a4f8..454d56cceedd5 100644 --- a/crates/uv-install-wheel/src/linker.rs +++ b/crates/uv-install-wheel/src/linker.rs @@ -1,5 +1,4 @@ use std::collections::{BTreeMap, BTreeSet}; -use std::ffi::OsString; use std::io; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; @@ -28,9 +27,9 @@ use crate::Error; pub struct Locks { /// The parent directory of a file in a synchronized copy copy_dir_locks: Mutex>>>, - /// Top level modules and wheels they are from, as well as their directory in the unpacked - /// wheel. - modules: Mutex>>, + /// Top level files and directories in site-packages, stored as relative path, and wheels they + /// are from, with the absolute paths in the unpacked wheel. + site_packages_paths: Mutex>>, /// Preview settings for feature flags. preview: Preview, } @@ -40,31 +39,36 @@ impl Locks { pub fn new(preview: Preview) -> Self { Self { copy_dir_locks: Mutex::new(FxHashMap::default()), - modules: Mutex::new(FxHashMap::default()), + site_packages_paths: Mutex::new(FxHashMap::default()), preview, } } - /// Register which package contains which top level module, to later warn when different files - /// at the same path exist in multiple packages. - fn register_module_origin(&self, relative: &Path, absolute: &Path, wheel: &WheelFilename) { + /// Register which package installs which (top level) path. + /// + /// This is later used warn when different files at the same path exist in multiple packages. + /// + /// The first non-self argument is the target path relative to site-packages, the second is the + /// source path in the unpacked wheel. + fn register_installed_path(&self, relative: &Path, absolute: &Path, wheel: &WheelFilename) { debug_assert!(!relative.is_absolute()); debug_assert!(absolute.is_absolute()); - // Only register top level modules + // Only register top level entries, these are the only ones we have reliably as cloning + // a directory on macOS traverses outside our code. if relative.components().count() != 1 { return; } - self.modules + self.site_packages_paths .lock() .unwrap() - .entry(relative.as_os_str().to_os_string()) + .entry(relative.to_path_buf()) .or_default() .insert((wheel.clone(), absolute.to_path_buf())); } - /// Warn when a module exists in multiple packages. + /// Warn when the same file with different contents exists in multiple packages. /// /// The intent is to detect different variants of the same package installed over each other, /// or different packages using the same top-level module name, which cause non-deterministic @@ -105,15 +109,35 @@ impl Locks { return Ok(()); } - for (top_level_module, wheels) in &*self.modules.lock().unwrap() { + for (relative, wheels) in &*self.site_packages_paths.lock().unwrap() { // Fast path: Only one package is using this module name, no conflicts. - if wheels.len() == 1 { + let mut wheel_iter = wheels.iter(); + let Some(first_wheel) = wheel_iter.next() else { + debug_assert!(false, "at least one wheel"); + continue; + }; + if wheel_iter.next().is_none() { continue; } - // Don't early return if the method returns true, so we show warnings for each top-level - // module - Self::warn_directory_conflict(Path::new(top_level_module), wheels)?; + // TODO(konsti): This assumes a path is either a file or a directory in all wheels. + let file_type = fs_err::metadata(&first_wheel.1)?.file_type(); + if file_type.is_file() { + // Handle conflicts between files directly in site-packages without a module + // directory enclosing them. + let files: BTreeSet<(&WheelFilename, u64)> = wheels + .iter() + .map(|(wheel, absolute)| Ok((wheel, absolute.metadata()?.len()))) + .collect::>()?; + Self::warn_file_conflict(&relative, &files); + } else if file_type.is_dir() { + // Don't early return if the method returns true, so we show warnings for each + // top-level module. + Self::warn_directory_conflict(relative, wheels)?; + } else { + // We don't expect any other file type, but it's ok if this check has false + // negatives. + } } Ok(()) @@ -134,7 +158,7 @@ impl Locks { ) -> Result { // The files in the directory, as paths relative to the site-packages, with their origin and // size. - let mut files: BTreeMap> = BTreeMap::default(); + let mut files: BTreeMap> = BTreeMap::default(); // The directories in the directory, as paths relative to the site-packages, with their // origin and absolute path. let mut subdirectories: BTreeMap> = @@ -142,23 +166,20 @@ impl Locks { // Read the shared directory in each unpacked wheel. for (wheel, absolute) in wheels { - let mut entries: Vec = - fs_err::read_dir(absolute)?.collect::, io::Error>>()?; - // Ensure we always warn for the same first file. - entries.sort_by_key(DirEntry::path); - for dir_entry in entries { + for dir_entry in fs_err::read_dir(absolute)? { + let dir_entry = dir_entry?; let relative = directory.join(dir_entry.file_name()); let file_type = dir_entry.file_type()?; if file_type.is_file() { files .entry(relative) .or_default() - .insert((wheel.clone(), dir_entry.metadata()?.len())); + .insert((wheel, dir_entry.metadata()?.len())); } else if file_type.is_dir() { subdirectories .entry(relative) .or_default() - .insert((wheel.clone(), dir_entry.path().clone())); + .insert((wheel.clone(), dir_entry.path())); } else { // We don't expect any other file type, but it's ok if this check has false // negatives. @@ -167,31 +188,7 @@ impl Locks { } for (file, file_wheels) in files { - let Some((_, file_len)) = file_wheels.first() else { - debug_assert!(false, "Always at least one element"); - continue; - }; - - if file_wheels - .iter() - .any(|(_, file_len_other)| file_len_other != file_len) - { - let packages = file_wheels - .iter() - .map(|(wheel_filename, _file_len)| { - format!("* {} ({})", wheel_filename.name, wheel_filename) - }) - .join("\n"); - warn_user!( - "The file `{}` is provided by more than one package, \ - which causes an install race condition and can result in a broken module. \ - Packages containing the file:\n{}", - file.user_display(), - packages - ); - // Assumption: There is generally two packages that have a conflict. The output is - // more helpful with a single message that calls out the packages - // rather than being comprehensive about the conflicting files. + if Self::warn_file_conflict(&file, &file_wheels) { return Ok(true); } } @@ -209,6 +206,43 @@ impl Locks { Ok(false) } + + /// Check if all files are the same size, if so assume they are identical. + /// + /// It's unlikely that two modules overlap with different contents but their files all have + /// the same length, so we use this heuristic in this performance critical path to avoid + /// reading potentially large files. + fn warn_file_conflict(file: &Path, file_wheels: &BTreeSet<(&WheelFilename, u64)>) -> bool { + let Some((_, file_len)) = file_wheels.first() else { + debug_assert!(false, "Always at least one element"); + return false; + }; + if !file_wheels + .iter() + .any(|(_, file_len_other)| file_len_other != file_len) + { + return false; + } + + let packages = file_wheels + .iter() + .map(|(wheel_filename, _file_len)| { + format!("* {} ({})", wheel_filename.name, wheel_filename) + }) + .join("\n"); + warn_user!( + "The file `{}` is provided by more than one package, \ + which causes an install race condition and can result in a broken module. \ + Packages containing the file:\n{}", + file.user_display(), + packages + ); + + // Assumption: There is generally two packages that have a conflict. The output is + // more helpful with a single message that calls out the packages + // rather than being comprehensive about the conflicting files. + true + } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -278,7 +312,7 @@ fn clone_wheel_files( for entry in fs::read_dir(wheel)? { let entry = entry?; - locks.register_module_origin( + locks.register_installed_path( entry .path() .strip_prefix(wheel) @@ -462,7 +496,7 @@ fn copy_wheel_files( let path = entry.path(); let relative = path.strip_prefix(&wheel).expect("walkdir starts with root"); let out_path = site_packages.as_ref().join(relative); - locks.register_module_origin(relative, path, filename); + locks.register_installed_path(relative, path, filename); if entry.file_type().is_dir() { fs::create_dir_all(&out_path)?; @@ -494,7 +528,7 @@ fn hardlink_wheel_files( let relative = path.strip_prefix(&wheel).expect("walkdir starts with root"); let out_path = site_packages.as_ref().join(relative); - locks.register_module_origin(relative, path, filename); + locks.register_installed_path(relative, path, filename); if entry.file_type().is_dir() { fs::create_dir_all(&out_path)?; @@ -594,7 +628,7 @@ fn symlink_wheel_files( let relative = path.strip_prefix(&wheel).unwrap(); let out_path = site_packages.as_ref().join(relative); - locks.register_module_origin(relative, path, filename); + locks.register_installed_path(relative, path, filename); if entry.file_type().is_dir() { fs::create_dir_all(&out_path)?; diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index b4c771dbb4bb3..d0615d55e1090 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -13423,6 +13423,82 @@ fn overlapping_nested_files() -> Result<()> { Ok(()) } +/// Warn for conflicting files directly in site-packages without a folder containing them. +/// +/// There are some packages which are just a Python file or just a shared library, not contained +/// in a module directory. +#[test] +fn overlapping_file_without_enclosing_directory() -> Result<()> { + let context = TestContext::new("3.12"); + + let gpu_a = context.temp_dir.child("gpu-a"); + gpu_a.child("pyproject.toml").write_str( + r#" + [project] + name = "gpu-a" + version = "0.1.0" + requires-python = ">=3.12" + + [tool.uv.build-backend] + module-name = "" + namespace = true + + [build-system] + requires = ["uv_build>=0.7,<10000"] + build-backend = "uv_build" + "#, + )?; + gpu_a + .child("src") + .child("gpu.abi3.so") + .write_str("ELF file contents")?; + + let gpu_b = context.temp_dir.child("gpu-b"); + gpu_b.child("pyproject.toml").write_str( + r#" + [project] + name = "gpu-b" + version = "0.1.0" + requires-python = ">=3.12" + + [tool.uv.build-backend] + module-name = "" + namespace = true + + [build-system] + requires = ["uv_build>=0.7,<10000"] + build-backend = "uv_build" + "#, + )?; + gpu_b + .child("src") + .child("gpu.abi3.so") + .write_str("DT_NEEDED libblas.so")?; + + uv_snapshot!(context.filters(), context.pip_install() + .arg("--preview-features") + .arg("detect-module-conflicts") + .arg("./gpu-a") + .arg("./gpu-b"), @" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 2 packages in [TIME] + Prepared 2 packages in [TIME] + warning: The file `gpu.abi3.so` is provided by more than one package, which causes an install race condition and can result in a broken module. Packages containing the file: + * gpu-a (gpu_a-0.1.0-py3-none-any.whl) + * gpu-b (gpu_b-0.1.0-py3-none-any.whl) + Installed 2 packages in [TIME] + + gpu-a==0.1.0 (from file://[TEMP_DIR]/gpu-a) + + gpu-b==0.1.0 (from file://[TEMP_DIR]/gpu-b) + " + ); + + Ok(()) +} + /// See: #[test] fn transitive_dependency_config_settings_invalidation() -> Result<()> { From 8423175d3bd60b0b098fc099fc08ab9b97f19e2c Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 21 Jan 2026 12:31:07 +0100 Subject: [PATCH 5/5] clippy --- crates/uv-install-wheel/src/linker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/uv-install-wheel/src/linker.rs b/crates/uv-install-wheel/src/linker.rs index 454d56cceedd5..4cd667c868902 100644 --- a/crates/uv-install-wheel/src/linker.rs +++ b/crates/uv-install-wheel/src/linker.rs @@ -129,7 +129,7 @@ impl Locks { .iter() .map(|(wheel, absolute)| Ok((wheel, absolute.metadata()?.len()))) .collect::>()?; - Self::warn_file_conflict(&relative, &files); + Self::warn_file_conflict(relative, &files); } else if file_type.is_dir() { // Don't early return if the method returns true, so we show warnings for each // top-level module.