Skip to content

Commit

Permalink
Allow non-nested archives for hexdump and others (#1564)
Browse files Browse the repository at this point in the history
## Summary (#1562)

It turns out that `hexdump` uses an invalid source distribution format
whereby the contents aren't nested in a top-level directory -- instead,
they're all just flattened at the top level. Looking at pip's source
(https://github.com/pypa/pip/blob/51de88ca6459fdd5213f86a54b021a80884572f9/src/pip/_internal/utils/unpacking.py#L62),
pip only strips the top-level directory if all entries share the same
directory prefix (i.e., if that directory is the only thing at the top
level of the archive). This PR accommodates these "invalid" distributions.

I can't find any history on this method in `pip`. It looks like it dates
back more than 15 years, to before `pip` was even called `pip`.

Closes #1376.
  • Loading branch information
charliermarsh authored Feb 17, 2024
1 parent 4a09889 commit 340cb67
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 13 deletions.
11 changes: 7 additions & 4 deletions crates/uv-build/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ impl Pep517Backend {
import sys
sys.path = [{backend_path}] + sys.path
{import}
{import}
"#, backend_path = backend_path_encoded}
}
}
Expand Down Expand Up @@ -305,8 +305,11 @@ impl SourceBuild {
.map_err(|err| Error::Extraction(extracted.clone(), err))?;

// Extract the top-level directory from the archive.
uv_extract::strip_component(&extracted)
.map_err(|err| Error::Extraction(extracted.clone(), err))?
match uv_extract::strip_component(&extracted) {
Ok(top_level) => top_level,
Err(uv_extract::Error::NonSingularArchive(_)) => extracted,
Err(err) => return Err(Error::Extraction(extracted.clone(), err)),
}
};
let source_tree = if let Some(subdir) = subdirectory {
source_root.join(subdir)
Expand Down Expand Up @@ -614,7 +617,7 @@ impl SourceBuild {
let script = formatdoc! {
r#"{}
print(backend.build_{}("{}", metadata_directory={}))
"#, pep517_backend.backend_import(), self.build_kind, escaped_wheel_dir, metadata_directory
"#, pep517_backend.backend_import(), self.build_kind, escaped_wheel_dir, metadata_directory
};
let span = info_span!(
"run_python_script",
Expand Down
6 changes: 5 additions & 1 deletion crates/uv-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,11 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
drop(span);

// Extract the top-level directory.
let extracted = uv_extract::strip_component(temp_dir.path())?;
let extracted = match uv_extract::strip_component(temp_dir.path()) {
Ok(top_level) => top_level,
Err(uv_extract::Error::NonSingularArchive(_)) => temp_dir.into_path(),
Err(err) => return Err(err.into()),
};

// Persist it to the cache.
fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent"))
Expand Down
6 changes: 4 additions & 2 deletions crates/uv-extract/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ pub enum Error {
#[error("Unsupported archive type: {0}")]
UnsupportedArchive(PathBuf),
#[error(
"The top level of the archive must only contain a list directory, but it contains: {0:?}"
"The top-level of the archive must only contain a list directory, but it contains: {0:?}"
)]
InvalidArchive(Vec<OsString>),
NonSingularArchive(Vec<OsString>),
#[error("The top-level of the archive must only contain a list directory, but it's empty")]
EmptyArchive,
}
16 changes: 10 additions & 6 deletions crates/uv-extract/src/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,14 @@ pub fn strip_component(source: impl AsRef<Path>) -> Result<PathBuf, Error> {
// TODO(konstin): Verify the name of the directory.
let top_level =
fs_err::read_dir(source.as_ref())?.collect::<std::io::Result<Vec<fs_err::DirEntry>>>()?;
let [root] = top_level.as_slice() else {
return Err(Error::InvalidArchive(
top_level.into_iter().map(|e| e.file_name()).collect(),
));
};
Ok(root.path())
match top_level.as_slice() {
[root] => Ok(root.path()),
[] => Err(Error::EmptyArchive),
_ => Err(Error::NonSingularArchive(
top_level
.into_iter()
.map(|entry| entry.file_name())
.collect(),
)),
}
}

0 comments on commit 340cb67

Please sign in to comment.