Skip to content

Commit

Permalink
Build basic source distributions (#8886)
Browse files Browse the repository at this point in the history
Very basic source distribution support. What's included:

- Include and exclude patterns (hard-coded): Currently, we have
globset+walkdir in one part and glob in the other. I'll migrate
everything to globset+walkdir and some custom perf optimizations to
avoid traversing irrelevant directories on top. I'll also pick a glob
syntax (or subset), PEP 639 seems like a good candidate since it's
consistent with what we already have to support.
- Add the `PKG-INFO` file with metadata: Thanks to Core Metadata 2.2,
this metadata is reliable and can be read statically by external tools.

Example output:

```
$ tar -ztvf dist/dummy-0.1.0.tar.gz
-rw-r--r-- 0/0             154 1970-01-01 01:00 dummy-0.1.0/PKG-INFO
-rw-rw-r-- 0/0             509 1970-01-01 01:00 dummy-0.1.0/pyproject.toml
drwxrwxr-x 0/0               0 1970-01-01 01:00 dummy-0.1.0/src/dummy
drwxrwxr-x 0/0               0 1970-01-01 01:00 dummy-0.1.0/src/dummy/submodule
-rw-rw-r-- 0/0              30 1970-01-01 01:00 dummy-0.1.0/src/dummy/submodule/impl.py
-rw-rw-r-- 0/0              14 1970-01-01 01:00 dummy-0.1.0/src/dummy/submodule/__init__.py
-rw-rw-r-- 0/0              12 1970-01-01 01:00 dummy-0.1.0/src/dummy/__init__.py
```

No tests since the source distributions don't build valid wheels yet.
  • Loading branch information
konstin authored Nov 7, 2024
1 parent 5eba64a commit 107ab3d
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 14 deletions.
14 changes: 14 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ fs-err = { version = "2.11.0" }
fs2 = { version = "0.4.3" }
futures = { version = "0.3.30" }
glob = { version = "0.3.1" }
globset = { version = "0.4.15" }
globwalk = { version = "0.9.1" }
goblin = { version = "0.9.0", default-features = false, features = ["std", "elf32", "elf64", "endian_fd"] }
hex = { version = "0.4.3" }
Expand All @@ -126,7 +127,7 @@ path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" }
petgraph = { version = "0.6.5" }
platform-info = { version = "2.0.3" }
procfs = { version = "0.17.0" , default-features = false, features = ["flate2"] }
procfs = { version = "0.17.0", default-features = false, features = ["flate2"] }
proc-macro2 = { version = "1.0.86" }
pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "95e1390399cdddee986b658be19587eb1fdb2d79" }
version-ranges = { git = "https://github.com/astral-sh/pubgrub", rev = "95e1390399cdddee986b658be19587eb1fdb2d79" }
Expand All @@ -153,6 +154,7 @@ smallvec = { version = "1.13.2" }
spdx = { version = "0.10.6" }
syn = { version = "2.0.77" }
sys-info = { version = "0.9.1" }
tar = { version = "0.4.43" }
target-lexicon = { version = "0.12.16" }
tempfile = { version = "3.12.0" }
textwrap = { version = "0.16.1" }
Expand Down
5 changes: 4 additions & 1 deletion crates/uv-build-backend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@ uv-pep508 = { workspace = true }
uv-pypi-types = { workspace = true }
uv-warnings = { workspace = true }

csv = { workspace = true}
csv = { workspace = true }
flate2 = { workspace = true }
fs-err = { workspace = true }
glob = { workspace = true }
globset = { workspace = true }
itertools = { workspace = true }
serde = { workspace = true }
sha2 = { workspace = true }
spdx = { workspace = true }
tar = { workspace = true }
thiserror = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
Expand Down
146 changes: 139 additions & 7 deletions crates/uv-build-backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@ mod pep639_glob;

use crate::metadata::{PyProjectToml, ValidationError};
use crate::pep639_glob::Pep639GlobError;
use flate2::write::GzEncoder;
use flate2::Compression;
use fs_err::File;
use glob::{GlobError, PatternError};
use globset::{Glob, GlobSetBuilder};
use itertools::Itertools;
use sha2::{Digest, Sha256};
use std::fs::FileType;
use std::io::{BufReader, Read, Write};
use std::io::{BufReader, Cursor, Read, Write};
use std::path::{Path, PathBuf, StripPrefixError};
use std::{io, mem};
use tar::{EntryType, Header};
use thiserror::Error;
use tracing::{debug, trace};
use uv_distribution_filename::WheelFilename;
use uv_distribution_filename::{SourceDistExtension, SourceDistFilename, WheelFilename};
use uv_fs::Simplified;
use walkdir::WalkDir;
use zip::{CompressionMethod, ZipWriter};
Expand All @@ -33,6 +37,9 @@ pub enum Error {
/// [`GlobError`] is a wrapped io error.
#[error(transparent)]
Glob(#[from] GlobError),
/// [`globset::Error`] shows the glob that failed to parse.
#[error(transparent)]
GlobSet(#[from] globset::Error),
#[error("Failed to walk source tree: `{}`", root.user_display())]
WalkDir {
root: PathBuf,
Expand All @@ -43,8 +50,8 @@ pub enum Error {
NotUtf8Path(PathBuf),
#[error("Failed to walk source tree")]
StripPrefix(#[from] StripPrefixError),
#[error("Unsupported file type: {0:?}")]
UnsupportedFileType(FileType),
#[error("Unsupported file type {1:?}: `{}`", _0.user_display())]
UnsupportedFileType(PathBuf, FileType),
#[error("Failed to write wheel zip archive")]
Zip(#[from] zip::result::ZipError),
#[error("Failed to write RECORD file")]
Expand All @@ -53,6 +60,8 @@ pub enum Error {
MissingModule(PathBuf),
#[error("Inconsistent metadata between prepare and build step: `{0}`")]
InconsistentSteps(&'static str),
#[error("Failed to write to {}", _0.user_display())]
TarWrite(PathBuf, #[source] io::Error),
}

/// Allow dispatching between writing to a directory, writing to zip and writing to a `.tar.gz`.
Expand Down Expand Up @@ -276,7 +285,7 @@ fn write_hashed(
}

/// Build a wheel from the source tree and place it in the output directory.
pub fn build(
pub fn build_wheel(
source_tree: &Path,
wheel_dir: &Path,
metadata_directory: Option<&Path>,
Expand Down Expand Up @@ -323,7 +332,10 @@ pub fn build(
wheel_writer.write_file(relative_path_str, entry.path())?;
} else {
// TODO(konsti): We may want to support symlinks, there is support for installing them.
return Err(Error::UnsupportedFileType(entry.file_type()));
return Err(Error::UnsupportedFileType(
entry.path().to_path_buf(),
entry.file_type(),
));
}

entry.path();
Expand All @@ -342,6 +354,126 @@ pub fn build(
Ok(filename)
}

/// Build a source distribution from the source tree and place it in the output directory.
///
/// Reads `pyproject.toml` from `source_tree` and writes a gzip-compressed tar archive named
/// after the project name and version into `source_dist_directory`. All archive members live
/// below a single `{name}-{version}` top-level directory. The archive contains a generated
/// `PKG-INFO` file plus every file and directory under the source tree that matches the
/// (currently hard-coded) include globs and is not pruned by the exclude globs.
///
/// Returns the filename of the written source distribution.
///
/// # Errors
///
/// Fails if `pyproject.toml` cannot be read or parsed, if a glob fails to compile, if walking
/// the source tree fails, if an included entry is neither a regular file nor a directory, or
/// if writing or finalizing the archive fails.
pub fn build_source_dist(
    source_tree: &Path,
    source_dist_directory: &Path,
    uv_version: &str,
) -> Result<SourceDistFilename, Error> {
    let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?;
    let pyproject_toml = PyProjectToml::parse(&contents)?;
    pyproject_toml.check_build_system(uv_version);

    let filename = SourceDistFilename {
        name: pyproject_toml.name().clone(),
        version: pyproject_toml.version().clone(),
        extension: SourceDistExtension::TarGz,
    };

    // Every archive member is placed below this directory.
    let top_level = format!("{}-{}", pyproject_toml.name(), pyproject_toml.version());

    let source_dist_path = source_dist_directory.join(filename.to_string());
    let tar_gz = File::create(&source_dist_path)?;
    let enc = GzEncoder::new(tar_gz, Compression::default());
    let mut tar = tar::Builder::new(enc);

    let metadata = pyproject_toml
        .to_metadata(source_tree)?
        .core_metadata_format();

    // `PKG-INFO` is generated in memory, so its header is filled in by hand.
    let mut header = Header::new_gnu();
    header.set_size(metadata.len() as u64);
    header.set_mode(0o644);
    header.set_cksum();
    tar.append_data(
        &mut header,
        Path::new(&top_level).join("PKG-INFO"),
        Cursor::new(metadata),
    )
    .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;

    let includes = ["src/**/*", "pyproject.toml"];
    let mut include_builder = GlobSetBuilder::new();
    for include in includes {
        include_builder.add(Glob::new(include)?);
    }
    let include_matcher = include_builder.build()?;

    // Globs are matched against the whole path relative to the source tree, so the cache
    // directory needs a `**/` prefix to also be recognized below `src/`. A bare
    // `__pycache__` would only match a directory directly in the source tree root.
    let excludes = ["**/__pycache__", "*.pyc", "*.pyo"];
    let mut exclude_builder = GlobSetBuilder::new();
    for exclude in excludes {
        exclude_builder.add(Glob::new(exclude)?);
    }
    let exclude_matcher = exclude_builder.build()?;

    // TODO(konsti): Add files linked by pyproject.toml

    // `filter_entry` prunes excluded directories, so their contents are never visited.
    for file in WalkDir::new(source_tree).into_iter().filter_entry(|dir| {
        let relative = dir
            .path()
            .strip_prefix(source_tree)
            .expect("walkdir starts with root");
        // TODO(konsti): Also check that we're matching at least a prefix of an include matcher.
        !exclude_matcher.is_match(relative)
    }) {
        let entry = file.map_err(|err| Error::WalkDir {
            root: source_tree.to_path_buf(),
            err,
        })?;
        let relative = entry
            .path()
            .strip_prefix(source_tree)
            .expect("walkdir starts with root");
        if !include_matcher.is_match(relative) {
            trace!("Excluding {}", relative.user_display());
            continue;
        }
        debug!("Including {}", relative.user_display());

        let metadata = fs_err::metadata(entry.path())?;
        let mut header = Header::new_gnu();
        #[cfg(unix)]
        {
            header.set_mode(std::os::unix::fs::MetadataExt::mode(&metadata));
        }
        #[cfg(not(unix))]
        {
            header.set_mode(0o644);
        }

        if entry.file_type().is_dir() {
            header.set_entry_type(EntryType::Directory);
            header
                .set_path(Path::new(&top_level).join(relative))
                .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
            header.set_size(0);
            header.set_cksum();
            tar.append(&header, io::empty())
                .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
        } else if entry.file_type().is_file() {
            header.set_size(metadata.len());
            header.set_cksum();
            tar.append_data(
                &mut header,
                Path::new(&top_level).join(relative),
                BufReader::new(File::open(entry.path())?),
            )
            .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
        } else {
            return Err(Error::UnsupportedFileType(
                relative.to_path_buf(),
                entry.file_type(),
            ));
        }
    }

    // Finish the tar stream and take the gzip encoder back, then finish the gzip stream
    // explicitly. Leaving the encoder to be finalized on drop would discard any I/O error
    // from writing the gzip trailer and could report a truncated archive as a success.
    let enc = tar
        .into_inner()
        .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
    enc.finish()
        .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;

    Ok(filename)
}

/// Write the dist-info directory to the output directory without building the wheel.
pub fn metadata(
source_tree: &Path,
Expand All @@ -350,7 +482,7 @@ pub fn metadata(
) -> Result<String, Error> {
let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?;
let pyproject_toml = PyProjectToml::parse(&contents)?;
pyproject_toml.check_build_system("1.0.0+test");
pyproject_toml.check_build_system(uv_version);

let filename = WheelFilename {
name: pyproject_toml.name().clone(),
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-build-backend/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fn test_record() {
fn test_determinism() {
let temp1 = TempDir::new().unwrap();
let uv_backend = Path::new("../../scripts/packages/uv_backend");
build(uv_backend, temp1.path(), None, "1.0.0+test").unwrap();
build_wheel(uv_backend, temp1.path(), None, "1.0.0+test").unwrap();

// Touch the file to check that we don't serialize the last modified date.
fs_err::write(
Expand All @@ -56,7 +56,7 @@ fn test_determinism() {
.unwrap();

let temp2 = TempDir::new().unwrap();
build(uv_backend, temp2.path(), None, "1.0.0+test").unwrap();
build_wheel(uv_backend, temp2.path(), None, "1.0.0+test").unwrap();

let wheel_filename = "uv_backend-0.1.0-py3-none-any.whl";
assert_eq!(
Expand Down
12 changes: 9 additions & 3 deletions crates/uv/src/commands/build_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@ use anyhow::Result;
use std::env;
use std::path::Path;

pub(crate) fn build_sdist(_sdist_directory: &Path) -> Result<ExitStatus> {
todo!()
/// Build a source distribution of the project in the current working directory.
///
/// The archive is written into `sdist_directory` and its filename is printed to stdout on
/// success.
pub(crate) fn build_sdist(sdist_directory: &Path) -> Result<ExitStatus> {
    let source_tree = env::current_dir()?;
    let sdist_filename = uv_build_backend::build_source_dist(
        &source_tree,
        sdist_directory,
        uv_version::version(),
    )?;
    println!("{sdist_filename}");
    Ok(ExitStatus::Success)
}
pub(crate) fn build_wheel(
wheel_directory: &Path,
metadata_directory: Option<&Path>,
) -> Result<ExitStatus> {
let filename = uv_build_backend::build(
let filename = uv_build_backend::build_wheel(
&env::current_dir()?,
wheel_directory,
metadata_directory,
Expand Down

0 comments on commit 107ab3d

Please sign in to comment.