Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 7 additions & 6 deletions crates/uv-build-backend/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,12 +395,13 @@ impl PyProjectToml {
let mut license_files = Vec::new();
let mut license_globs_parsed = Vec::new();
for license_glob in license_globs {
let pep639_glob = PortableGlobParser.parse(license_glob).map_err(|err| {
Error::PortableGlob {
field: license_glob.to_string(),
source: err,
}
})?;
let pep639_glob =
PortableGlobParser::Pep639
.parse(license_glob)
.map_err(|err| Error::PortableGlob {
field: license_glob.to_string(),
source: err,
})?;
license_globs_parsed.push(pep639_glob);
}
let license_globs =
Expand Down
6 changes: 3 additions & 3 deletions crates/uv-build-backend/src/source_dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ fn source_dist_matcher(
.to_string();
includes.push(format!("{}/**", globset::escape(import_path)));
for include in includes {
let glob = PortableGlobParser
let glob = PortableGlobParser::Uv
.parse(&include)
.map_err(|err| Error::PortableGlob {
field: "tool.uv.build-backend.source-include".to_string(),
Expand All @@ -113,7 +113,7 @@ fn source_dist_matcher(
// Include the license files
for license_files in pyproject_toml.license_files_source_dist() {
trace!("Including license files at: `{license_files}`");
let glob = PortableGlobParser
let glob = PortableGlobParser::Pep639
.parse(license_files)
.map_err(|err| Error::PortableGlob {
field: "project.license-files".to_string(),
Expand All @@ -124,7 +124,7 @@ fn source_dist_matcher(

// Include the data files
for (name, directory) in settings.data.iter() {
let glob = PortableGlobParser
let glob = PortableGlobParser::Uv
.parse(&format!("{}/**", globset::escape(directory)))
.map_err(|err| Error::PortableGlob {
field: format!("tool.uv.build-backend.data.{name}"),
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-build-backend/src/wheel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ pub(crate) fn build_exclude_matcher(
} else {
format!("**/{exclude}").to_string()
};
let glob = PortableGlobParser
let glob = PortableGlobParser::Uv
.parse(&exclude)
.map_err(|err| Error::PortableGlob {
field: "tool.uv.build-backend.*-exclude".to_string(),
Expand Down Expand Up @@ -469,7 +469,7 @@ fn wheel_subdir_from_globs(
src.user_display(),
license_files
);
PortableGlobParser.parse(license_files)
PortableGlobParser::Pep639.parse(license_files)
})
.collect::<Result<_, _>>()
.map_err(|err| Error::PortableGlob {
Expand Down
2 changes: 2 additions & 0 deletions crates/uv-globfilter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ license.workspace = true

[dependencies]
globset = { workspace = true }
owo-colors = { workspace = true }
regex = { workspace = true }
regex-automata = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
walkdir = { workspace = true }

[dev-dependencies]
anstream = { workspace = true }
fs-err = { workspace = true }
insta = "1.41.1"
tempfile = { workspace = true }
Expand Down
8 changes: 4 additions & 4 deletions crates/uv-globfilter/src/glob_dir_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ impl GlobDirFilter {
#[cfg(test)]
mod tests {
use crate::glob_dir_filter::GlobDirFilter;
use crate::portable_glob::PortableGlobParser;
use crate::PortableGlobParser;
use std::path::{Path, MAIN_SEPARATOR};
use tempfile::tempdir;
use walkdir::WalkDir;
Expand Down Expand Up @@ -152,7 +152,7 @@ mod tests {

#[test]
fn match_directory() {
let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap());
let patterns = PATTERNS.map(|pattern| PortableGlobParser::Pep639.parse(pattern).unwrap());
let matcher = GlobDirFilter::from_globs(&patterns).unwrap();
assert!(matcher.match_directory(&Path::new("path1").join("dir1")));
assert!(matcher.match_directory(&Path::new("path2").join("dir2")));
Expand All @@ -170,7 +170,7 @@ mod tests {
fs_err::create_dir_all(file.parent().unwrap()).unwrap();
fs_err::File::create(file).unwrap();
}
let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap());
let patterns = PATTERNS.map(|pattern| PortableGlobParser::Pep639.parse(pattern).unwrap());
let matcher = GlobDirFilter::from_globs(&patterns).unwrap();

// Test the prefix filtering
Expand Down Expand Up @@ -228,7 +228,7 @@ mod tests {
fs_err::create_dir_all(file.parent().unwrap()).unwrap();
fs_err::File::create(file).unwrap();
}
let patterns = PATTERNS.map(|pattern| PortableGlobParser.parse(pattern).unwrap());
let patterns = PATTERNS.map(|pattern| PortableGlobParser::Pep639.parse(pattern).unwrap());

let include_matcher = GlobDirFilter::from_globs(&patterns).unwrap();

Expand Down
4 changes: 2 additions & 2 deletions crates/uv-globfilter/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ fn main() {

let mut include_globs = Vec::new();
for include in includes {
let glob = PortableGlobParser.parse(include).unwrap();
let glob = PortableGlobParser::Pep639.parse(include).unwrap();
include_globs.push(glob.clone());
}
let include_matcher = GlobDirFilter::from_globs(&include_globs).unwrap();
Expand All @@ -25,7 +25,7 @@ fn main() {
} else {
format!("**/{exclude}").to_string()
};
let glob = PortableGlobParser.parse(&exclude).unwrap();
let glob = PortableGlobParser::Pep639.parse(&exclude).unwrap();
exclude_builder.add(glob);
}
// https://github.com/BurntSushi/ripgrep/discussions/2927
Expand Down
170 changes: 144 additions & 26 deletions crates/uv-globfilter/src/portable_glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/).

use globset::{Glob, GlobBuilder};
use owo_colors::OwoColorize;
use thiserror::Error;

#[derive(Debug, Error)]
Expand All @@ -19,6 +20,24 @@ pub enum PortableGlobError {
pos: usize,
invalid: char,
},
#[error(
"Invalid character `{invalid}` at position {pos} in glob: `{glob}`. {}{} Characters can be escaped with a backslash",
"hint".bold().cyan(),
":".bold()
Comment on lines +24 to +26
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not as pretty as our usual hint logic where we show hints below the error chain, but it avoids doing this processing separately for this one error.

)]
InvalidCharacterUv {
glob: String,
pos: usize,
invalid: char,
},
#[error(
"Only forward slashes are allowed as path separator, invalid character at position {pos} in glob: `{glob}`"
)]
InvalidBackslash { glob: String, pos: usize },
#[error(
"Path separators can't be escaped, invalid character at position {pos} in glob: `{glob}`"
)]
InvalidEscapee { glob: String, pos: usize },
#[error("Invalid character `{invalid}` in range at position {pos} in glob: `{glob}`")]
InvalidCharacterRange {
glob: String,
Expand All @@ -27,15 +46,35 @@ pub enum PortableGlobError {
},
#[error("Too many at stars at position {pos} in glob: `{glob}`")]
TooManyStars { glob: String, pos: usize },
#[error("Trailing backslash at position {pos} in glob: `{glob}`")]
TrailingEscape { glob: String, pos: usize },
}

/// Cross-language glob parser with the glob syntax from
/// Cross-language glob syntax from
/// [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/).
///
/// The variant determines whether the parser strictly adheres to PEP 639 rules or allows extensions
/// such as backslash escapes.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct PortableGlobParser;
pub enum PortableGlobParser {
/// Follow the PEP 639 rules strictly.
///
/// Backslashes are not an escape character in this mode; for example, `licenses\eula.txt` is
/// rejected with [`PortableGlobError::InvalidBackslash`].
Pep639,
/// In addition to the PEP 639 syntax, allow escaping characters with backslashes.
///
/// For cross-platform compatibility, escaping path separators is not allowed, i.e., forward
/// slashes and backslashes can't be escaped.
///
/// For example, `**/\@test` is accepted, while `licenses\/MIT.txt` is rejected with
/// [`PortableGlobError::InvalidEscapee`] and a glob ending in a lone backslash is rejected with
/// [`PortableGlobError::TrailingEscape`].
Uv,
}

impl PortableGlobParser {
/// Parse cross-language glob syntax from [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/):
fn backslash_escape(self) -> bool {
// Whether `\` acts as an escape character for this parser variant. `parse` forwards this
// value to `GlobBuilder::backslash_escape` when building the glob.
// No wildcard arm: every variant states its choice explicitly.
match self {
PortableGlobParser::Pep639 => false,
PortableGlobParser::Uv => true,
}
}

/// Parse cross-language glob syntax based on [PEP 639](https://packaging.python.org/en/latest/specifications/glob-patterns/):
///
/// - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`) are matched verbatim.
/// - The special glob characters are:
Expand All @@ -45,17 +84,21 @@ impl PortableGlobParser {
/// - `[]`, containing only the verbatim matched characters: Matches a single of the characters contained. Within
/// `[...]`, the hyphen indicates a locale-agnostic range (e.g. `a-z`, order based on Unicode code points). Hyphens at
/// the start or end are matched literally.
/// - `\`: Disallowed in PEP 639 mode. In uv mode, it escapes the following character to be matched verbatim.
/// - The path separator is the forward slash character (`/`). Patterns are relative to the given directory, a leading slash
/// character for absolute paths is not supported.
/// - Parent directory indicators (`..`) are not allowed.
///
/// These rules mean that matching the backslash (`\`) is forbidden, which avoids collisions with the Windows path separator.
pub fn parse(&self, glob: &str) -> Result<Glob, PortableGlobError> {
self.check(glob)?;
Ok(GlobBuilder::new(glob).literal_separator(true).build()?)
Ok(GlobBuilder::new(glob)
.literal_separator(true)
.backslash_escape(self.backslash_escape())
.build()?)
}

/// See [`Self::parse`].
/// See [`parse_portable_glob`].
pub fn check(&self, glob: &str) -> Result<(), PortableGlobError> {
let mut chars = glob.chars().enumerate().peekable();
// A `..` is only a parent directory indicator at the start of the string or after a directory
Expand Down Expand Up @@ -119,12 +162,50 @@ impl PortableGlobParser {
}
}
start_or_slash = false;
} else if c == '\\' {
match *self {
PortableGlobParser::Pep639 => {
return Err(PortableGlobError::InvalidBackslash {
glob: glob.to_string(),
pos,
});
}
PortableGlobParser::Uv => {
match chars.next() {
Some((pos, '/' | '\\')) => {
// For cross-platform compatibility, we don't allow forward slashes or
// backslashes to be escaped.
return Err(PortableGlobError::InvalidEscapee {
glob: glob.to_string(),
pos,
});
}
Some(_) => {
// Escaped character
}
None => {
return Err(PortableGlobError::TrailingEscape {
glob: glob.to_string(),
pos,
});
}
}
}
}
} else {
return Err(PortableGlobError::InvalidCharacter {
glob: glob.to_string(),
pos,
invalid: c,
});
let err = match *self {
PortableGlobParser::Pep639 => PortableGlobError::InvalidCharacter {
glob: glob.to_string(),
pos,
invalid: c,
},
PortableGlobParser::Uv => PortableGlobError::InvalidCharacterUv {
glob: glob.to_string(),
pos,
invalid: c,
},
};
return Err(err);
}
}
Ok(())
Expand All @@ -138,7 +219,10 @@ mod tests {

#[test]
fn test_error() {
let parse_err = |glob| PortableGlobParser.parse(glob).unwrap_err().to_string();
let parse_err = |glob| {
let error = PortableGlobParser::Pep639.parse(glob).unwrap_err();
anstream::adapter::strip_str(&error.to_string()).to_string()
};
assert_snapshot!(
parse_err(".."),
@"The parent directory operator (`..`) at position 0 is not allowed in glob: `..`"
Expand Down Expand Up @@ -173,30 +257,64 @@ mod tests {
);
assert_snapshot!(
parse_err(r"licenses\eula.txt"),
@r"Invalid character `\` at position 8 in glob: `licenses\eula.txt`"
@r"Only forward slashes are allowed as path separator, invalid character at position 8 in glob: `licenses\eula.txt`"
);
assert_snapshot!(
parse_err(r"**/@test"),
@"Invalid character `@` at position 3 in glob: `**/@test`"
);
// Escapes are not allowed in strict PEP 639 mode
assert_snapshot!(
parse_err(r"public domain/Gulliver\\’s Travels.txt"),
@r"Invalid character ` ` at position 6 in glob: `public domain/Gulliver\\’s Travels.txt`"
);
let parse_err_uv = |glob| {
let error = PortableGlobParser::Uv.parse(glob).unwrap_err();
anstream::adapter::strip_str(&error.to_string()).to_string()
};
assert_snapshot!(
parse_err_uv(r"**/@test"),
@"Invalid character `@` at position 3 in glob: `**/@test`. hint: Characters can be escaped with a backslash"
);
// Escaping slashes is not allowed.
assert_snapshot!(
parse_err_uv(r"licenses\\MIT.txt"),
@r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\\MIT.txt`"
);
assert_snapshot!(
parse_err_uv(r"licenses\/MIT.txt"),
@r"Path separators can't be escaped, invalid character at position 9 in glob: `licenses\/MIT.txt`"
);
}

#[test]
fn test_valid() {
let cases = [
"licenses/*.txt",
"licenses/**/*.txt",
"LICEN[CS]E.txt",
"LICEN?E.txt",
"[a-z].txt",
"[a-z._-].txt",
"*/**",
"LICENSE..txt",
"LICENSE_file-1.txt",
r"licenses/*.txt",
r"licenses/**/*.txt",
r"LICEN[CS]E.txt",
r"LICEN?E.txt",
r"[a-z].txt",
r"[a-z._-].txt",
r"*/**",
r"LICENSE..txt",
r"LICENSE_file-1.txt",
// (google translate)
"licenses/라이센스*.txt",
"licenses/ライセンス*.txt",
"licenses/执照*.txt",
"src/**",
r"licenses/라이센스*.txt",
r"licenses/ライセンス*.txt",
r"licenses/执照*.txt",
r"src/**",
];
let cases_uv = [
r"public-domain/Gulliver\’s\ Travels.txt",
Copy link
Member Author

@konstin konstin May 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's verbose that you have to support all non-alphanumeric characters even if they aren't "special", but I expect that these are rare in project paths.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the thing that perplexes me the most here. Are you doing things this way because of PEP 639? But since you're already going beyond PEP 639 by introducing escaping, does it make sense to only require escaping for meta characters?

// https://github.com/astral-sh/uv/issues/13280
r"**/\@test",
];
for case in cases {
PortableGlobParser.parse(case).unwrap();
PortableGlobParser::Pep639.parse(case).unwrap();
}
for case in cases.iter().chain(cases_uv.iter()) {
PortableGlobParser::Uv.parse(case).unwrap();
}
}
}
Loading
Loading