Skip to content

Commit

Permalink
Decode HTML escapes when extracting SHA (#1440)
Browse files Browse the repository at this point in the history
## Summary

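If a distribution filename contains a `+`, it may be HTML-escaped as `&#43;` in the
link's `href`. That entity itself contains a `#`, so when we split the raw `href` on the
first `#` to find the hash fragment, we split in the wrong location. Decoding HTML
entities before splitting avoids this.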

Closes #1338.
  • Loading branch information
charliermarsh authored Feb 16, 2024
1 parent 33dd5f0 commit 0d005a2
Showing 1 changed file with 55 additions and 1 deletion.
56 changes: 55 additions & 1 deletion crates/uv-client/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ impl SimpleHtml {
.ok_or(Error::MissingHref)?;
let href = std::str::from_utf8(href.as_bytes())?;

let (path, hashes) = if let Some((path, fragment)) = href.split_once('#') {
let decoded = html_escape::decode_html_entities(href);
let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') {
// Extract the hash, which should be in the fragment.
(path, Self::parse_hash(fragment)?)
} else {
Expand Down Expand Up @@ -316,6 +317,59 @@ mod tests {
"###);
}

// Regression test for hrefs that contain an HTML-escaped `+` (`&#43;`).
// The entity itself contains a `#`, so the parser must not treat it as the
// start of the URL fragment; the `sha256=` hash after the real `#` must
// still be extracted.
#[test]
fn parse_escaped_fragment() {
// Fixture: a single anchor whose href spells the `+` in the wheel filename
// as `&#43;` and carries the hash in a `#sha256=...` fragment.
let text = r#"
<!DOCTYPE html>
<html>
<body>
<h1>Links for jinja2</h1>
<a href="/whl/Jinja2-3.1.2&#43;233fca715f49-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61">Jinja2-3.1.2+233fca715f49-py3-none-any.whl</a><br/>
</body>
</html>
<!--TIMESTAMP 1703347410-->
"#;
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
let result = SimpleHtml::parse(text, &base).unwrap();
// Expected: `filename` contains the decoded `+`, `hashes.sha256` holds the
// digest from the fragment, and `url` keeps the href exactly as written
// (still containing `&#43;`).
insta::assert_debug_snapshot!(result, @r###"
SimpleHtml {
base: BaseUrl(
Url {
scheme: "https",
cannot_be_a_base: false,
username: "",
password: None,
host: Some(
Domain(
"download.pytorch.org",
),
),
port: None,
path: "/whl/jinja2/",
query: None,
fragment: None,
},
),
files: [
File {
dist_info_metadata: None,
filename: "Jinja2-3.1.2+233fca715f49-py3-none-any.whl",
hashes: Hashes {
sha256: Some(
"6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
),
},
requires_python: None,
size: None,
upload_time: None,
url: "/whl/Jinja2-3.1.2&#43;233fca715f49-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61",
yanked: None,
},
],
}
"###);
}

#[test]
fn parse_missing_hash() {
let text = r#"
Expand Down

0 comments on commit 0d005a2

Please sign in to comment.