Skip to content

Commit

Permalink
Ensure authentication is passed from the index url to distribution fi…
Browse files Browse the repository at this point in the history
…les (#1886)

Closes #1709
Closes #1371

Tested with the reproduction provided in #1709 which gets past the HTTP
401.

Reuses the same copying logic we introduced in
#1874 to ensure authentication is
attached to file URLs with a realm that matches that of the index. I had
to move the authentication logic into a new crate so it could be used in
`distribution-types`.

We will want to something more robust in the future, like track all
realms with authentication in a central store and perform lookups there.
That's what `pip` does and it allows consolidation of logic like netrc
lookups. That refactor feels significant though, and I'd like to get
this fixed ASAP so this is a minimal fix.
  • Loading branch information
zanieb authored Feb 23, 2024
1 parent 3542a65 commit 8a12b2e
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 36 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/distribution-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ distribution-filename = { path = "../distribution-filename", features = ["serde"
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
platform-tags = { path = "../platform-tags" }
uv-auth = { path = "../uv-auth" }
uv-fs = { path = "../uv-fs" }
uv-git = { path = "../uv-git", features = ["vendored-openssl"] }
uv-normalize = { path = "../uv-normalize" }
Expand Down
11 changes: 9 additions & 2 deletions crates/distribution-types/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use thiserror::Error;

use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use pypi_types::{DistInfoMetadata, Hashes, Yanked};
use url::Url;
use uv_auth::safe_copy_url_auth_to_str;

/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
#[derive(Debug, Error)]
Expand Down Expand Up @@ -39,7 +41,7 @@ pub struct File {

impl File {
/// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
pub fn try_from(file: pypi_types::File, base: &str) -> Result<Self, FileConversionError> {
pub fn try_from(file: pypi_types::File, base: &Url) -> Result<Self, FileConversionError> {
Ok(Self {
dist_info_metadata: file.dist_info_metadata,
filename: file.filename,
Expand All @@ -51,7 +53,12 @@ impl File {
size: file.size,
upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()),
url: if file.url.contains("://") {
FileLocation::AbsoluteUrl(file.url)
let url = safe_copy_url_auth_to_str(base, &file.url)
.map_err(|err| FileConversionError::Url(file.url.clone(), err))?
.map(|url| url.to_string())
.unwrap_or(file.url);

FileLocation::AbsoluteUrl(url)
} else {
FileLocation::RelativeUrl(base.to_string(), file.url)
},
Expand Down
6 changes: 6 additions & 0 deletions crates/pypi-types/src/base_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ impl BaseUrl {
pub fn as_url(&self) -> &Url {
&self.0
}

/// Convert to the underlying [`Url`].
#[must_use]
pub fn into_url(self) -> Url {
self.0
}
}

impl From<Url> for BaseUrl {
Expand Down
8 changes: 8 additions & 0 deletions crates/uv-auth/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "uv-auth"
version = "0.0.1"
edition = "2021"

[dependencies]
url = { workspace = true }
tracing = { workspace = true }
51 changes: 32 additions & 19 deletions crates/uv-client/src/auth.rs → crates/uv-auth/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,34 +1,47 @@
/// HTTP authentication utilities.
use tracing::warn;
use url::Url;

/// HTTP authentication utilities.
/// Optimized version of [`safe_copy_url_auth`] which avoids parsing a string
/// into a URL unless the given URL has authentication to copy. Useful for patterns
/// where the returned URL would immediately be cast into a string.
///
/// Returns [`Err`] if there is authentication to copy and `new_url` is not a valid URL.
/// Returns [`None`] if there is no authentication to copy.
pub fn safe_copy_url_auth_to_str(
trusted_url: &Url,
new_url: &str,
) -> Result<Option<Url>, url::ParseError> {
if trusted_url.username().is_empty() && trusted_url.password().is_none() {
return Ok(None);
}

let new_url = Url::parse(new_url)?;
Ok(Some(safe_copy_url_auth(trusted_url, new_url)))
}

/// Copy authentication from one URL to another URL if applicable.
///
/// See [`should_retain_auth`] for details on when authentication is retained.
#[must_use]
pub(crate) fn safe_copy_auth(request_url: &Url, mut response_url: Url) -> Url {
if should_retain_auth(request_url, &response_url) {
response_url
.set_username(request_url.username())
.unwrap_or_else(|_| {
warn!("Failed to transfer username to response URL: {response_url}")
});
response_url
.set_password(request_url.password())
.unwrap_or_else(|_| {
warn!("Failed to transfer password to response URL: {response_url}")
});
pub fn safe_copy_url_auth(trusted_url: &Url, mut new_url: Url) -> Url {
if should_retain_auth(trusted_url, &new_url) {
new_url
.set_username(trusted_url.username())
.unwrap_or_else(|_| warn!("Failed to transfer username to response URL: {new_url}"));
new_url
.set_password(trusted_url.password())
.unwrap_or_else(|_| warn!("Failed to transfer password to response URL: {new_url}"));
}
response_url
new_url
}

/// Determine if authentication information should be retained on a new URL.
/// Implements the specification defined in RFC 7235 and 7230.
///
/// <https://datatracker.ietf.org/doc/html/rfc7235#section-2.2>
/// <https://datatracker.ietf.org/doc/html/rfc7230#section-5.5>
fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
fn should_retain_auth(trusted_url: &Url, new_url: &Url) -> bool {
// The "scheme" and "authority" components must match to retain authentication
// The "authority", is composed of the host and port.

Expand All @@ -37,20 +50,20 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
// Note some clients such as Python's `requests` library allow an upgrade
// from `http` to `https` but this is not spec-compliant.
// <https://github.com/pypa/pip/blob/75f54cae9271179b8cc80435f92336c97e349f9d/src/pip/_vendor/requests/sessions.py#L133-L136>
if request_url.scheme() != response_url.scheme() {
if trusted_url.scheme() != new_url.scheme() {
return false;
}

// The host must always be an exact match.
if request_url.host() != response_url.host() {
if trusted_url.host() != new_url.host() {
return false;
}

// Check the port.
// The port is only allowed to differ if it it matches the "default port" for the scheme.
// However, `reqwest` sets the `port` to `None` if it matches the default port so we do
// not need any special handling here.
if request_url.port() != response_url.port() {
if trusted_url.port() != new_url.port() {
return false;
}

Expand All @@ -61,7 +74,7 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
mod tests {
use url::{ParseError, Url};

use crate::auth::should_retain_auth;
use crate::should_retain_auth;

#[test]
fn test_should_retain_auth() -> Result<(), ParseError> {
Expand Down
1 change: 1 addition & 0 deletions crates/uv-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ install-wheel-rs = { path = "../install-wheel-rs" }
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
platform-tags = { path = "../platform-tags" }
uv-auth = { path = "../uv-auth" }
uv-cache = { path = "../uv-cache" }
uv-fs = { path = "../uv-fs", features = ["tokio"] }
uv-normalize = { path = "../uv-normalize" }
Expand Down
7 changes: 4 additions & 3 deletions crates/uv-client/src/flat_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ use distribution_types::{
use pep440_rs::Version;
use platform_tags::Tags;
use pypi_types::{Hashes, Yanked};
use uv_auth::safe_copy_url_auth;
use uv_cache::{Cache, CacheBucket};
use uv_normalize::PackageName;

use crate::auth::safe_copy_auth;
use crate::cached_client::{CacheControl, CachedClientError};
use crate::html::SimpleHtml;
use crate::{Connectivity, Error, ErrorKind, RegistryClient};
Expand Down Expand Up @@ -156,16 +156,17 @@ impl<'a> FlatIndexClient<'a> {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = safe_copy_auth(url, response.url().clone());
let url = safe_copy_url_auth(url, response.url().clone());

let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;

let base = safe_copy_url_auth(&url, base.into_url());
let files: Vec<File> = files
.into_iter()
.filter_map(|file| {
match File::try_from(file, base.as_url().as_str()) {
match File::try_from(file, &base) {
Ok(file) => Some(file),
Err(err) => {
// Ignore files with unparsable version specifiers.
Expand Down
1 change: 0 additions & 1 deletion crates/uv-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ pub use registry_client::{
};
pub use rkyvutil::OwnedArchive;

mod auth;
mod cached_client;
mod error;
mod flat_index;
Expand Down
21 changes: 10 additions & 11 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Nam
use install_wheel_rs::{find_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pypi_types::{Metadata21, SimpleJson};
use uv_auth::safe_copy_url_auth;
use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_normalize::PackageName;
use uv_warnings::warn_user_once;

use crate::auth::safe_copy_auth;
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::middleware::OfflineMiddleware;
Expand Down Expand Up @@ -249,7 +249,7 @@ impl RegistryClient {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = safe_copy_auth(&url, response.url().clone());
let url = safe_copy_url_auth(&url, response.url().clone());

let content_type = response
.headers()
Expand All @@ -271,17 +271,16 @@ impl RegistryClient {
let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?;
let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
.map_err(|err| Error::from_json_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(data.files, package_name, url.as_str());
metadata

SimpleMetadata::from_files(data.files, package_name, &url)
}
MediaType::Html => {
let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(files, package_name, base.as_url().as_str());
metadata
let base = safe_copy_url_auth(&url, base.into_url());

SimpleMetadata::from_files(files, package_name, &base)
}
};
OwnedArchive::from_unarchived(&unarchived)
Expand Down Expand Up @@ -670,7 +669,7 @@ impl SimpleMetadata {
self.0.iter()
}

fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName, base: &str) -> Self {
fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName, base: &Url) -> Self {
let mut map: BTreeMap<Version, VersionFiles> = BTreeMap::default();

// Group the distributions by version and kind
Expand Down Expand Up @@ -810,11 +809,11 @@ mod tests {
}
"#;
let data: SimpleJson = serde_json::from_str(response).unwrap();
let base = "https://pypi.org/simple/pyflyby/";
let base = Url::parse("https://pypi.org/simple/pyflyby/").unwrap();
let simple_metadata = SimpleMetadata::from_files(
data.files,
&PackageName::from_str("pyflyby").unwrap(),
base,
&base,
);
let versions: Vec<String> = simple_metadata
.iter()
Expand Down

0 comments on commit 8a12b2e

Please sign in to comment.