Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion idna/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ impl core::fmt::Display for Errors {
///
/// This process may fail.
///
/// If you have a `&str` instead of `&[u8]`, just call `.to_bytes()` on it before
/// If you have a `&str` instead of `&[u8]`, just call `.as_bytes()` on it before
/// passing it to this function. It's still preferable to use this function over
/// the sibling functions that take `&str`.
pub fn domain_to_ascii_cow(
Expand Down
61 changes: 50 additions & 11 deletions url/src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
use alloc::borrow::Cow;
use alloc::borrow::ToOwned;
use alloc::string::String;
use alloc::string::ToString;
use alloc::vec::Vec;
use core::cmp;
use core::fmt::{self, Formatter};
Expand All @@ -30,8 +29,8 @@
Ipv6(Ipv6Addr),
}

impl From<Host<String>> for HostInternal {
fn from(host: Host<String>) -> HostInternal {
impl From<Host<Cow<'_, str>>> for HostInternal {
fn from(host: Host<Cow<'_, str>>) -> HostInternal {
match host {
Host::Domain(ref s) if s.is_empty() => HostInternal::None,
Host::Domain(_) => HostInternal::Domain,
Expand Down Expand Up @@ -80,15 +79,34 @@
///
/// <https://url.spec.whatwg.org/#host-parsing>
pub fn parse(input: &str) -> Result<Self, ParseError> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't add, change, or remove from the public API, but please verify this as well in case I missed something.

Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned())
}

/// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned())

Check warning on line 87 in url/src/host.rs

View check run for this annotation

Codecov / codecov/patch

url/src/host.rs#L86-L87

Added lines #L86 - L87 were not covered by tests
}
}

impl<'a> Host<Cow<'a, str>> {
pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
if input.starts_with('[') {
if !input.ends_with(']') {
return Err(ParseError::InvalidIpv6Address);
}
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
}
let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
let domain: Cow<'a, [u8]> = match domain {
Cow::Owned(v) => Cow::Owned(v),
// if borrowed then we can use the original cow
Cow::Borrowed(_) => match input {
Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()),
Cow::Owned(input) => Cow::Owned(input.into_bytes()),
},
};

let domain = Self::domain_to_ascii(&domain)?;
let domain = domain_to_ascii(domain)?;

if domain.is_empty() {
return Err(ParseError::EmptyHost);
Expand All @@ -98,12 +116,11 @@
let address = parse_ipv4addr(&domain)?;
Ok(Host::Ipv4(address))
} else {
Ok(Host::Domain(domain.to_string()))
Ok(Host::Domain(domain))
}
}

// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
if input.starts_with('[') {
if !input.ends_with(']') {
return Err(ParseError::InvalidIpv6Address);
Expand Down Expand Up @@ -137,17 +154,39 @@
Err(ParseError::InvalidDomainCharacter)
} else {
Ok(Host::Domain(
utf8_percent_encode(input, CONTROLS).to_string(),
match utf8_percent_encode(&input, CONTROLS).into() {
Cow::Owned(v) => Cow::Owned(v),
// if we're borrowing, then we can return the original Cow
Cow::Borrowed(_) => input,
},
))
}
}

/// convert domain with idna
fn domain_to_ascii(domain: &[u8]) -> Result<Cow<'_, str>, ParseError> {
idna::domain_to_ascii_cow(domain, idna::AsciiDenyList::URL).map_err(Into::into)
pub(crate) fn into_owned(self) -> Host<String> {
match self {
Host::Domain(s) => Host::Domain(s.into_owned()),
Host::Ipv4(ip) => Host::Ipv4(ip),
Host::Ipv6(ip) => Host::Ipv6(ip),

Check warning on line 170 in url/src/host.rs

View check run for this annotation

Codecov / codecov/patch

url/src/host.rs#L169-L170

Added lines #L169 - L170 were not covered by tests
}
}
}

/// convert domain with idna
fn domain_to_ascii(domain: Cow<'_, [u8]>) -> Result<Cow<'_, str>, ParseError> {
let value = idna::domain_to_ascii_cow(&domain, idna::AsciiDenyList::URL)?;
// TODO: would be better to move this into the idna crate
Ok(match value {
Cow::Owned(value) => Cow::Owned(value),
// SAFETY: If borrowed, then the original string is ascii and we can return the
// original Cow in order to save an allocation
Cow::Borrowed(_) => match domain {
Cow::Borrowed(value) => unsafe { Cow::Borrowed(core::str::from_utf8_unchecked(value)) },
Cow::Owned(value) => unsafe { String::from_utf8_unchecked(value).into() },
},
})
}

impl<S: AsRef<str>> fmt::Display for Host<S> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
Expand Down
9 changes: 5 additions & 4 deletions url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
))]
use crate::net::{SocketAddr, ToSocketAddrs};
use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
use alloc::borrow::Cow;
use alloc::borrow::ToOwned;
use alloc::str;
use alloc::string::{String, ToString};
Expand Down Expand Up @@ -2032,9 +2033,9 @@
}
}
if SchemeType::from(self.scheme()).is_special() {
self.set_host_internal(Host::parse(host_substr)?, None);
self.set_host_internal(Host::parse_cow(host_substr.into())?, None);
} else {
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
self.set_host_internal(Host::parse_opaque_cow(host_substr.into())?, None);
}
} else if self.has_host() {
if scheme_type.is_special() && !scheme_type.is_file() {
Expand Down Expand Up @@ -2070,7 +2071,7 @@
}

/// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
fn set_host_internal(&mut self, host: Host<Cow<'_, str>>, opt_new_port: Option<Option<u16>>) {
let old_suffix_pos = if opt_new_port.is_some() {
self.path_start
} else {
Expand Down Expand Up @@ -2987,7 +2988,7 @@
serialization.push(':');
}
Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
let host = Host::parse_cow(server.to_str().ok_or(())?.into()).map_err(|_| ())?;

Check warning on line 2991 in url/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

url/src/lib.rs#L2991

Added line #L2991 was not covered by tests
write!(serialization, "{}", host).unwrap();
host_end = to_u32(serialization.len()).unwrap();
host_internal = host.into();
Expand Down
42 changes: 22 additions & 20 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use alloc::borrow::Cow;
use alloc::string::String;
use alloc::string::ToString;
use core::fmt::{self, Formatter, Write};
use core::str;

Expand Down Expand Up @@ -325,6 +325,10 @@
.by_ref()
.find(|&c| !matches!(c, '\t' | '\n' | '\r'))
}

fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.chars.as_str().len()))
}
}

pub struct Parser<'a> {
Expand Down Expand Up @@ -979,7 +983,7 @@
pub fn parse_host(
mut input: Input<'_>,
scheme_type: SchemeType,
) -> ParseResult<(Host<String>, Input<'_>)> {
) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
if scheme_type.is_file() {
return Parser::get_file_host(input);
}
Expand Down Expand Up @@ -1010,34 +1014,34 @@
}
bytes += c.len_utf8();
}
let replaced: String;
let host_str;
{
let host_input = input.by_ref().take(non_ignored_chars);
if has_ignored_chars {
replaced = host_input.collect();
host_str = &*replaced
host_str = Cow::Owned(host_input.collect());
} else {
for _ in host_input {}
host_str = &input_str[..bytes]
host_str = Cow::Borrowed(&input_str[..bytes]);
}
}
if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
return Err(ParseError::EmptyHost);
}
if !scheme_type.is_special() {
let host = Host::parse_opaque(host_str)?;
let host = Host::parse_opaque_cow(host_str)?;
return Ok((host, input));
}
let host = Host::parse(host_str)?;
let host = Host::parse_cow(host_str)?;
Ok((host, input))
}

fn get_file_host(input: Input<'_>) -> ParseResult<(Host<String>, Input<'_>)> {
fn get_file_host(input: Input<'_>) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
let (_, host_str, remaining) = Parser::file_host(input)?;
let host = match Host::parse(&host_str)? {
Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
host => host,
Host::Domain(ref d) if d == "localhost" => Host::Domain(Cow::Borrowed("")),
Host::Domain(s) => Host::Domain(Cow::Owned(s)),
Host::Ipv4(ip) => Host::Ipv4(ip),
Host::Ipv6(ip) => Host::Ipv6(ip),

Check warning on line 1044 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1042-L1044

Added lines #L1042 - L1044 were not covered by tests
};
Ok((host, remaining))
}
Expand All @@ -1052,7 +1056,7 @@
has_host = false;
HostInternal::None
} else {
match Host::parse(&host_str)? {
match Host::parse_cow(host_str)? {
Host::Domain(ref d) if d == "localhost" => {
has_host = false;
HostInternal::None
Expand All @@ -1067,7 +1071,7 @@
Ok((has_host, host, remaining))
}

pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> {
pub fn file_host(input: Input<'_>) -> ParseResult<(bool, Cow<'_, str>, Input<'_>)> {
// Undo the Input abstraction here to avoid allocating in the common case
// where the host part of the input does not contain any tab or newline
let input_str = input.chars.as_str();
Expand All @@ -1082,23 +1086,21 @@
}
bytes += c.len_utf8();
}
let replaced: String;
let host_str;
let mut remaining = input.clone();
{
let host_input = remaining.by_ref().take(non_ignored_chars);
if has_ignored_chars {
replaced = host_input.collect();
host_str = &*replaced
host_str = Cow::Owned(host_input.collect());

Check warning on line 1094 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1094

Added line #L1094 was not covered by tests
} else {
for _ in host_input {}
host_str = &input_str[..bytes]
host_str = Cow::Borrowed(&input_str[..bytes]);
}
}
if is_windows_drive_letter(host_str) {
return Ok((false, "".to_string(), input));
if is_windows_drive_letter(&host_str) {
return Ok((false, "".into(), input));
}
Ok((true, host_str.to_string(), remaining))
Ok((true, host_str, remaining))
}

pub fn parse_port<P>(
Expand Down
4 changes: 2 additions & 2 deletions url/src/quirks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
let scheme = url.scheme();
let scheme_type = SchemeType::from(scheme);
if scheme_type == SchemeType::File && new_host.is_empty() {
url.set_host_internal(Host::Domain(String::new()), None);
url.set_host_internal(Host::Domain("".into()), None);
return Ok(());
}

Expand Down Expand Up @@ -208,7 +208,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
let input = Input::new_no_trim(new_hostname);
let scheme_type = SchemeType::from(url.scheme());
if scheme_type == SchemeType::File && new_hostname.is_empty() {
url.set_host_internal(Host::Domain(String::new()), None);
url.set_host_internal(Host::Domain("".into()), None);
return Ok(());
}

Expand Down
Loading