Skip to content

Commit

Permalink
feat: use "inherit left, wildcard right" behavior (#44)
Browse files Browse the repository at this point in the history
  • Loading branch information
crowlKats authored Jul 26, 2024
1 parent 8be8cf5 commit 218f408
Show file tree
Hide file tree
Showing 5 changed files with 457 additions and 177 deletions.
22 changes: 22 additions & 0 deletions src/canonicalize_and_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,25 @@ pub fn special_scheme_default_port(scheme: &str) -> Option<&'static str> {
_ => None,
}
}

// Ref: https://urlpattern.spec.whatwg.org/#process-a-base-url-string
/// Prepares a string taken from a base URL for the given processing `kind`.
///
/// For `ProcessType::Pattern` the input is run through
/// `escape_pattern_string`, which backslash-escapes pattern syntax characters
/// (and asserts that the input is ASCII). For every other kind the input is
/// returned unchanged as an owned `String`.
pub fn process_base_url(input: &str, kind: &ProcessType) -> String {
  // Anything that is not a pattern is used verbatim.
  if kind != &ProcessType::Pattern {
    return input.to_string();
  }
  escape_pattern_string(input)
}

// Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string
/// Returns a copy of `input` in which every pattern syntax character
/// (`+ * ? : { } ( ) \`) is preceded by a backslash.
///
/// # Panics
///
/// Panics if `input` contains any non-ASCII character.
pub fn escape_pattern_string(input: &str) -> String {
  assert!(input.is_ascii());
  input.chars().fold(String::new(), |mut escaped, c| {
    // Characters with special meaning in a pattern get a leading backslash.
    if "+*?:{}()\\".contains(c) {
      escaped.push('\\');
    }
    escaped.push(c);
    escaped
  })
}
14 changes: 1 addition & 13 deletions src/component.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.

use crate::canonicalize_and_process::escape_pattern_string;
use crate::matcher::InnerMatcher;
use crate::matcher::Matcher;
use crate::parser::Options;
Expand Down Expand Up @@ -262,19 +263,6 @@ fn generate_pattern_string(part_list: &[&Part], options: &Options) -> String {
result
}

// Ref: https://wicg.github.io/urlpattern/#escape-a-pattern-string
/// Backslash-escapes the pattern syntax characters in `input`
/// (`+ * ? : { } ( ) \`) and returns the result as a new `String`.
///
/// # Panics
///
/// Panics if `input` contains any non-ASCII character.
fn escape_pattern_string(input: &str) -> String {
  // Characters that carry special meaning inside a pattern string.
  const SPECIAL: [char; 9] = ['+', '*', '?', ':', '{', '}', '(', ')', '\\'];

  assert!(input.is_ascii());
  let mut escaped = String::new();
  for c in input.chars() {
    if SPECIAL.contains(&c) {
      escaped.push('\\');
    }
    escaped.push(c);
  }
  escaped
}

/// This function generates a matcher for a given parts list.
fn generate_matcher<R: RegExp>(
mut part_list: &[&Part],
Expand Down
87 changes: 71 additions & 16 deletions src/constructor_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ impl<'a> ConstructorStringParser<'a> {
}

// Ref: https://wicg.github.io/urlpattern/#change-state
fn change_state(&mut self, state: ConstructorStringParserState, skip: usize) {
fn change_state(
&mut self,
new_state: ConstructorStringParserState,
skip: usize,
) {
match self.state {
ConstructorStringParserState::Protocol => {
self.result.protocol = Some(self.make_component_string())
Expand All @@ -153,10 +157,69 @@ impl<'a> ConstructorStringParser<'a> {
ConstructorStringParserState::Hash => {
self.result.hash = Some(self.make_component_string())
}
_ => {}
ConstructorStringParserState::Init
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Done => {}
}

self.state = state;
if self.state != ConstructorStringParserState::Init
&& new_state != ConstructorStringParserState::Done
{
if matches!(
self.state,
ConstructorStringParserState::Protocol
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Username
| ConstructorStringParserState::Password
) && matches!(
new_state,
ConstructorStringParserState::Port
| ConstructorStringParserState::Pathname
| ConstructorStringParserState::Search
| ConstructorStringParserState::Hash
) && self.result.hostname.is_none()
{
self.result.hostname = Some(String::new());
}

if matches!(
self.state,
ConstructorStringParserState::Protocol
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Username
| ConstructorStringParserState::Password
| ConstructorStringParserState::Hostname
| ConstructorStringParserState::Port
) && matches!(
new_state,
ConstructorStringParserState::Search
| ConstructorStringParserState::Hash
) && self.result.pathname.is_none()
{
if self.protocol_matches_special_scheme {
self.result.pathname = Some(String::from("/"));
} else {
self.result.pathname = Some(String::new());
}
}

if matches!(
self.state,
ConstructorStringParserState::Protocol
| ConstructorStringParserState::Authority
| ConstructorStringParserState::Username
| ConstructorStringParserState::Password
| ConstructorStringParserState::Hostname
| ConstructorStringParserState::Port
| ConstructorStringParserState::Pathname
) && new_state == ConstructorStringParserState::Hash
&& self.result.search.is_none()
{
self.result.search = Some(String::new());
}
}

self.state = new_state;
self.token_index += skip;
self.component_start = self.token_index;
self.token_increment = 0;
Expand Down Expand Up @@ -273,11 +336,8 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
parser.change_state(ConstructorStringParserState::Hash, 1);
} else if parser.is_search_prefix() {
parser.change_state(ConstructorStringParserState::Search, 1);
parser.result.hash = Some(String::new());
} else {
parser.change_state(ConstructorStringParserState::Pathname, 0);
parser.result.search = Some(String::new());
parser.result.hash = Some(String::new());
}
parser.token_index += parser.token_increment;
continue;
Expand Down Expand Up @@ -306,22 +366,12 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
match parser.state {
ConstructorStringParserState::Init => {
if parser.is_protocol_suffix() {
parser.result.username = Some(String::new());
parser.result.password = Some(String::new());
parser.result.hostname = Some(String::new());
parser.result.port = Some(String::new());
parser.result.pathname = Some(String::new());
parser.result.search = Some(String::new());
parser.result.hash = Some(String::new());
parser.rewind_and_set_state(ConstructorStringParserState::Protocol);
}
}
ConstructorStringParserState::Protocol => {
if parser.is_protocol_suffix() {
parser.compute_protocol_matches_special_scheme::<R>()?;
if parser.protocol_matches_special_scheme {
parser.result.pathname = Some(String::from("/"));
}
let mut next_state = ConstructorStringParserState::Pathname;
let mut skip = 1;
if parser.next_is_authority_slashes() {
Expand Down Expand Up @@ -398,5 +448,10 @@ pub(crate) fn parse_constructor_string<R: RegExp>(
}
parser.token_index += parser.token_increment;
}

if parser.result.hostname.is_some() && parser.result.port.is_none() {
parser.result.port = Some(String::new());
}

Ok(parser.result)
}
150 changes: 129 additions & 21 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ mod tokenizer;
pub use error::Error;
use url::Url;

use crate::canonicalize_and_process::is_special_scheme;
use crate::canonicalize_and_process::special_scheme_default_port;
use crate::canonicalize_and_process::ProcessType;
use crate::canonicalize_and_process::{is_special_scheme, process_base_url};
use crate::component::Component;
use crate::regexp::RegExp;

Expand Down Expand Up @@ -55,7 +56,7 @@ impl UrlPatternInit {
#[allow(clippy::too_many_arguments)]
fn process(
&self,
kind: canonicalize_and_process::ProcessType,
kind: ProcessType,
protocol: Option<String>,
username: Option<String>,
password: Option<String>,
Expand All @@ -78,18 +79,84 @@ impl UrlPatternInit {
};

let base_url = if let Some(parsed_base_url) = &self.base_url {
// TODO: check if these are correct
result.protocol = Some(parsed_base_url.scheme().to_string());
result.username = Some(parsed_base_url.username().to_string());
result.password =
Some(parsed_base_url.password().unwrap_or_default().to_string());
result.hostname =
Some(parsed_base_url.host_str().unwrap_or_default().to_string());
result.port = Some(url::quirks::port(parsed_base_url).to_string());
result.pathname =
Some(url::quirks::pathname(parsed_base_url).to_string());
result.search = Some(parsed_base_url.query().unwrap_or("").to_string());
result.hash = Some(parsed_base_url.fragment().unwrap_or("").to_string());
if self.protocol.is_none() {
result.protocol =
Some(process_base_url(parsed_base_url.scheme(), &kind));
}

if kind != ProcessType::Pattern
&& (self.protocol.is_none()
&& self.hostname.is_none()
&& self.port.is_none()
&& self.username.is_none())
{
result.username =
Some(process_base_url(parsed_base_url.username(), &kind));
}

if kind != ProcessType::Pattern
&& (self.protocol.is_none()
&& self.hostname.is_none()
&& self.port.is_none()
&& self.username.is_none()
&& self.password.is_none())
{
result.password = Some(process_base_url(
parsed_base_url.password().unwrap_or_default(),
&kind,
));
}

if self.protocol.is_none() && self.hostname.is_none() {
result.hostname = Some(process_base_url(
parsed_base_url.host_str().unwrap_or_default(),
&kind,
));
}

if self.protocol.is_none()
&& self.hostname.is_none()
&& self.port.is_none()
{
result.port =
Some(process_base_url(url::quirks::port(parsed_base_url), &kind));
}

if self.protocol.is_none()
&& self.hostname.is_none()
&& self.port.is_none()
&& self.pathname.is_none()
{
result.pathname = Some(process_base_url(
url::quirks::pathname(parsed_base_url),
&kind,
));
}

if self.protocol.is_none()
&& self.hostname.is_none()
&& self.port.is_none()
&& self.pathname.is_none()
&& self.search.is_none()
{
result.search = Some(process_base_url(
parsed_base_url.query().unwrap_or_default(),
&kind,
));
}

if self.protocol.is_none()
&& self.hostname.is_none()
&& self.port.is_none()
&& self.pathname.is_none()
&& self.search.is_none()
&& self.hash.is_none()
{
result.hash = Some(process_base_url(
parsed_base_url.fragment().unwrap_or_default(),
&kind,
));
}

Some(parsed_base_url)
} else {
Expand Down Expand Up @@ -235,7 +302,7 @@ impl<R: RegExp> UrlPattern<R> {
report_regex_errors: bool,
) -> Result<Self, Error> {
let mut processed_init = init.process(
canonicalize_and_process::ProcessType::Pattern,
ProcessType::Pattern,
None,
None,
None,
Expand Down Expand Up @@ -413,7 +480,7 @@ impl<R: RegExp> UrlPattern<R> {
&self,
input: UrlPatternMatchInput,
) -> Result<Option<UrlPatternResult>, Error> {
let input = match crate::quirks::parse_match_input(input) {
let input = match quirks::parse_match_input(input) {
Some(input) => input,
None => return Ok(None),
};
Expand Down Expand Up @@ -591,9 +658,9 @@ mod tests {

fn test_case(case: TestCase) {
let input = case.pattern.first().cloned();
let mut base_url = case.pattern.get(1).map(|input| match input {
StringOrInit::String(str) => str.clone(),
StringOrInit::Init(_) => unreachable!(),
let mut base_url = case.pattern.get(1).and_then(|input| match input {
StringOrInit::String(str) => Some(str.clone()),
StringOrInit::Init(_) => None,
});

println!("\n=====");
Expand Down Expand Up @@ -664,7 +731,48 @@ mod tests {
}) = &input
{
expected = Some($field.to_owned())
} else if let Some(base_url) = &base_url {
} else if {
if let StringOrInit::Init(init) = &input {
match stringify!($field) {
"protocol" => false,
"hostname" => init.protocol.is_some(),
"port" => init.protocol.is_some() || init.hostname.is_some(),
"username" => false,
"password" => false,
"pathname" => {
init.protocol.is_some()
|| init.hostname.is_some()
|| init.port.is_some()
}
"search" => {
init.protocol.is_some()
|| init.hostname.is_some()
|| init.port.is_some()
|| init.pathname.is_some()
}
"hash" => {
init.protocol.is_some()
|| init.hostname.is_some()
|| init.port.is_some()
|| init.pathname.is_some()
|| init.search.is_some()
}
_ => unreachable!(),
}
} else {
false
}
} {
expected = Some("*".to_owned())
} else if let Some(base_url) =
base_url.as_ref().and_then(|base_url| {
if !matches!(stringify!($field), "username" | "password") {
Some(base_url)
} else {
None
}
})
{
let base_url = Url::parse(base_url).unwrap();
let field = url::quirks::$field(&base_url);
let field: String = match stringify!($field) {
Expand All @@ -684,8 +792,8 @@ mod tests {
let pattern = &pattern.$field.pattern_string;

assert_eq!(
pattern,
&expected,
pattern,
"pattern for {} does not match",
stringify!($field)
);
Expand Down
Loading

0 comments on commit 218f408

Please sign in to comment.