Skip to content

Commit

Permalink
Merge branch 'refs/heads/main' into fork/crowlKats/hasRegExpGroups
Browse files Browse the repository at this point in the history
  • Loading branch information
crowlKats committed Jul 26, 2024
2 parents 56057da + 783c76b commit 31129b9
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 60 deletions.
5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ repository = "https://github.com/denoland/rust-urlpattern"
license = "MIT"

[dependencies]
derive_more = "0.99.16"
url = "2.2.2"
regex = "1.4.3"
url = "2.4.1"
regex = "1.10.5"
serde = { version = "1.0.127", features = ["derive"] }
unic-ucd-ident = { version = "0.9.0", features = ["id"] }

Expand Down
4 changes: 2 additions & 2 deletions src/component.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@ impl<R: RegExp> Component<R> {
pub(crate) fn create_match_result(
&self,
input: String,
exec_result: Vec<&str>,
exec_result: Vec<Option<&str>>,
) -> crate::UrlPatternComponentResult {
let groups = self
.group_name_list
.clone()
.into_iter()
.zip(exec_result.into_iter().map(str::to_owned))
.zip(exec_result.into_iter().map(|s| s.map(str::to_owned)))
.collect();
crate::UrlPatternComponentResult { input, groups }
}
Expand Down
81 changes: 54 additions & 27 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,53 +1,80 @@
use derive_more::Display;
use std::fmt;

use crate::tokenizer::TokenType;

/// An error occurring during URL pattern construction, or matching.
#[derive(Display)]
#[derive(Debug)]
pub enum Error {
#[display(fmt = "a relative input without a base URL is not valid")]
BaseUrlRequired,

#[display(
fmt = "specifying both an init object, and a separate base URL is not valid"
)]
BaseUrlWithInit,

#[display(fmt = "tokenizer error: {_0} (at char {_1})")]
Tokenizer(TokenizerError, usize),

#[display(fmt = "parser error: {_0}")]
Parser(ParserError),

Url(url::ParseError),

#[display(fmt = "regexp error")]
RegExp(()),
}

impl std::error::Error for Error {}

impl std::fmt::Debug for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(self, f)
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Error::BaseUrlRequired => {
f.write_str("a relative input without a base URL is not valid")
}
Error::BaseUrlWithInit => f.write_str(
"specifying both an init object, and a separate base URL is not valid",
),
Error::Tokenizer(err, pos) => {
write!(f, "tokenizer error: {err} (at char {pos})")
}
Error::Parser(err) => write!(f, "parser error: {err}"),
Error::Url(err) => err.fmt(f),
Error::RegExp(_) => f.write_str("regexp error"),
}
}
}

#[derive(Debug, Display)]
impl std::error::Error for Error {}

#[derive(Debug)]
pub enum TokenizerError {
#[display(fmt = "incomplete escape code")]
IncompleteEscapeCode,
#[display(fmt = "invalid name; must be at least length 1")]
InvalidName,
#[display(fmt = "invalid regex: {_0}")]
InvalidRegex(&'static str),
}

#[derive(Debug, Display)]
impl fmt::Display for TokenizerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::IncompleteEscapeCode => f.write_str("incomplete escape code"),
Self::InvalidName => {
f.write_str("invalid name; must be at least length 1")
}
Self::InvalidRegex(err) => write!(f, "invalid regex: {err}"),
}
}
}

impl std::error::Error for TokenizerError {}

#[derive(Debug)]
pub enum ParserError {
#[display(fmt = "expected token {_0}, found '{_2}' of type {_1}")]
ExpectedToken(TokenType, TokenType, String),

#[display(fmt = "pattern contains duplicate name {_0}")]
DuplicateName(String),
}

impl fmt::Display for ParserError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::ExpectedToken(expected_ty, found_ty, found_val) => {
write!(
f,
"expected token {expected_ty:?}, found '{found_val}' of type {found_ty:?}"
)
}
Self::DuplicateName(name) => {
write!(f, "pattern contains duplicate name {name}")
}
}
}
}

impl std::error::Error for ParserError {}
12 changes: 6 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ fn is_absolute_pathname(
/// // Match the pattern against a URL.
/// let url = "https://example.com/users/123".parse().unwrap();
/// let result = pattern.exec(UrlPatternMatchInput::Url(url)).unwrap().unwrap();
/// assert_eq!(result.pathname.groups.get("id").unwrap(), "123");
/// assert_eq!(result.pathname.groups.get("id").unwrap().as_ref().unwrap(), "123");
///# }
/// ```
#[derive(Debug)]
Expand Down Expand Up @@ -508,7 +508,7 @@ pub struct UrlPatternComponentResult {
/// The matched input for this component.
pub input: String,
/// The values for all named groups in the pattern.
pub groups: std::collections::HashMap<String, String>,
pub groups: std::collections::HashMap<String, Option<String>>,
}

#[cfg(test)]
Expand Down Expand Up @@ -537,7 +537,7 @@ mod tests {
#[derive(Debug, Deserialize)]
struct ComponentResult {
input: String,
groups: HashMap<String, String>,
groups: HashMap<String, Option<String>>,
}

#[derive(Deserialize)]
Expand Down Expand Up @@ -590,7 +590,7 @@ mod tests {
}

fn test_case(case: TestCase) {
let input = case.pattern.get(0).cloned();
let input = case.pattern.first().cloned();
let mut base_url = case.pattern.get(1).map(|input| match input {
StringOrInit::String(str) => str.clone(),
StringOrInit::Init(_) => unreachable!(),
Expand Down Expand Up @@ -701,7 +701,7 @@ mod tests {
assert_field!(search);
assert_field!(hash);

let input = case.inputs.get(0).cloned();
let input = case.inputs.first().cloned();
let base_url = case.inputs.get(1).map(|input| match input {
StringOrInit::String(str) => str.clone(),
StringOrInit::Init(_) => unreachable!(),
Expand Down Expand Up @@ -799,7 +799,7 @@ mod tests {
if !exactly_empty_components
.contains(&stringify!($component).to_owned())
{
groups.insert("0".to_owned(), "".to_owned());
groups.insert("0".to_owned(), Some("".to_owned()));
}
UrlPatternComponentResult {
input: "".to_owned(),
Expand Down
7 changes: 5 additions & 2 deletions src/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@ impl<R: RegExp> Matcher<R> {
}
}

pub fn matches<'a>(&self, mut input: &'a str) -> Option<Vec<&'a str>> {
pub fn matches<'a>(
&self,
mut input: &'a str,
) -> Option<Vec<Option<&'a str>>> {
let prefix_len = self.prefix.len();
let suffix_len = self.suffix.len();
let input_len = input.len();
Expand Down Expand Up @@ -82,7 +85,7 @@ impl<R: RegExp> Matcher<R> {
return None;
}
}
Some(vec![input])
Some(vec![Some(input)])
}
InnerMatcher::RegExp { regexp, .. } => {
regexp.as_ref().unwrap().matches(input)
Expand Down
2 changes: 1 addition & 1 deletion src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ where
if name_token.is_some() || regexp_or_wildcard_token.is_some() {
let mut prefix = String::new();
if let Some(char_token) = char_token {
prefix = char_token.value.to_owned();
char_token.value.clone_into(&mut prefix);
}
if !prefix.is_empty() && prefix != options.prefix_code_point {
parser.pending_fixed_value.push_str(&prefix);
Expand Down
2 changes: 1 addition & 1 deletion src/quirks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ impl RegExp for EcmaRegexp {
Ok(EcmaRegexp(pattern.to_string()))
}

fn matches<'a>(&self, text: &'a str) -> Option<Vec<&'a str>> {
fn matches<'a>(&self, text: &'a str) -> Option<Vec<Option<&'a str>>> {
let regexp = regex::Regex::parse(&self.0).ok()?;
regexp.matches(text)
}
Expand Down
8 changes: 4 additions & 4 deletions src/regexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ pub trait RegExp: Sized {
/// of captures. The matches are returned in the order they appear in the
/// regular expression. It is **not** prefixed with the full match. For groups
/// that occur in the regular expression, but did not match, the corresponding
/// capture should be the empty string ("").
/// capture should be `None`.
///
/// Returns `None` if the text does not match the regular expression.
fn matches<'a>(&self, text: &'a str) -> Option<Vec<&'a str>>;
fn matches<'a>(&self, text: &'a str) -> Option<Vec<Option<&'a str>>>;
}

impl RegExp for regex::Regex {
Expand All @@ -26,13 +26,13 @@ impl RegExp for regex::Regex {
regex::Regex::new(pattern).map_err(|_| ())
}

fn matches<'a>(&self, text: &'a str) -> Option<Vec<&'a str>> {
fn matches<'a>(&self, text: &'a str) -> Option<Vec<Option<&'a str>>> {
let captures = self.captures(text)?;

let captures = captures
.iter()
.skip(1)
.map(|c| c.map(|m| m.as_str()).unwrap_or(""))
.map(|c| c.map(|m| m.as_str()))
.collect();

Some(captures)
Expand Down
32 changes: 21 additions & 11 deletions src/testdata/urlpatterntestdata.json
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,9 @@
{
"pattern": [{ "pathname": "/foo/:bar?" }],
"inputs": [{ "pathname": "/foo" }],
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"pathname": { "input": "/foo", "groups": { "bar": "" } }
"pathname": { "input": "/foo", "groups": { "bar": null } }
}
},
{
Expand Down Expand Up @@ -419,8 +420,9 @@
{
"pattern": [{ "pathname": "/foo/:bar*" }],
"inputs": [{ "pathname": "/foo" }],
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"pathname": { "input": "/foo", "groups": { "bar": "" } }
"pathname": { "input": "/foo", "groups": { "bar": null } }
}
},
{
Expand Down Expand Up @@ -473,15 +475,17 @@
"expected_obj": {
"pathname": "/foo/*?"
},
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"pathname": { "input": "/foo", "groups": { "0": "" } }
"pathname": { "input": "/foo", "groups": { "0": null } }
}
},
{
"pattern": [{ "pathname": "/foo/*?" }],
"inputs": [{ "pathname": "/foo" }],
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"pathname": { "input": "/foo", "groups": { "0": "" } }
"pathname": { "input": "/foo", "groups": { "0": null } }
}
},
{
Expand Down Expand Up @@ -657,15 +661,17 @@
"expected_obj": {
"pathname": "/foo/**"
},
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"pathname": { "input": "/foo", "groups": { "0": "" } }
"pathname": { "input": "/foo", "groups": { "0": null } }
}
},
{
"pattern": [{ "pathname": "/foo/**" }],
"inputs": [{ "pathname": "/foo" }],
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"pathname": { "input": "/foo", "groups": { "0": "" } }
"pathname": { "input": "/foo", "groups": { "0": null } }
}
},
{
Expand Down Expand Up @@ -1812,9 +1818,10 @@
"hostname": "(sub.)?example.com",
"pathname": "/foo"
},
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"protocol": { "input": "https", "groups": {} },
"hostname": { "input": "example.com", "groups": { "0": "" } },
"hostname": { "input": "example.com", "groups": { "0": null } },
"pathname": { "input": "/foo", "groups": {} }
}
},
Expand Down Expand Up @@ -1850,9 +1857,10 @@
"hostname": "(sub(?:.))?example.com",
"pathname": "/foo"
},
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"protocol": { "input": "https", "groups": {} },
"hostname": { "input": "example.com", "groups": { "0": "" } },
"hostname": { "input": "example.com", "groups": { "0": null } },
"pathname": { "input": "/foo", "groups": {} }
}
},
Expand Down Expand Up @@ -2299,9 +2307,10 @@
"protocol": "data",
"pathname": "text/javascript,let x = 100/:tens?5;"
},
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"protocol": { "input": "data", "groups": {} },
"pathname": { "input": "text/javascript,let x = 100/5;", "groups": { "tens": "" } }
"pathname": { "input": "text/javascript,let x = 100/5;", "groups": { "tens": null } }
}
},
{
Expand Down Expand Up @@ -2416,7 +2425,6 @@
"expected_obj": "error"
},
{
"skip": "bug in rust-url: https://github.com/servo/rust-url/pull/718",
"pattern": [{ "hostname": "bad|hostname" }],
"expected_obj": "error"
},
Expand Down Expand Up @@ -2618,13 +2626,15 @@
}
},
{
"skip": "only works in ecmascript variety of regex",
"pattern": [{ "pathname": "*{}**?" }],
"inputs": [{ "pathname": "foobar" }],
"expected_obj": {
"pathname": "*(.*)?"
},
"//": "The `null` below is translated to undefined in the test harness.",
"expected_match": {
"pathname": { "input": "foobar", "groups": { "0": "foobar", "1": "" }}
"pathname": { "input": "foobar", "groups": { "0": "foobar", "1": null }}
}
},
{
Expand Down
4 changes: 1 addition & 3 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.

use derive_more::Display;

use crate::error::TokenizerError;
use crate::Error;

// Ref: https://wicg.github.io/urlpattern/#tokens
// Ref: https://wicg.github.io/urlpattern/#tokenizing

// Ref: https://wicg.github.io/urlpattern/#token-type
#[derive(Debug, Display, Clone, Eq, PartialEq)]
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TokenType {
Open,
Close,
Expand Down

0 comments on commit 31129b9

Please sign in to comment.