Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions crates/oxc_regular_expression/src/body_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ mod test {
("a{,", ParserOptions::default().with_unicode_mode()),
("x{9007199254740992}", ParserOptions::default()),
("x{9007199254740991,9007199254740992}", ParserOptions::default()),
("x{99999999999999999999999999999999999999999999999999}", ParserOptions::default()),
(r"\99999999999999999999999999999999999999999999999999", ParserOptions::default()),
(r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", ParserOptions::default().with_unicode_mode()),
("(?=a", ParserOptions::default()),
("(?<!a", ParserOptions::default()),
(r"\c0", ParserOptions::default().with_unicode_mode()),
Expand Down
77 changes: 48 additions & 29 deletions crates/oxc_regular_expression/src/body_parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ impl<'a> PatternParser<'a> {
let checkpoint = self.reader.checkpoint();

// DecimalEscape: \1 means indexed reference
if let Some(index) = self.consume_decimal_escape() {
if let Some(index) = self.consume_decimal_escape()? {
if self.state.unicode_mode {
// [SS:EE] AtomEscape :: DecimalEscape
// It is a Syntax Error if the CapturingGroupNumber of DecimalEscape is strictly greater than CountLeftCapturingParensWithin(the Pattern containing AtomEscape).
Expand Down Expand Up @@ -601,7 +601,7 @@ impl<'a> PatternParser<'a> {
// - and it is binary property of strings(can be true only with `UnicodeSetsMode`)
if negative && strings {
return Err(OxcDiagnostic::error(
"Invalid property name(negative + property of strings)",
"Invalid regular expression: Invalid property name(negative + property of strings)",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
Expand Down Expand Up @@ -1120,7 +1120,7 @@ impl<'a> PatternParser<'a> {
let span_start = self.reader.offset();
if self.reader.eat('&') {
return Err(OxcDiagnostic::error(
"Unexpected `&` inside of class interseciton", // spellchecker:disable-line
"Invalid regular expression: Unexpected `&` inside of class interseciton", // spellchecker:disable-line
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
Expand All @@ -1133,7 +1133,7 @@ impl<'a> PatternParser<'a> {

let span_start = self.reader.offset();
return Err(OxcDiagnostic::error(
"Invalid character in character class set interseciton", // spellchecker:disable-line
"Invalid regular expression: Invalid character in character class set interseciton", // spellchecker:disable-line
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
Expand Down Expand Up @@ -1167,7 +1167,7 @@ impl<'a> PatternParser<'a> {

let span_start = self.reader.offset();
return Err(OxcDiagnostic::error(
"Invalid character in character class set subtraction",
"Invalid regular expression: Invalid character in character class set subtraction",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
Expand Down Expand Up @@ -1582,11 +1582,11 @@ impl<'a> PatternParser<'a> {
let span_start = self.reader.offset();
let checkpoint = self.reader.checkpoint();
if self.reader.eat('{') {
if let Some(min) = self.consume_decimal_digits() {
if let Some(min) = self.consume_decimal_digits()? {
if self.reader.eat('}') {
if MAX_QUANTIFIER < min {
return Err(OxcDiagnostic::error(
"Number is too large in braced quantifier",
"Invalid regular expression: Number is too large in braced quantifier",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
Expand All @@ -1598,7 +1598,7 @@ impl<'a> PatternParser<'a> {
if self.reader.eat('}') {
if MAX_QUANTIFIER < min {
return Err(OxcDiagnostic::error(
"Number is too large in braced quantifier",
"Invalid regular expression: Number is too large in braced quantifier",
)
.with_label(
self.span_factory.create(span_start, self.reader.offset()),
Expand All @@ -1608,21 +1608,21 @@ impl<'a> PatternParser<'a> {
return Ok(Some(((min, None), is_greedy(&mut self.reader))));
}

if let Some(max) = self.consume_decimal_digits() {
if let Some(max) = self.consume_decimal_digits()? {
if self.reader.eat('}') {
if max < min {
// [SS:EE] QuantifierPrefix :: { DecimalDigits , DecimalDigits }
// It is a Syntax Error if the MV of the first DecimalDigits is strictly greater than the MV of the second DecimalDigits.
return Err(OxcDiagnostic::error(
"Numbers out of order in braced quantifier",
"Invalid regular expression: Numbers out of order in braced quantifier",
)
.with_label(
self.span_factory.create(span_start, self.reader.offset()),
));
}
if MAX_QUANTIFIER < min || MAX_QUANTIFIER < max {
return Err(OxcDiagnostic::error(
"Number is too large in braced quantifier",
"Invalid regular expression: Number is too large in braced quantifier",
)
.with_label(
self.span_factory.create(span_start, self.reader.offset()),
Expand All @@ -1645,20 +1645,20 @@ impl<'a> PatternParser<'a> {
// DecimalEscape ::
// NonZeroDigit DecimalDigits[~Sep][opt] [lookahead ∉ DecimalDigit]
// ```
// Parses a `DecimalEscape` and yields its numeric value as a `u32` index.
//
// NOTE(review): this span is a diff rendering without +/- markers. Where two
// near-identical lines appear back to back, the first appears to be the
// pre-change line and the second its replacement — confirm against the PR.
//
// Post-change behavior:
// - `Ok(Some(index))` when `consume_decimal_digits` yields a non-zero value.
// - `Ok(None)` when no digits were consumed, or when the value is 0 (in the
//   zero case the reader is rewound to the checkpoint taken on entry).
// - `Err(..)` when `consume_decimal_digits` fails; the error is propagated by `?`.
fn consume_decimal_escape(&mut self) -> Option<u32> {
fn consume_decimal_escape(&mut self) -> Result<Option<u32>> {
let checkpoint = self.reader.checkpoint();

if let Some(index) = self.consume_decimal_digits() {
if let Some(index) = self.consume_decimal_digits()? {
// \0 is CharacterEscape, not DecimalEscape
if index != 0 {
// NOTE(review): `index` is a `u64` here, and `as u32` keeps only the low
// 32 bits. Any value above `u32::MAX` is silently wrapped, and a non-zero
// multiple of 2^32 passes the `index != 0` check yet is returned as 0.
// Consider `u32::try_from(index)` and reporting a diagnostic on failure
// instead of allowing the truncation lint.
#[allow(clippy::cast_possible_truncation)]
return Some(index as u32);
return Ok(Some(index as u32));
}

// The digits evaluated to 0: undo the consumption so the caller can handle `\0`.
self.reader.rewind(checkpoint);
}

None
Ok(None)
}

// ```
Expand All @@ -1668,23 +1668,33 @@ impl<'a> PatternParser<'a> {
// [+Sep] DecimalDigits[+Sep] NumericLiteralSeparator DecimalDigit
// ```
// ([Sep] is disabled for `QuantifierPrefix` and `DecimalEscape`, skip it)
// Consumes a run of consecutive decimal digits and accumulates them, base 10,
// into a `u64`.
//
// NOTE(review): this span is a diff rendering without +/- markers. Where two
// near-identical lines appear back to back, the first appears to be the
// pre-change line and the second its replacement — confirm against the PR.
//
// Post-change behavior:
// - `Ok(Some(value))` when at least one digit was consumed (detected by the
//   reader checkpoint having moved).
// - `Ok(None)` when the next code point is not a decimal digit.
// - `Err(..)` when accumulating the next digit would overflow `u64`.
fn consume_decimal_digits(&mut self) -> Option<u64> {
fn consume_decimal_digits(&mut self) -> Result<Option<u64>> {
// Start offset of the digit run, used only for the overflow diagnostic label.
let span_start = self.reader.offset();
let checkpoint = self.reader.checkpoint();

let mut value = 0;
let mut value: u64 = 0;
// Peek rather than eat, so a non-digit is left untouched for the caller.
while let Some(cp) = self.reader.peek().filter(|&cp| unicode::is_decimal_digit(cp)) {
// `- '0' as u32`: convert code point to digit
#[allow(clippy::cast_lossless)]
let d = (cp - '0' as u32) as u64;
// Unchecked accumulation (the pre-change form).
value = (10 * value) + d;
self.reader.advance();

// To prevent panic on overflow cases like `\999999999999999999999`, `a{999999999999999999999}`
if let Some(v) = value.checked_mul(10).and_then(|v| v.checked_add(d)) {
value = v;
self.reader.advance();
} else {
// NOTE(review): the reader is only advanced on the success branch, so
// `self.reader.offset()` below still points at the digit that caused the
// overflow. The label therefore stops just before that digit and never
// covers the remaining digits of the literal.
return Err(OxcDiagnostic::error(
"Invalid regular expression: Number is too large in decimal digits",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
}

// A moved checkpoint means at least one digit was consumed.
if self.reader.checkpoint() != checkpoint {
return Some(value);
return Ok(Some(value));
}

None
Ok(None)
}

// ```
Expand Down Expand Up @@ -1737,7 +1747,7 @@ impl<'a> PatternParser<'a> {
if unicode_property::is_valid_lone_unicode_property_of_strings(&name_or_value) {
if !self.state.unicode_sets_mode {
return Err(OxcDiagnostic::error(
"`UnicodeSetsMode` is required for binary property of strings",
"Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
Expand Down Expand Up @@ -2014,7 +2024,7 @@ impl<'a> PatternParser<'a> {

if self.reader.eat('{') {
if let Some(hex_digits) =
self.consume_hex_digits().filter(|&cp| unicode::is_valid_unicode(cp))
self.consume_hex_digits()?.filter(|&cp| unicode::is_valid_unicode(cp))
{
if self.reader.eat('}') {
return Ok(Some(hex_digits));
Expand Down Expand Up @@ -2150,20 +2160,29 @@ impl<'a> PatternParser<'a> {
Some(cp)
}

// Consumes a run of consecutive hex digits (as recognized by
// `unicode::map_hex_digit`) and accumulates them, base 16, into a `u32`.
//
// NOTE(review): this span is a diff rendering without +/- markers. Where two
// near-identical lines appear back to back, the first appears to be the
// pre-change line and the second its replacement — confirm against the PR.
//
// Post-change behavior:
// - `Ok(Some(value))` when at least one hex digit was consumed (detected by the
//   reader checkpoint having moved).
// - `Ok(None)` when the next code point is not a hex digit.
// - `Err(..)` when accumulating the next digit would overflow `u32`.
fn consume_hex_digits(&mut self) -> Option<u32> {
fn consume_hex_digits(&mut self) -> Result<Option<u32>> {
// Start offset of the digit run, used only for the overflow diagnostic label.
let span_start = self.reader.offset();
let checkpoint = self.reader.checkpoint();

let mut value = 0;
let mut value: u32 = 0;
// Peek rather than eat, so a non-hex code point is left untouched for the caller.
while let Some(hex) = self.reader.peek().and_then(unicode::map_hex_digit) {
// Unchecked accumulation (the pre-change form).
value = (16 * value) + hex;
self.reader.advance();
// To prevent panic on overflow cases like `\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}`
if let Some(v) = value.checked_mul(16).and_then(|v| v.checked_add(hex)) {
value = v;
self.reader.advance();
} else {
// NOTE(review): the reader is only advanced on the success branch, so the
// label built below ends just before the digit that caused the overflow.
return Err(OxcDiagnostic::error(
"Invalid regular expression: Number is too large in hex digits",
)
.with_label(self.span_factory.create(span_start, self.reader.offset())));
}
}

// A moved checkpoint means at least one hex digit was consumed.
if self.reader.checkpoint() != checkpoint {
return Some(value);
return Ok(Some(value));
}

None
Ok(None)
}

fn consume_fixed_hex_digits(&mut self, len: usize) -> Option<u32> {
Expand Down
28 changes: 14 additions & 14 deletions tasks/coverage/parser_test262.snap
Original file line number Diff line number Diff line change
Expand Up @@ -191,14 +191,14 @@ Negative Passed: 4220/4220 (100.00%)
· ──────────────────
╰────

× Invalid property name(negative + property of strings)
× Invalid regular expression: Invalid property name(negative + property of strings)
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-P.js:20:2]
19 │
20 │ /\P{Basic_Emoji}/v;
· ───────────────
╰────

× `UnicodeSetsMode` is required for binary property of strings
× Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-u.js:20:5]
19 │
20 │ /\p{Basic_Emoji}/u;
Expand All @@ -212,14 +212,14 @@ Negative Passed: 4220/4220 (100.00%)
· ────────────────────────────
╰────

× Invalid property name(negative + property of strings)
× Invalid regular expression: Invalid property name(negative + property of strings)
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-P.js:20:2]
19 │
20 │ /\P{Emoji_Keycap_Sequence}/v;
· ─────────────────────────
╰────

× `UnicodeSetsMode` is required for binary property of strings
× Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-u.js:20:5]
19 │
20 │ /\p{Emoji_Keycap_Sequence}/u;
Expand All @@ -233,14 +233,14 @@ Negative Passed: 4220/4220 (100.00%)
· ────────────────
╰────

× Invalid property name(negative + property of strings)
× Invalid regular expression: Invalid property name(negative + property of strings)
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-P.js:20:2]
19 │
20 │ /\P{RGI_Emoji}/v;
· ─────────────
╰────

× `UnicodeSetsMode` is required for binary property of strings
× Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-u.js:20:5]
19 │
20 │ /\p{RGI_Emoji}/u;
Expand All @@ -254,14 +254,14 @@ Negative Passed: 4220/4220 (100.00%)
· ──────────────────────────────
╰────

× Invalid property name(negative + property of strings)
× Invalid regular expression: Invalid property name(negative + property of strings)
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-P.js:20:2]
19 │
20 │ /\P{RGI_Emoji_Flag_Sequence}/v;
· ───────────────────────────
╰────

× `UnicodeSetsMode` is required for binary property of strings
× Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-u.js:20:5]
19 │
20 │ /\p{RGI_Emoji_Flag_Sequence}/u;
Expand All @@ -275,14 +275,14 @@ Negative Passed: 4220/4220 (100.00%)
· ──────────────────────────────────
╰────

× Invalid property name(negative + property of strings)
× Invalid regular expression: Invalid property name(negative + property of strings)
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-P.js:20:2]
19 │
20 │ /\P{RGI_Emoji_Modifier_Sequence}/v;
· ───────────────────────────────
╰────

× `UnicodeSetsMode` is required for binary property of strings
× Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-u.js:20:5]
19 │
20 │ /\p{RGI_Emoji_Modifier_Sequence}/u;
Expand All @@ -296,14 +296,14 @@ Negative Passed: 4220/4220 (100.00%)
· ─────────────────────────────
╰────

× Invalid property name(negative + property of strings)
× Invalid regular expression: Invalid property name(negative + property of strings)
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-P.js:20:2]
19 │
20 │ /\P{RGI_Emoji_Tag_Sequence}/v;
· ──────────────────────────
╰────

× `UnicodeSetsMode` is required for binary property of strings
× Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-u.js:20:5]
19 │
20 │ /\p{RGI_Emoji_Tag_Sequence}/u;
Expand All @@ -317,14 +317,14 @@ Negative Passed: 4220/4220 (100.00%)
· ─────────────────────────────
╰────

× Invalid property name(negative + property of strings)
× Invalid regular expression: Invalid property name(negative + property of strings)
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P.js:20:2]
19 │
20 │ /\P{RGI_Emoji_ZWJ_Sequence}/v;
· ──────────────────────────
╰────

× `UnicodeSetsMode` is required for binary property of strings
× Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings
╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u.js:20:5]
19 │
20 │ /\p{RGI_Emoji_ZWJ_Sequence}/u;
Expand Down
Loading