diff --git a/crates/oxc_regular_expression/src/body_parser/mod.rs b/crates/oxc_regular_expression/src/body_parser/mod.rs index 602eb95d206bd..9c29c6e0781ad 100644 --- a/crates/oxc_regular_expression/src/body_parser/mod.rs +++ b/crates/oxc_regular_expression/src/body_parser/mod.rs @@ -141,6 +141,9 @@ mod test { ("a{,", ParserOptions::default().with_unicode_mode()), ("x{9007199254740992}", ParserOptions::default()), ("x{9007199254740991,9007199254740992}", ParserOptions::default()), + ("x{99999999999999999999999999999999999999999999999999}", ParserOptions::default()), + (r"\99999999999999999999999999999999999999999999999999", ParserOptions::default()), + (r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", ParserOptions::default().with_unicode_mode()), ("(?=a", ParserOptions::default()), ("(? PatternParser<'a> { let checkpoint = self.reader.checkpoint(); // DecimalEscape: \1 means indexed reference - if let Some(index) = self.consume_decimal_escape() { + if let Some(index) = self.consume_decimal_escape()? { if self.state.unicode_mode { // [SS:EE] AtomEscape :: DecimalEscape // It is a Syntax Error if the CapturingGroupNumber of DecimalEscape is strictly greater than CountLeftCapturingParensWithin(the Pattern containing AtomEscape). @@ -601,7 +601,7 @@ impl<'a> PatternParser<'a> { // - and it is binary property of strings(can be true only with `UnicodeSetsMode`) if negative && strings { return Err(OxcDiagnostic::error( - "Invalid property name(negative + property of strings)", + "Invalid regular expression: Invalid property name(negative + property of strings)", ) .with_label(self.span_factory.create(span_start, self.reader.offset()))); } @@ -1120,7 +1120,7 @@ impl<'a> PatternParser<'a> { let span_start = self.reader.offset(); if self.reader.eat('&') { return Err(OxcDiagnostic::error( - "Unexpected `&` inside of class interseciton", // spellchecker:disable-line + "Invalid regular expression: Unexpected `&` inside of class interseciton", // spellchecker:disable-line ) .with_label(self.span_factory.create(span_start, self.reader.offset()))); } @@ -1133,7 +1133,7 @@ impl<'a> PatternParser<'a> { let span_start = self.reader.offset(); return Err(OxcDiagnostic::error( - "Invalid character in character class set interseciton", // spellchecker:disable-line + "Invalid regular expression: Invalid character in character class set interseciton", // spellchecker:disable-line ) .with_label(self.span_factory.create(span_start, self.reader.offset()))); } @@ -1167,7 +1167,7 @@ impl<'a> PatternParser<'a> { let span_start = self.reader.offset(); return Err(OxcDiagnostic::error( - "Invalid character in character class set subtraction", + "Invalid regular expression: Invalid character in character class set subtraction", ) .with_label(self.span_factory.create(span_start, self.reader.offset()))); } @@ -1582,11 +1582,11 @@ impl<'a> PatternParser<'a> { let span_start = self.reader.offset(); let checkpoint = self.reader.checkpoint(); if self.reader.eat('{') { - if let Some(min) = self.consume_decimal_digits() { + if let Some(min) = self.consume_decimal_digits()? { if self.reader.eat('}') { if MAX_QUANTIFIER < min { return Err(OxcDiagnostic::error( - "Number is too large in braced quantifier", + "Invalid regular expression: Number is too large in braced quantifier", ) .with_label(self.span_factory.create(span_start, self.reader.offset()))); } @@ -1598,7 +1598,7 @@ impl<'a> PatternParser<'a> { if self.reader.eat('}') { if MAX_QUANTIFIER < min { return Err(OxcDiagnostic::error( - "Number is too large in braced quantifier", + "Invalid regular expression: Number is too large in braced quantifier", ) .with_label( self.span_factory.create(span_start, self.reader.offset()), @@ -1608,13 +1608,13 @@ impl<'a> PatternParser<'a> { return Ok(Some(((min, None), is_greedy(&mut self.reader)))); } - if let Some(max) = self.consume_decimal_digits() { + if let Some(max) = self.consume_decimal_digits()? { if self.reader.eat('}') { if max < min { // [SS:EE] QuantifierPrefix :: { DecimalDigits , DecimalDigits } // It is a Syntax Error if the MV of the first DecimalDigits is strictly greater than the MV of the second DecimalDigits. return Err(OxcDiagnostic::error( - "Numbers out of order in braced quantifier", + "Invalid regular expression: Numbers out of order in braced quantifier", ) .with_label( self.span_factory.create(span_start, self.reader.offset()), @@ -1622,7 +1622,7 @@ impl<'a> PatternParser<'a> { } if MAX_QUANTIFIER < min || MAX_QUANTIFIER < max { return Err(OxcDiagnostic::error( - "Number is too large in braced quantifier", + "Invalid regular expression: Number is too large in braced quantifier", ) .with_label( self.span_factory.create(span_start, self.reader.offset()), @@ -1645,20 +1645,20 @@ impl<'a> PatternParser<'a> { // DecimalEscape :: // NonZeroDigit DecimalDigits[~Sep][opt] [lookahead ∉ DecimalDigit] // ``` - fn consume_decimal_escape(&mut self) -> Option { + fn consume_decimal_escape(&mut self) -> Result> { let checkpoint = self.reader.checkpoint(); - if let Some(index) = self.consume_decimal_digits() { + if let Some(index) = self.consume_decimal_digits()? { // \0 is CharacterEscape, not DecimalEscape if index != 0 { #[allow(clippy::cast_possible_truncation)] - return Some(index as u32); + return Ok(Some(index as u32)); } self.reader.rewind(checkpoint); } - None + Ok(None) } // ``` @@ -1668,23 +1668,33 @@ impl<'a> PatternParser<'a> { // [+Sep] DecimalDigits[+Sep] NumericLiteralSeparator DecimalDigit // ``` // ([Sep] is disabled for `QuantifierPrefix` and `DecimalEscape`, skip it) - fn consume_decimal_digits(&mut self) -> Option { + fn consume_decimal_digits(&mut self) -> Result> { + let span_start = self.reader.offset(); let checkpoint = self.reader.checkpoint(); - let mut value = 0; + let mut value: u64 = 0; while let Some(cp) = self.reader.peek().filter(|&cp| unicode::is_decimal_digit(cp)) { // `- '0' as u32`: convert code point to digit #[allow(clippy::cast_lossless)] let d = (cp - '0' as u32) as u64; - value = (10 * value) + d; - self.reader.advance(); + + // To prevent panic on overflow cases like `\999999999999999999999`, `a{999999999999999999999}` + if let Some(v) = value.checked_mul(10).and_then(|v| v.checked_add(d)) { + value = v; + self.reader.advance(); + } else { + return Err(OxcDiagnostic::error( + "Invalid regular expression: Number is too large in decimal digits", + ) + .with_label(self.span_factory.create(span_start, self.reader.offset()))); + } } if self.reader.checkpoint() != checkpoint { - return Some(value); + return Ok(Some(value)); } - None + Ok(None) } // ``` @@ -1737,7 +1747,7 @@ impl<'a> PatternParser<'a> { if unicode_property::is_valid_lone_unicode_property_of_strings(&name_or_value) { if !self.state.unicode_sets_mode { return Err(OxcDiagnostic::error( - "`UnicodeSetsMode` is required for binary property of strings", + "Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings", ) .with_label(self.span_factory.create(span_start, self.reader.offset()))); } @@ -2014,7 +2024,7 @@ impl<'a> PatternParser<'a> { if self.reader.eat('{') { if let Some(hex_digits) = - self.consume_hex_digits().filter(|&cp| unicode::is_valid_unicode(cp)) + self.consume_hex_digits()?.filter(|&cp| unicode::is_valid_unicode(cp)) { if self.reader.eat('}') { return Ok(Some(hex_digits)); @@ -2150,20 +2160,29 @@ impl<'a> PatternParser<'a> { Some(cp) } - fn consume_hex_digits(&mut self) -> Option { + fn consume_hex_digits(&mut self) -> Result> { + let span_start = self.reader.offset(); let checkpoint = self.reader.checkpoint(); - let mut value = 0; + let mut value: u32 = 0; while let Some(hex) = self.reader.peek().and_then(unicode::map_hex_digit) { - value = (16 * value) + hex; - self.reader.advance(); + // To prevent panic on overflow cases like `\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}` + if let Some(v) = value.checked_mul(16).and_then(|v| v.checked_add(hex)) { + value = v; + self.reader.advance(); + } else { + return Err(OxcDiagnostic::error( + "Invalid regular expression: Number is too large in hex digits", + ) + .with_label(self.span_factory.create(span_start, self.reader.offset()))); + } } if self.reader.checkpoint() != checkpoint { - return Some(value); + return Ok(Some(value)); } - None + Ok(None) } fn consume_fixed_hex_digits(&mut self, len: usize) -> Option { diff --git a/tasks/coverage/parser_test262.snap b/tasks/coverage/parser_test262.snap index 144c65e4e059e..7594275c36090 100644 --- a/tasks/coverage/parser_test262.snap +++ b/tasks/coverage/parser_test262.snap @@ -191,14 +191,14 @@ Negative Passed: 4220/4220 (100.00%) · ────────────────── ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-P.js:20:2] 19 │ 20 │ /\P{Basic_Emoji}/v; · ─────────────── ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Basic_Emoji-negative-u.js:20:5] 19 │ 20 │ /\p{Basic_Emoji}/u; @@ -212,14 +212,14 @@ Negative Passed: 4220/4220 (100.00%) · ──────────────────────────── ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-P.js:20:2] 19 │ 20 │ /\P{Emoji_Keycap_Sequence}/v; · ───────────────────────── ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/Emoji_Keycap_Sequence-negative-u.js:20:5] 19 │ 20 │ /\p{Emoji_Keycap_Sequence}/u; @@ -233,14 +233,14 @@ Negative Passed: 4220/4220 (100.00%) · ──────────────── ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-P.js:20:2] 19 │ 20 │ /\P{RGI_Emoji}/v; · ───────────── ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji-negative-u.js:20:5] 19 │ 20 │ /\p{RGI_Emoji}/u; @@ -254,14 +254,14 @@ Negative Passed: 4220/4220 (100.00%) · ────────────────────────────── ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-P.js:20:2] 19 │ 20 │ /\P{RGI_Emoji_Flag_Sequence}/v; · ─────────────────────────── ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Flag_Sequence-negative-u.js:20:5] 19 │ 20 │ /\p{RGI_Emoji_Flag_Sequence}/u; @@ -275,14 +275,14 @@ Negative Passed: 4220/4220 (100.00%) · ────────────────────────────────── ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-P.js:20:2] 19 │ 20 │ /\P{RGI_Emoji_Modifier_Sequence}/v; · ─────────────────────────────── ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Modifier_Sequence-negative-u.js:20:5] 19 │ 20 │ /\p{RGI_Emoji_Modifier_Sequence}/u; @@ -296,14 +296,14 @@ Negative Passed: 4220/4220 (100.00%) · ───────────────────────────── ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-P.js:20:2] 19 │ 20 │ /\P{RGI_Emoji_Tag_Sequence}/v; · ────────────────────────── ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_Tag_Sequence-negative-u.js:20:5] 19 │ 20 │ /\p{RGI_Emoji_Tag_Sequence}/u; @@ -317,14 +317,14 @@ Negative Passed: 4220/4220 (100.00%) · ───────────────────────────── ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P.js:20:2] 19 │ 20 │ /\P{RGI_Emoji_ZWJ_Sequence}/v; · ────────────────────────── ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[test262/test/built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u.js:20:5] 19 │ 20 │ /\p{RGI_Emoji_ZWJ_Sequence}/u; diff --git a/tasks/coverage/parser_typescript.snap b/tasks/coverage/parser_typescript.snap index d40e91963050f..ae49d9538dad8 100644 --- a/tasks/coverage/parser_typescript.snap +++ b/tasks/coverage/parser_typescript.snap @@ -10251,7 +10251,7 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/typeFro 26 │ /{2,1}??/, ╰──── - × Numbers out of order in braced quantifier + × Invalid regular expression: Numbers out of order in braced quantifier ╭─[typescript/tests/cases/compiler/regularExpressionAnnexB.ts:26:4] 25 │ /{1,2}??/, 26 │ /{2,1}??/, @@ -10363,7 +10363,7 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/typeFro 42 │ /{2,1}??/u, ╰──── - × Numbers out of order in braced quantifier + × Invalid regular expression: Numbers out of order in braced quantifier ╭─[typescript/tests/cases/compiler/regularExpressionAnnexB.ts:42:4] 41 │ /{1,2}??/u, 42 │ /{2,1}??/u, @@ -10497,7 +10497,7 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/typeFro 16 │ // Quantifiers ╰──── - × Numbers out of order in braced quantifier + × Invalid regular expression: Numbers out of order in braced quantifier ╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:17:31] 16 │ // Quantifiers 17 │ /{}{1,2}_{3}.{4,}?(foo){008}${32,16}\b{064,128}.+&*?\???\n{,256}{\\{,/, @@ -10545,7 +10545,7 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/typeFro 26 │ /\p{RGI_Emoji}\P{RGI_Emoji}[^\p{RGI_Emoji}\P{RGI_Emoji}]/, ╰──── - × `UnicodeSetsMode` is required for binary property of strings + × Invalid regular expression: `UnicodeSetsMode` is required for binary property of strings ╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:27:6] 26 │ /\p{RGI_Emoji}\P{RGI_Emoji}[^\p{RGI_Emoji}\P{RGI_Emoji}]/, 27 │ /\p{RGI_Emoji}\P{RGI_Emoji}[^\p{RGI_Emoji}\P{RGI_Emoji}]/u, @@ -10553,7 +10553,7 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/typeFro 28 │ /\p{RGI_Emoji}\P{RGI_Emoji}[^\p{RGI_Emoji}\P{RGI_Emoji}]/v, ╰──── - × Invalid property name(negative + property of strings) + × Invalid regular expression: Invalid property name(negative + property of strings) ╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:28:16] 27 │ /\p{RGI_Emoji}\P{RGI_Emoji}[^\p{RGI_Emoji}\P{RGI_Emoji}]/u, 28 │ /\p{RGI_Emoji}\P{RGI_Emoji}[^\p{RGI_Emoji}\P{RGI_Emoji}]/v, @@ -10601,7 +10601,7 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/typeFro 38 │ /[a--b[--][\d++[]]&&[[&0-9--]&&[\p{L}]--\P{L}-_-]]&&&\q{foo}[0---9][&&q&&&\q{bar}&&]/v, ╰──── - × Invalid character in character class set subtraction + × Invalid regular expression: Invalid character in character class set subtraction ╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:38:8] 37 │ /[a--b[--][\d++[]]&&[[&0-9--]&&[\p{L}]--\P{L}-_-]]&&&\q{foo}[0---9][&&q&&&\q{bar}&&]/u, 38 │ /[a--b[--][\d++[]]&&[[&0-9--]&&[\p{L}]--\P{L}-_-]]&&&\q{foo}[0---9][&&q&&&\q{bar}&&]/v, @@ -10609,7 +10609,7 @@ Expect to Parse: tasks/coverage/typescript/tests/cases/conformance/salsa/typeFro 39 │ /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v, ╰──── - × Invalid character in character class set interseciton + × Invalid regular expression: Invalid character in character class set interseciton ╭─[typescript/tests/cases/compiler/regularExpressionScanning.ts:39:56] 38 │ /[a--b[--][\d++[]]&&[[&0-9--]&&[\p{L}]--\P{L}-_-]]&&&\q{foo}[0---9][&&q&&&\q{bar}&&]/v, 39 │ /[[^\P{Decimal_Number}&&[0-9]]&&\p{L}&&\p{ID_Continue}--\p{ASCII}\p{CWCF}]/v,