diff --git a/crates/oxc_regular_expression/src/body_parser/mod.rs b/crates/oxc_regular_expression/src/body_parser/mod.rs index 602eb95d206bd..9c29c6e0781ad 100644 --- a/crates/oxc_regular_expression/src/body_parser/mod.rs +++ b/crates/oxc_regular_expression/src/body_parser/mod.rs @@ -141,6 +141,9 @@ mod test { ("a{,", ParserOptions::default().with_unicode_mode()), ("x{9007199254740992}", ParserOptions::default()), ("x{9007199254740991,9007199254740992}", ParserOptions::default()), + ("x{99999999999999999999999999999999999999999999999999}", ParserOptions::default()), + (r"\99999999999999999999999999999999999999999999999999", ParserOptions::default()), + (r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", ParserOptions::default().with_unicode_mode()), ("(?=a", ParserOptions::default()), ("(? PatternParser<'a> { let checkpoint = self.reader.checkpoint(); // DecimalEscape: \1 means indexed reference - if let Some(index) = self.consume_decimal_escape() { + if let Some(index) = self.consume_decimal_escape()? { if self.state.unicode_mode { // [SS:EE] AtomEscape :: DecimalEscape // It is a Syntax Error if the CapturingGroupNumber of DecimalEscape is strictly greater than CountLeftCapturingParensWithin(the Pattern containing AtomEscape). @@ -1582,7 +1582,7 @@ impl<'a> PatternParser<'a> { let span_start = self.reader.offset(); let checkpoint = self.reader.checkpoint(); if self.reader.eat('{') { - if let Some(min) = self.consume_decimal_digits() { + if let Some(min) = self.consume_decimal_digits()? { if self.reader.eat('}') { if MAX_QUANTIFIER < min { return Err(OxcDiagnostic::error( @@ -1608,7 +1608,7 @@ impl<'a> PatternParser<'a> { return Ok(Some(((min, None), is_greedy(&mut self.reader)))); } - if let Some(max) = self.consume_decimal_digits() { + if let Some(max) = self.consume_decimal_digits()? { if self.reader.eat('}') { if max < min { // [SS:EE] QuantifierPrefix :: { DecimalDigits , DecimalDigits } @@ -1645,20 +1645,20 @@ impl<'a> PatternParser<'a> { // DecimalEscape :: // NonZeroDigit DecimalDigits[~Sep][opt] [lookahead ∉ DecimalDigit] // ``` - fn consume_decimal_escape(&mut self) -> Option { + fn consume_decimal_escape(&mut self) -> Result> { let checkpoint = self.reader.checkpoint(); - if let Some(index) = self.consume_decimal_digits() { + if let Some(index) = self.consume_decimal_digits()? { // \0 is CharacterEscape, not DecimalEscape if index != 0 { #[allow(clippy::cast_possible_truncation)] - return Some(index as u32); + return Ok(Some(index as u32)); } self.reader.rewind(checkpoint); } - None + Ok(None) } // ``` @@ -1668,23 +1668,33 @@ impl<'a> PatternParser<'a> { // [+Sep] DecimalDigits[+Sep] NumericLiteralSeparator DecimalDigit // ``` // ([Sep] is disabled for `QuantifierPrefix` and `DecimalEscape`, skip it) - fn consume_decimal_digits(&mut self) -> Option { + fn consume_decimal_digits(&mut self) -> Result> { + let span_start = self.reader.offset(); let checkpoint = self.reader.checkpoint(); - let mut value = 0; + let mut value: u64 = 0; while let Some(cp) = self.reader.peek().filter(|&cp| unicode::is_decimal_digit(cp)) { // `- '0' as u32`: convert code point to digit #[allow(clippy::cast_lossless)] let d = (cp - '0' as u32) as u64; - value = (10 * value) + d; - self.reader.advance(); + + // To prevent panic on overflow cases like `\999999999999999999999`, `a{999999999999999999999}` + if let Some(v) = value.checked_mul(10).and_then(|v| v.checked_add(d)) { + value = v; + self.reader.advance(); + } else { + return Err(OxcDiagnostic::error( + "Invalid regular expression: Number is too large in decimal digits", + ) + .with_label(self.span_factory.create(span_start, self.reader.offset()))); + } } if self.reader.checkpoint() != checkpoint { - return Some(value); + return Ok(Some(value)); } - None + Ok(None) } // ``` @@ -2014,7 +2024,7 @@ impl<'a> PatternParser<'a> { if self.reader.eat('{') { if let Some(hex_digits) = - self.consume_hex_digits().filter(|&cp| unicode::is_valid_unicode(cp)) + self.consume_hex_digits()?.filter(|&cp| unicode::is_valid_unicode(cp)) { if self.reader.eat('}') { return Ok(Some(hex_digits)); @@ -2150,20 +2160,29 @@ impl<'a> PatternParser<'a> { Some(cp) } - fn consume_hex_digits(&mut self) -> Option { + fn consume_hex_digits(&mut self) -> Result> { + let span_start = self.reader.offset(); let checkpoint = self.reader.checkpoint(); - let mut value = 0; + let mut value: u32 = 0; while let Some(hex) = self.reader.peek().and_then(unicode::map_hex_digit) { - value = (16 * value) + hex; - self.reader.advance(); + // To prevent panic on overflow cases like `\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}` + if let Some(v) = value.checked_mul(16).and_then(|v| v.checked_add(hex)) { + value = v; + self.reader.advance(); + } else { + return Err(OxcDiagnostic::error( + "Invalid regular expression: Number is too large in hex digits", + ) + .with_label(self.span_factory.create(span_start, self.reader.offset()))); + } } if self.reader.checkpoint() != checkpoint { - return Some(value); + return Ok(Some(value)); } - None + Ok(None) } fn consume_fixed_hex_digits(&mut self, len: usize) -> Option {