diff --git a/crates/oxc_regular_expression/src/body_parser/mod.rs b/crates/oxc_regular_expression/src/body_parser/mod.rs index d3d2ef105b2c4..00cc7f969bcb0 100644 --- a/crates/oxc_regular_expression/src/body_parser/mod.rs +++ b/crates/oxc_regular_expression/src/body_parser/mod.rs @@ -1,4 +1,3 @@ -mod diagnostics; mod parser; mod reader; mod state; diff --git a/crates/oxc_regular_expression/src/body_parser/parser.rs b/crates/oxc_regular_expression/src/body_parser/parser.rs index dec6903fb2352..d23d5bdfbf092 100644 --- a/crates/oxc_regular_expression/src/body_parser/parser.rs +++ b/crates/oxc_regular_expression/src/body_parser/parser.rs @@ -4,7 +4,8 @@ use oxc_span::Atom as SpanAtom; use crate::{ ast, - body_parser::{diagnostics, reader::Reader, state::State, unicode, unicode_property}, + body_parser::{reader::Reader, state::State, unicode, unicode_property}, + diagnostics, options::ParserOptions, span::SpanFactory, surrogate_pair, diff --git a/crates/oxc_regular_expression/src/body_parser/diagnostics.rs b/crates/oxc_regular_expression/src/diagnostics.rs similarity index 83% rename from crates/oxc_regular_expression/src/body_parser/diagnostics.rs rename to crates/oxc_regular_expression/src/diagnostics.rs index 73d0097699353..8eef61fe70b4c 100644 --- a/crates/oxc_regular_expression/src/body_parser/diagnostics.rs +++ b/crates/oxc_regular_expression/src/diagnostics.rs @@ -3,6 +3,43 @@ use oxc_span::Span; const PREFIX: &str = "Invalid regular expression:"; +// For (Literal)Parser --- + +#[cold] +pub fn unexpected_literal_char(span0: Span) -> OxcDiagnostic { + OxcDiagnostic::error(format!("{PREFIX} Unexpected literal character")).with_label(span0) +} + +#[cold] +pub fn unterminated_literal(span0: Span, kind: &str) -> OxcDiagnostic { + OxcDiagnostic::error(format!("{PREFIX} Unterminated {kind}")).with_label(span0) +} + +#[cold] +pub fn empty_literal(span0: Span) -> OxcDiagnostic { + OxcDiagnostic::error(format!("{PREFIX} Empty literal")).with_label(span0) +} + +// For FlagsParser --- + +#[cold] +pub fn duplicated_flag(span0: Span) -> OxcDiagnostic { + OxcDiagnostic::error(format!("{PREFIX} Duplicated flag")).with_label(span0) +} + +#[cold] +pub fn unknown_flag(span0: Span) -> OxcDiagnostic { + OxcDiagnostic::error(format!("{PREFIX} Unknown flag")).with_label(span0) +} + +#[cold] +pub fn invalid_unicode_flags(span0: Span) -> OxcDiagnostic { + OxcDiagnostic::error(format!("{PREFIX} Invalid flags, `u` and `v` should be used alone")) + .with_label(span0) +} + +// For PatternParser --- + #[cold] pub fn duplicated_capturing_group_names(spans: Vec) -> OxcDiagnostic { OxcDiagnostic::error(format!("{PREFIX} Duplicated capturing group names")).with_labels(spans) diff --git a/crates/oxc_regular_expression/src/flag_parser.rs b/crates/oxc_regular_expression/src/flag_parser.rs index 90f8b0cffdad4..1c4b059ed32fc 100644 --- a/crates/oxc_regular_expression/src/flag_parser.rs +++ b/crates/oxc_regular_expression/src/flag_parser.rs @@ -1,8 +1,8 @@ use oxc_allocator::Allocator; -use oxc_diagnostics::{OxcDiagnostic, Result}; +use oxc_diagnostics::Result; use rustc_hash::FxHashSet; -use crate::{ast, options::ParserOptions, span::SpanFactory}; +use crate::{ast, diagnostics, options::ParserOptions, span::SpanFactory}; pub struct FlagsParser<'a> { source_text: &'a str, @@ -20,8 +20,7 @@ impl<'a> FlagsParser<'a> { } pub fn parse(&mut self) -> Result { - let mut existing_flags = FxHashSet::default(); - + let span = self.span_factory.create(0, self.source_text.len()); let mut global = false; let mut ignore_case = false; let mut multiline = false; @@ -31,9 +30,10 @@ impl<'a> FlagsParser<'a> { let mut has_indices = false; let mut unicode_sets = false; - for c in self.source_text.chars() { + let mut existing_flags = FxHashSet::default(); + for (idx, c) in self.source_text.char_indices() { if !existing_flags.insert(c) { - return Err(OxcDiagnostic::error(format!("Duplicated flag `{c}`"))); + return Err(diagnostics::duplicated_flag(self.span_factory.create(idx, idx))); } match c { @@ -45,17 +45,16 @@ impl<'a> FlagsParser<'a> { 's' => dot_all = true, 'd' => has_indices = true, 'v' => unicode_sets = true, - _ => return Err(OxcDiagnostic::error(format!("Invalid flag `{c}`"))), + _ => return Err(diagnostics::unknown_flag(self.span_factory.create(idx, idx))), } } - // This should be a `SyntaxError` if unicode && unicode_sets { - return Err(OxcDiagnostic::error("Invalid regular expression flags")); + return Err(diagnostics::invalid_unicode_flags(span)); } Ok(ast::Flags { - span: self.span_factory.create(0, self.source_text.len()), + span, global, ignore_case, multiline, diff --git a/crates/oxc_regular_expression/src/lib.rs b/crates/oxc_regular_expression/src/lib.rs index 9177cd4c4a5ed..d1b19e075df96 100644 --- a/crates/oxc_regular_expression/src/lib.rs +++ b/crates/oxc_regular_expression/src/lib.rs @@ -2,6 +2,7 @@ pub mod ast; mod body_parser; +mod diagnostics; mod display; mod flag_parser; mod literal_parser; diff --git a/crates/oxc_regular_expression/src/literal_parser.rs b/crates/oxc_regular_expression/src/literal_parser.rs index 2276a0a9d4593..56a9e0eb1991b 100644 --- a/crates/oxc_regular_expression/src/literal_parser.rs +++ b/crates/oxc_regular_expression/src/literal_parser.rs @@ -1,8 +1,8 @@ use oxc_allocator::Allocator; -use oxc_diagnostics::{OxcDiagnostic, Result}; +use oxc_diagnostics::Result; use crate::{ - ast, body_parser::PatternParser, flag_parser::FlagsParser, options::ParserOptions, + ast, body_parser::PatternParser, diagnostics, flag_parser::FlagsParser, options::ParserOptions, span::SpanFactory, }; @@ -28,7 +28,7 @@ impl<'a> Parser<'a> { // Precheck if the source text is a valid regular expression literal // If valid, parse the pattern and flags with returned span offsets let (body_start_offset, body_end_offset, flag_start_offset) = - parse_reg_exp_literal(self.source_text)?; + parse_reg_exp_literal(self.source_text, &self.span_factory)?; // Parse flags first to know if unicode mode is enabled or not let flags = FlagsParser::new( @@ -67,12 +67,15 @@ impl<'a> Parser<'a> { /// / RegularExpressionBody / RegularExpressionFlags /// ``` /// Returns `(body_start_offset, body_end_offset, flag_start_offset)`. -fn parse_reg_exp_literal(source_text: &str) -> Result<(usize, usize, usize)> { +fn parse_reg_exp_literal( + source_text: &str, + span_factory: &SpanFactory, +) -> Result<(usize, usize, usize)> { let mut offset = 0; let mut chars = source_text.chars().peekable(); let Some('/') = chars.next() else { - return Err(OxcDiagnostic::error("Invalid regular expression: Unexpected character")); + return Err(diagnostics::unexpected_literal_char(span_factory.create(offset, offset))); }; offset += 1; // '/' @@ -84,9 +87,10 @@ fn parse_reg_exp_literal(source_text: &str) -> Result<(usize, usize, usize)> { match chars.peek() { // Line terminators are not allowed Some('\u{a}' | '\u{d}' | '\u{2028}' | '\u{2029}') | None => { - let kind = - if in_character_class { "character class" } else { "regular expression" }; - return Err(OxcDiagnostic::error(format!("Unterminated {kind}"))); + return Err(diagnostics::unterminated_literal( + span_factory.create(body_start, offset), + if in_character_class { "character class" } else { "regular expression" }, + )); } Some(&ch) => { if in_escape { @@ -112,12 +116,12 @@ fn parse_reg_exp_literal(source_text: &str) -> Result<(usize, usize, usize)> { } let Some('/') = chars.next() else { - return Err(OxcDiagnostic::error("Invalid regular expression: Unexpected character")); + return Err(diagnostics::unexpected_literal_char(span_factory.create(offset, offset))); }; let body_end = offset; if body_end == body_start { - return Err(OxcDiagnostic::error("Invalid regular expression: Empty")); + return Err(diagnostics::empty_literal(span_factory.create(0, body_end + 1))); } Ok((body_start, body_end, body_end + 1)) @@ -141,7 +145,7 @@ mod test { "/👈🏻こっち/u", ] { let (body_start_offset, body_end_offset, flag_start_offset) = - parse_reg_exp_literal(literal_text) + parse_reg_exp_literal(literal_text, &SpanFactory::new(0)) .unwrap_or_else(|_| panic!("{literal_text} should be parsed")); let body_text = &literal_text[body_start_offset..body_end_offset]; @@ -155,7 +159,7 @@ mod test { for literal_text in ["", "foo", ":(", "a\nb", "/", "/x", "/y\nz/", "/1[\n]/", "//", "///", "/*abc/", "/\\/"] { - assert!(parse_reg_exp_literal(literal_text).is_err()); + assert!(parse_reg_exp_literal(literal_text, &SpanFactory::new(0)).is_err()); } } }