diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index 0216002e49423..6c6724d8de5a5 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -6,21 +6,21 @@ use oxc_diagnostics::OxcDiagnostic; use oxc_span::{GetSpan, Span}; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig, ParserImpl, diagnostics, error_handler::FatalError, lexer::{Kind, LexerCheckpoint, LexerContext, Token}, }; #[derive(Clone)] -pub struct ParserCheckpoint<'a> { - lexer: LexerCheckpoint<'a>, +pub struct ParserCheckpoint<'a, C: ParserConfig> { + lexer: LexerCheckpoint<'a, C>, cur_token: Token, prev_span_end: u32, errors_pos: usize, fatal_error: Option, } -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { #[inline] pub(crate) fn start_span(&self) -> u32 { self.token.start() @@ -300,7 +300,7 @@ impl<'a> ParserImpl<'a> { } } - pub(crate) fn checkpoint(&mut self) -> ParserCheckpoint<'a> { + pub(crate) fn checkpoint(&mut self) -> ParserCheckpoint<'a, C> { ParserCheckpoint { lexer: self.lexer.checkpoint(), cur_token: self.token, @@ -310,7 +310,7 @@ impl<'a> ParserImpl<'a> { } } - pub(crate) fn checkpoint_with_error_recovery(&mut self) -> ParserCheckpoint<'a> { + pub(crate) fn checkpoint_with_error_recovery(&mut self) -> ParserCheckpoint<'a, C> { ParserCheckpoint { lexer: self.lexer.checkpoint_with_error_recovery(), cur_token: self.token, @@ -320,7 +320,7 @@ impl<'a> ParserImpl<'a> { } } - pub(crate) fn rewind(&mut self, checkpoint: ParserCheckpoint<'a>) { + pub(crate) fn rewind(&mut self, checkpoint: ParserCheckpoint<'a, C>) { let ParserCheckpoint { lexer, cur_token, prev_span_end, errors_pos, fatal_error } = checkpoint; @@ -333,7 +333,7 @@ impl<'a> ParserImpl<'a> { pub(crate) fn try_parse( &mut self, - func: impl FnOnce(&mut ParserImpl<'a>) -> T, + func: impl FnOnce(&mut ParserImpl<'a, C>) -> T, ) -> Option { let checkpoint = self.checkpoint_with_error_recovery(); let ctx = self.ctx; @@ -347,7 +347,7 @@ impl<'a> ParserImpl<'a> { } } - pub(crate) fn lookahead(&mut self, predicate: impl Fn(&mut ParserImpl<'a>) -> U) -> U { + pub(crate) fn lookahead(&mut self, predicate: impl Fn(&mut ParserImpl<'a, C>) -> U) -> U { let checkpoint = self.checkpoint(); let answer = predicate(self); self.rewind(checkpoint); diff --git a/crates/oxc_parser/src/error_handler.rs b/crates/oxc_parser/src/error_handler.rs index b520be2266e9d..1188a0f154bf1 100644 --- a/crates/oxc_parser/src/error_handler.rs +++ b/crates/oxc_parser/src/error_handler.rs @@ -4,7 +4,7 @@ use oxc_allocator::Dummy; use oxc_diagnostics::OxcDiagnostic; use oxc_span::Span; -use crate::{ParserImpl, diagnostics, lexer::Kind}; +use crate::{ParserConfig, ParserImpl, diagnostics, lexer::Kind}; /// Fatal parsing error. #[derive(Debug, Clone)] @@ -15,7 +15,7 @@ pub struct FatalError { pub errors_len: usize, } -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { #[cold] pub(crate) fn set_unexpected(&mut self) { // The lexer should have reported a more meaningful diagnostic @@ -91,7 +91,7 @@ impl<'a> ParserImpl<'a> { // error, we detect these patterns and provide helpful guidance on how to resolve the conflict. // // Inspired by rust-lang/rust#106242 -impl ParserImpl<'_> { +impl ParserImpl<'_, C> { /// Check if the current position looks like a merge conflict marker. /// /// Detects the following Git conflict markers: diff --git a/crates/oxc_parser/src/js/arrow.rs b/crates/oxc_parser/src/js/arrow.rs index 911b9cfa34d21..ff560f4f176f2 100644 --- a/crates/oxc_parser/src/js/arrow.rs +++ b/crates/oxc_parser/src/js/arrow.rs @@ -4,7 +4,7 @@ use oxc_span::GetSpan; use oxc_syntax::precedence::Precedence; use super::{FunctionKind, Tristate}; -use crate::{ParserImpl, diagnostics, lexer::Kind}; +use crate::{ParserConfig, ParserImpl, diagnostics, lexer::Kind}; struct ArrowFunctionHead<'a> { type_parameters: Option>>, @@ -14,7 +14,7 @@ struct ArrowFunctionHead<'a> { span: u32, } -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { pub(super) fn try_parse_parenthesized_arrow_function_expression( &mut self, allow_return_type_in_arrow_function: bool, diff --git a/crates/oxc_parser/src/js/binding.rs b/crates/oxc_parser/src/js/binding.rs index 8b529ad549ecb..4ace785049fd0 100644 --- a/crates/oxc_parser/src/js/binding.rs +++ b/crates/oxc_parser/src/js/binding.rs @@ -1,9 +1,9 @@ use oxc_ast::{NONE, ast::*}; use oxc_span::GetSpan; -use crate::{Context, ParserImpl, diagnostics, lexer::Kind}; +use crate::{Context, ParserConfig, ParserImpl, diagnostics, lexer::Kind}; -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { /// `BindingElement` /// `SingleNameBinding` /// `BindingPattern`[?Yield, ?Await] `Initializer`[+In, ?Yield, ?Await]opt diff --git a/crates/oxc_parser/src/js/class.rs b/crates/oxc_parser/src/js/class.rs index 49e0e4b082534..2662572c3433e 100644 --- a/crates/oxc_parser/src/js/class.rs +++ b/crates/oxc_parser/src/js/class.rs @@ -4,7 +4,7 @@ use oxc_ecmascript::PropName; use oxc_span::{GetSpan, Span}; use crate::{ - Context, ParserImpl, StatementContext, diagnostics, + Context, ParserConfig, ParserImpl, StatementContext, diagnostics, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, }; @@ -15,7 +15,7 @@ type Extends<'a> = Vec<'a, (Expression<'a>, Option>>, Span)>; /// Section 15.7 Class Definitions -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { // `start_span` points at the start of all decoractors and `class` keyword. pub(crate) fn parse_class_statement( &mut self, diff --git a/crates/oxc_parser/src/js/declaration.rs b/crates/oxc_parser/src/js/declaration.rs index e4bdfc5468a8b..bf77e846bf347 100644 --- a/crates/oxc_parser/src/js/declaration.rs +++ b/crates/oxc_parser/src/js/declaration.rs @@ -3,9 +3,9 @@ use oxc_ast::{NONE, ast::*}; use oxc_span::GetSpan; use super::VariableDeclarationParent; -use crate::{ParserImpl, StatementContext, diagnostics, lexer::Kind}; +use crate::{ParserConfig, ParserImpl, StatementContext, diagnostics, lexer::Kind}; -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { pub(crate) fn parse_let(&mut self, stmt_ctx: StatementContext) -> Statement<'a> { let span = self.start_span(); diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index 750890e49bb2f..a959e22095b0e 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -17,12 +17,12 @@ use super::{ }, }; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig, ParserImpl, diagnostics, lexer::{Kind, parse_big_int, parse_float, parse_int}, modifiers::Modifiers, }; -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { pub(crate) fn parse_paren_expression(&mut self) -> Expression<'a> { let opening_span = self.cur_token().span(); self.expect(Kind::LParen); diff --git a/crates/oxc_parser/src/js/function.rs b/crates/oxc_parser/src/js/function.rs index 7d6d8719eeb72..45f066ce2eb85 100644 --- a/crates/oxc_parser/src/js/function.rs +++ b/crates/oxc_parser/src/js/function.rs @@ -4,7 +4,7 @@ use oxc_span::Span; use super::FunctionKind; use crate::{ - Context, ParserImpl, StatementContext, diagnostics, + Context, ParserConfig, ParserImpl, StatementContext, diagnostics, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, }; @@ -19,7 +19,7 @@ impl FunctionKind { } } -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { pub(crate) fn at_function_with_async(&mut self) -> bool { self.at(Kind::Function) || self.at(Kind::Async) && { diff --git a/crates/oxc_parser/src/js/grammar.rs b/crates/oxc_parser/src/js/grammar.rs index 0692416a1e366..ef37d6431838a 100644 --- a/crates/oxc_parser/src/js/grammar.rs +++ b/crates/oxc_parser/src/js/grammar.rs @@ -3,14 +3,14 @@ use oxc_ast::ast::*; use oxc_span::GetSpan; -use crate::{ParserImpl, diagnostics}; +use crate::{ParserConfig, ParserImpl, diagnostics}; -pub trait CoverGrammar<'a, T>: Sized { - fn cover(value: T, p: &mut ParserImpl<'a>) -> Self; +pub trait CoverGrammar<'a, T, C: ParserConfig>: Sized { + fn cover(value: T, p: &mut ParserImpl<'a, C>) -> Self; } -impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTarget<'a> { - fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: ParserConfig> CoverGrammar<'a, Expression<'a>, C> for AssignmentTarget<'a> { + fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a, C>) -> Self { match expr { Expression::ArrayExpression(array_expr) => { let pat = ArrayAssignmentTarget::cover(array_expr.unbox(), p); @@ -25,8 +25,8 @@ impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, Expression<'a>> for SimpleAssignmentTarget<'a> { - fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: ParserConfig> CoverGrammar<'a, Expression<'a>, C> for SimpleAssignmentTarget<'a> { + fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a, C>) -> Self { match expr { Expression::Identifier(ident) => { SimpleAssignmentTarget::AssignmentTargetIdentifier(ident) @@ -90,8 +90,8 @@ impl<'a> CoverGrammar<'a, Expression<'a>> for SimpleAssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, ArrayExpression<'a>> for ArrayAssignmentTarget<'a> { - fn cover(expr: ArrayExpression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: ParserConfig> CoverGrammar<'a, ArrayExpression<'a>, C> for ArrayAssignmentTarget<'a> { + fn cover(expr: ArrayExpression<'a>, p: &mut ParserImpl<'a, C>) -> Self { let mut elements = p.ast.vec(); let mut rest = None; @@ -136,8 +136,8 @@ impl<'a> CoverGrammar<'a, ArrayExpression<'a>> for ArrayAssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTargetMaybeDefault<'a> { - fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: ParserConfig> CoverGrammar<'a, Expression<'a>, C> for AssignmentTargetMaybeDefault<'a> { + fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a, C>) -> Self { match expr { Expression::AssignmentExpression(assignment_expr) => { if assignment_expr.operator != AssignmentOperator::Assign { @@ -156,14 +156,16 @@ impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTargetMaybeDefault<'a> { } } -impl<'a> CoverGrammar<'a, AssignmentExpression<'a>> for AssignmentTargetWithDefault<'a> { - fn cover(expr: AssignmentExpression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: ParserConfig> CoverGrammar<'a, AssignmentExpression<'a>, C> + for AssignmentTargetWithDefault<'a> +{ + fn cover(expr: AssignmentExpression<'a>, p: &mut ParserImpl<'a, C>) -> Self { p.ast.assignment_target_with_default(expr.span, expr.left, expr.right) } } -impl<'a> CoverGrammar<'a, ObjectExpression<'a>> for ObjectAssignmentTarget<'a> { - fn cover(expr: ObjectExpression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: ParserConfig> CoverGrammar<'a, ObjectExpression<'a>, C> for ObjectAssignmentTarget<'a> { + fn cover(expr: ObjectExpression<'a>, p: &mut ParserImpl<'a, C>) -> Self { let mut properties = p.ast.vec(); let mut rest = None; @@ -203,8 +205,8 @@ impl<'a> CoverGrammar<'a, ObjectExpression<'a>> for ObjectAssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, ObjectProperty<'a>> for AssignmentTargetProperty<'a> { - fn cover(property: ObjectProperty<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: ParserConfig> CoverGrammar<'a, ObjectProperty<'a>, C> for AssignmentTargetProperty<'a> { + fn cover(property: ObjectProperty<'a>, p: &mut ParserImpl<'a, C>) -> Self { if property.shorthand { let binding = match property.key { PropertyKey::StaticIdentifier(ident) => { diff --git a/crates/oxc_parser/src/js/module.rs b/crates/oxc_parser/src/js/module.rs index 2b1a5360b2794..23fb4595de244 100644 --- a/crates/oxc_parser/src/js/module.rs +++ b/crates/oxc_parser/src/js/module.rs @@ -5,7 +5,7 @@ use rustc_hash::FxHashMap; use super::FunctionKind; use crate::{ - ParserImpl, StatementContext, diagnostics, + ParserConfig, ParserImpl, StatementContext, diagnostics, lexer::Kind, modifiers::{Modifier, ModifierFlags, ModifierKind, Modifiers}, }; @@ -26,7 +26,7 @@ enum ImportOrExportSpecifier<'a> { Export(ExportSpecifier<'a>), } -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { /// [Import Call](https://tc39.es/ecma262/#sec-import-calls) /// `ImportCall` : import ( `AssignmentExpression` ) pub(crate) fn parse_import_expression( diff --git a/crates/oxc_parser/src/js/object.rs b/crates/oxc_parser/src/js/object.rs index 983f39ef3dc61..86f87f2fb8e97 100644 --- a/crates/oxc_parser/src/js/object.rs +++ b/crates/oxc_parser/src/js/object.rs @@ -3,14 +3,14 @@ use oxc_ast::ast::*; use oxc_syntax::operator::AssignmentOperator; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig, ParserImpl, diagnostics, lexer::Kind, modifiers::{ModifierFlags, Modifiers}, }; use super::FunctionKind; -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { /// [Object Expression](https://tc39.es/ecma262/#sec-object-initializer) /// `ObjectLiteral`[Yield, Await] : /// { } diff --git a/crates/oxc_parser/src/js/statement.rs b/crates/oxc_parser/src/js/statement.rs index a87b0d4482ca3..dc27fe11d2b5c 100644 --- a/crates/oxc_parser/src/js/statement.rs +++ b/crates/oxc_parser/src/js/statement.rs @@ -4,12 +4,12 @@ use oxc_span::{Atom, GetSpan, Span}; use super::{VariableDeclarationParent, grammar::CoverGrammar}; use crate::{ - Context, ParserImpl, StatementContext, diagnostics, + Context, ParserConfig, ParserImpl, StatementContext, diagnostics, lexer::Kind, modifiers::{Modifier, ModifierFlags, ModifierKind, Modifiers}, }; -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { // Section 12 // The InputElementHashbangOrRegExp goal is used at the start of a Script // or Module. diff --git a/crates/oxc_parser/src/jsx/mod.rs b/crates/oxc_parser/src/jsx/mod.rs index ec22202b05c5e..ac64c7c71a519 100644 --- a/crates/oxc_parser/src/jsx/mod.rs +++ b/crates/oxc_parser/src/jsx/mod.rs @@ -4,9 +4,9 @@ use oxc_allocator::{Box, Dummy, Vec}; use oxc_ast::ast::*; use oxc_span::{Atom, GetSpan, Span}; -use crate::{ParserImpl, diagnostics, lexer::Kind}; +use crate::{ParserConfig, ParserImpl, diagnostics, lexer::Kind}; -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { pub(crate) fn parse_jsx_expression(&mut self) -> Expression<'a> { let span = self.start_span(); self.bump_any(); // bump `<` diff --git a/crates/oxc_parser/src/lexer/byte_handlers.rs b/crates/oxc_parser/src/lexer/byte_handlers.rs index fa88842794504..f0da481ade7d5 100644 --- a/crates/oxc_parser/src/lexer/byte_handlers.rs +++ b/crates/oxc_parser/src/lexer/byte_handlers.rs @@ -1,10 +1,13 @@ use oxc_data_structures::assert_unchecked; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use super::{Kind, Lexer}; -impl Lexer<'_> { +impl Lexer<'_, C> { + /// Byte handlers for this `ParserConfig`. + const BYTE_HANDLERS: [ByteHandler; 256] = byte_handlers(); + /// Handle next byte of source. /// /// # SAFETY @@ -17,34 +20,37 @@ impl Lexer<'_> { #[inline(always)] pub(super) unsafe fn handle_byte(&mut self, byte: u8) -> Kind { // SAFETY: Caller guarantees to uphold safety invariants - unsafe { BYTE_HANDLERS[byte as usize](self) } + unsafe { Self::BYTE_HANDLERS[byte as usize](self) } } } -type ByteHandler = unsafe fn(&mut Lexer<'_>) -> Kind; +#[expect(type_alias_bounds)] +type ByteHandler = unsafe fn(&mut Lexer<'_, Config>) -> Kind; /// Lookup table mapping any incoming byte to a handler function defined below. /// #[rustfmt::skip] -static BYTE_HANDLERS: [ByteHandler; 256] = [ -// 0 1 2 3 4 5 6 7 8 9 A B C D E F // - ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, SPS, LIN, ISP, ISP, LIN, ERR, ERR, // 0 - ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1 - SPS, EXL, QOD, HAS, IDT, PRC, AMP, QOS, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2 - ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, GTR, QST, // 3 - AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4 - IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, ESC, BTC, CRT, IDT, // 5 - TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, IDT, L_I, IDT, L_K, L_L, L_M, L_N, L_O, // 6 - L_P, IDT, L_R, L_S, L_T, L_U, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 7 - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 8 - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 9 - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // A - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // B - UER, UER, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C - UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D - UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E - UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // F -]; +const fn byte_handlers() -> [ByteHandler; 256] { + [ + // 0 1 2 3 4 5 6 7 8 9 A B C D E F // + ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, SPS, LIN, ISP, ISP, LIN, ERR, ERR, // 0 + ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1 + SPS, EXL, QOD, HAS, IDT, PRC, AMP, QOS, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2 + ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, GTR, QST, // 3 + AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4 + IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, ESC, BTC, CRT, IDT, // 5 + TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, IDT, L_I, IDT, L_K, L_L, L_M, L_N, L_O, // 6 + L_P, IDT, L_R, L_S, L_T, L_U, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 7 + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 8 + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 9 + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // A + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // B + UER, UER, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E + UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // F + ] +} /// Macro for defining byte handler for an ASCII character. /// @@ -73,7 +79,7 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [ /// /// ``` /// #[expect(non_snake_case)] -/// fn SPS(lexer: &mut Lexer) { +/// fn SPS(lexer: &mut Lexer<'_, C>) -> Kind { /// // SAFETY: This macro is only used for ASCII characters /// unsafe { /// assert_unchecked!(!lexer.source.is_eof()); @@ -88,7 +94,7 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [ macro_rules! ascii_byte_handler { ($id:ident($lex:ident) $body:expr) => { #[expect(non_snake_case)] - fn $id($lex: &mut Lexer) -> Kind { + fn $id($lex: &mut Lexer<'_, C>) -> Kind { // SAFETY: This macro is only used for ASCII characters unsafe { assert_unchecked!(!$lex.source.is_eof()); @@ -136,7 +142,7 @@ macro_rules! ascii_byte_handler { macro_rules! ascii_identifier_handler { ($id:ident($str:ident) $body:expr) => { #[expect(non_snake_case)] - fn $id(lexer: &mut Lexer) -> Kind { + fn $id(lexer: &mut Lexer<'_, C>) -> Kind { // SAFETY: This macro is only used for ASCII characters let $str = unsafe { lexer.identifier_name_handler() }; $body @@ -653,7 +659,7 @@ ascii_identifier_handler!(L_Y(id_without_first_char) match id_without_first_char // // Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars. #[expect(non_snake_case)] -fn UNI(lexer: &mut Lexer) -> Kind { +fn UNI(lexer: &mut Lexer<'_, C>) -> Kind { lexer.unicode_char_handler() } @@ -665,6 +671,6 @@ fn UNI(lexer: &mut Lexer) -> Kind { // // Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes. #[expect(non_snake_case)] -fn UER(_lexer: &mut Lexer) -> Kind { +fn UER(_lexer: &mut Lexer<'_, C>) -> Kind { unreachable!(); } diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs index aad0e5a1d503e..78865f2198e4a 100644 --- a/crates/oxc_parser/src/lexer/comment.rs +++ b/crates/oxc_parser/src/lexer/comment.rs @@ -3,7 +3,7 @@ use memchr::memmem::Finder; use oxc_ast::CommentKind; use oxc_syntax::line_terminator::is_line_terminator; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use super::{ Kind, Lexer, cold_branch, @@ -22,7 +22,7 @@ static LINE_BREAK_TABLE: SafeByteMatchTable = static MULTILINE_COMMENT_START_TABLE: SafeByteMatchTable = safe_byte_match_table!(|b| matches!(b, b'*' | b'\r' | b'\n' | LS_OR_PS_FIRST)); -impl<'a> Lexer<'a> { +impl<'a, C: ParserConfig> Lexer<'a, C> { /// Section 12.4 Single Line Comment pub(super) fn skip_single_line_comment(&mut self) -> Kind { byte_search! { diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs index c08957cddede4..d0380f89528fb 100644 --- a/crates/oxc_parser/src/lexer/identifier.rs +++ b/crates/oxc_parser/src/lexer/identifier.rs @@ -6,7 +6,7 @@ use oxc_syntax::identifier::{ is_identifier_part, is_identifier_part_unicode, is_identifier_start_unicode, }; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use super::{ Kind, Lexer, SourcePosition, cold_branch, @@ -26,7 +26,7 @@ fn is_identifier_start_ascii_byte(byte: u8) -> bool { ASCII_ID_START_TABLE.matches(byte) } -impl<'a> Lexer<'a> { +impl<'a, C: ParserConfig> Lexer<'a, C> { /// Handle identifier with ASCII start character. /// Returns text of the identifier, minus its first char. /// diff --git a/crates/oxc_parser/src/lexer/jsx.rs b/crates/oxc_parser/src/lexer/jsx.rs index 436017587bb32..8cd3730feafbe 100644 --- a/crates/oxc_parser/src/lexer/jsx.rs +++ b/crates/oxc_parser/src/lexer/jsx.rs @@ -3,7 +3,7 @@ use memchr::memchr; use oxc_span::Span; use oxc_syntax::identifier::is_identifier_part; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use super::{ Kind, Lexer, Token, cold_branch, @@ -26,7 +26,7 @@ static JSX_CHILD_END_TABLE: SafeByteMatchTable = /// `JSXStringCharacter` but not ' /// `JSXStringCharacter` :: /// `SourceCharacter` but not one of `HTMLCharacterReference` -impl Lexer<'_> { +impl Lexer<'_, C> { /// Read JSX string literal. /// # SAFETY /// * `delimiter` must be an ASCII character. diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index ce91d1b015dbb..ef191e0c9b60c 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -6,13 +6,14 @@ //! * [v8](https://v8.dev/blog/scanner) use rustc_hash::FxHashMap; +use std::{fmt::Debug, marker::PhantomData}; use oxc_allocator::Allocator; use oxc_ast::ast::RegExpFlags; use oxc_diagnostics::OxcDiagnostic; use oxc_span::{SourceType, Span}; -use crate::{UniquePromise, diagnostics}; +use crate::{ParserConfig, StandardParserConfig, UniquePromise, diagnostics}; mod byte_handlers; mod comment; @@ -41,10 +42,11 @@ use source::{Source, SourcePosition}; use trivia_builder::TriviaBuilder; #[derive(Debug, Clone)] -pub struct LexerCheckpoint<'a> { +pub struct LexerCheckpoint<'a, C: ParserConfig = StandardParserConfig> { source_position: SourcePosition<'a>, token: Token, errors_snapshot: ErrorSnapshot, + marker: PhantomData, } #[derive(Debug, Clone)] @@ -61,7 +63,7 @@ pub enum LexerContext { JsxAttributeValue, } -pub struct Lexer<'a> { +pub struct Lexer<'a, C: ParserConfig = StandardParserConfig> { allocator: &'a Allocator, // Wrapper around source text. Must not be changed after initialization. @@ -86,9 +88,11 @@ pub struct Lexer<'a> { /// `memchr` Finder for end of multi-line comments. Created lazily when first used. multi_line_comment_end_finder: Option>, + + marker: PhantomData, } -impl<'a> Lexer<'a> { +impl<'a, C: ParserConfig> Lexer<'a, C> { /// Create new `Lexer`. /// /// Requiring a `UniquePromise` to be provided guarantees only 1 `Lexer` can exist @@ -114,6 +118,7 @@ impl<'a> Lexer<'a> { escaped_strings: FxHashMap::default(), escaped_templates: FxHashMap::default(), multi_line_comment_end_finder: None, + marker: PhantomData, } } @@ -143,7 +148,7 @@ impl<'a> Lexer<'a> { /// Creates a checkpoint storing the current lexer state. /// Use `rewind` to restore the lexer to the state stored in the checkpoint. - pub fn checkpoint(&self) -> LexerCheckpoint<'a> { + pub fn checkpoint(&self) -> LexerCheckpoint<'a, C> { let errors_snapshot = if self.errors.is_empty() { ErrorSnapshot::Empty } else { @@ -153,12 +158,13 @@ impl<'a> Lexer<'a> { source_position: self.source.position(), token: self.token, errors_snapshot, + marker: PhantomData, } } /// Create a checkpoint that can handle error popping. /// This is more expensive as it clones the errors vector. - pub(crate) fn checkpoint_with_error_recovery(&self) -> LexerCheckpoint<'a> { + pub(crate) fn checkpoint_with_error_recovery(&self) -> LexerCheckpoint<'a, C> { let errors_snapshot = if self.errors.is_empty() { ErrorSnapshot::Empty } else { @@ -168,11 +174,12 @@ impl<'a> Lexer<'a> { source_position: self.source.position(), token: self.token, errors_snapshot, + marker: PhantomData, } } /// Rewinds the lexer to the same state as when the passed in `checkpoint` was created. - pub fn rewind(&mut self, checkpoint: LexerCheckpoint<'a>) { + pub fn rewind(&mut self, checkpoint: LexerCheckpoint<'a, C>) { match checkpoint.errors_snapshot { ErrorSnapshot::Empty => self.errors.clear(), ErrorSnapshot::Count(len) => self.errors.truncate(len), diff --git a/crates/oxc_parser/src/lexer/numeric.rs b/crates/oxc_parser/src/lexer/numeric.rs index 1c2bedd66cb31..da8a1ac3db7f5 100644 --- a/crates/oxc_parser/src/lexer/numeric.rs +++ b/crates/oxc_parser/src/lexer/numeric.rs @@ -1,10 +1,10 @@ use oxc_syntax::identifier::{is_identifier_part_ascii, is_identifier_start}; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use super::{Kind, Lexer, Span}; -impl Lexer<'_> { +impl Lexer<'_, C> { /// 12.9.3 Numeric Literals with `0` prefix pub(super) fn read_zero(&mut self) -> Kind { match self.peek_byte() { diff --git a/crates/oxc_parser/src/lexer/punctuation.rs b/crates/oxc_parser/src/lexer/punctuation.rs index 9f8fca7c13d79..62dd5f16b8f2d 100644 --- a/crates/oxc_parser/src/lexer/punctuation.rs +++ b/crates/oxc_parser/src/lexer/punctuation.rs @@ -1,6 +1,8 @@ +use crate::ParserConfig; + use super::{Kind, Lexer, Token}; -impl Lexer<'_> { +impl Lexer<'_, C> { /// Section 12.8 Punctuators pub(super) fn read_dot(&mut self) -> Kind { if self.peek_2_bytes() == Some([b'.', b'.']) { diff --git a/crates/oxc_parser/src/lexer/regex.rs b/crates/oxc_parser/src/lexer/regex.rs index baacfa6d24d75..00e5802ec5c40 100644 --- a/crates/oxc_parser/src/lexer/regex.rs +++ b/crates/oxc_parser/src/lexer/regex.rs @@ -2,9 +2,9 @@ use oxc_syntax::line_terminator::is_line_terminator; use crate::diagnostics; -use super::{Kind, Lexer, RegExpFlags, Token}; +use super::{Kind, Lexer, ParserConfig, RegExpFlags, Token}; -impl Lexer<'_> { +impl Lexer<'_, C> { /// Re-tokenize the current `/` or `/=` and return `RegExp` /// See Section 12: /// The `InputElementRegExp` goal symbol is used in all syntactic grammar contexts diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs index 74a8b7aed16bb..8f63299495b32 100644 --- a/crates/oxc_parser/src/lexer/string.rs +++ b/crates/oxc_parser/src/lexer/string.rs @@ -2,7 +2,7 @@ use std::cmp::max; use oxc_allocator::StringBuilder; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use super::{ Kind, Lexer, LexerContext, Span, Token, cold_branch, @@ -202,7 +202,7 @@ macro_rules! handle_string_literal_escape { } /// 12.9.4 String Literals -impl<'a> Lexer<'a> { +impl<'a, C: ParserConfig> Lexer<'a, C> { /// Read string literal delimited with `"`. /// # SAFETY /// Next character must be `"`. diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs index 41d93e8d300d8..edc829b479288 100644 --- a/crates/oxc_parser/src/lexer/template.rs +++ b/crates/oxc_parser/src/lexer/template.rs @@ -2,7 +2,7 @@ use std::{cmp::max, str}; use oxc_allocator::StringBuilder; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use super::{ Kind, Lexer, SourcePosition, Token, cold_branch, @@ -33,7 +33,7 @@ static TEMPLATE_LITERAL_ESCAPED_MATCH_TABLE: SafeByteMatchTable = safe_byte_matc ); /// 12.8.6 Template Literal Lexical Components -impl<'a> Lexer<'a> { +impl<'a, C: ParserConfig> Lexer<'a, C> { /// Read template literal component. /// /// This function handles the common case where template contains no escapes or `\r` characters @@ -409,6 +409,8 @@ mod test { use oxc_allocator::Allocator; use oxc_span::SourceType; + use crate::StandardParserConfig; + use super::super::{Kind, Lexer, UniquePromise}; #[test] @@ -442,7 +444,12 @@ mod test { fn run_test(source_text: String, expected_escaped: String, is_only_part: bool) { let allocator = Allocator::default(); let unique = UniquePromise::new_for_tests_and_benchmarks(); - let mut lexer = Lexer::new(&allocator, &source_text, SourceType::default(), unique); + let mut lexer = Lexer::::new( + &allocator, + &source_text, + SourceType::default(), + unique, + ); let token = lexer.next_token(); assert_eq!( token.kind(), diff --git a/crates/oxc_parser/src/lexer/typescript.rs b/crates/oxc_parser/src/lexer/typescript.rs index f13fd805e2072..d98188e41c343 100644 --- a/crates/oxc_parser/src/lexer/typescript.rs +++ b/crates/oxc_parser/src/lexer/typescript.rs @@ -1,6 +1,8 @@ +use crate::ParserConfig; + use super::{Kind, Lexer, Token}; -impl Lexer<'_> { +impl Lexer<'_, C> { /// Re-tokenize '<<' or '<=' or '<<=' to '<' pub(crate) fn re_lex_as_typescript_l_angle(&mut self, offset: u32) -> Token { self.token.set_start(self.offset() - offset); diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs index 6e2dbf7e4fd82..9b29dd2ac56f7 100644 --- a/crates/oxc_parser/src/lexer/unicode.rs +++ b/crates/oxc_parser/src/lexer/unicode.rs @@ -2,7 +2,7 @@ use std::{borrow::Cow, fmt::Write}; use cow_utils::CowUtils; -use crate::diagnostics; +use crate::{ParserConfig, diagnostics}; use oxc_allocator::StringBuilder; use oxc_syntax::{ identifier::{ @@ -29,7 +29,7 @@ enum UnicodeEscape { LoneSurrogate(u32), } -impl<'a> Lexer<'a> { +impl<'a, C: ParserConfig> Lexer<'a, C> { pub(super) fn unicode_char_handler(&mut self) -> Kind { let c = self.peek_char().unwrap(); match c { diff --git a/crates/oxc_parser/src/lexer/whitespace.rs b/crates/oxc_parser/src/lexer/whitespace.rs index a770362a37b18..ce2be0d13c30a 100644 --- a/crates/oxc_parser/src/lexer/whitespace.rs +++ b/crates/oxc_parser/src/lexer/whitespace.rs @@ -1,3 +1,5 @@ +use crate::ParserConfig; + use super::{ Kind, Lexer, search::{SafeByteMatchTable, byte_search, safe_byte_match_table}, @@ -6,7 +8,7 @@ use super::{ static NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE: SafeByteMatchTable = safe_byte_match_table!(|b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n')); -impl Lexer<'_> { +impl Lexer<'_, C> { pub(super) fn line_break_handler(&mut self) -> Kind { self.token.set_is_on_new_line(true); self.trivia_builder.handle_newline(); diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs index e10e84355b328..c2fcdc27da21c 100644 --- a/crates/oxc_parser/src/lib.rs +++ b/crates/oxc_parser/src/lib.rs @@ -94,6 +94,7 @@ use oxc_ast::{ use oxc_diagnostics::OxcDiagnostic; use oxc_span::{ModuleKind, SourceType, Span}; use oxc_syntax::module_record::ModuleRecord; +use std::marker::PhantomData; use crate::{ context::{Context, StatementContext}, @@ -182,6 +183,16 @@ pub struct ParserReturn<'a> { pub is_flow_language: bool, } +/// Compile-time configuration for the parser and lexer. +/// +/// Will be used to configure token collection and UTF-8 to UTF-16 translation only for oxlint. +pub trait ParserConfig {} + +/// Parser configuration intended for general use. +pub struct StandardParserConfig; + +impl ParserConfig for StandardParserConfig {} + /// Parse options /// /// You may provide options to the [`Parser`] using [`Parser::with_options`]. @@ -238,23 +249,47 @@ impl Default for ParseOptions { /// Recursive Descent Parser for ECMAScript and TypeScript /// /// See [`Parser::parse`] for entry function. -pub struct Parser<'a> { +pub struct Parser<'a, C: ParserConfig = StandardParserConfig> { allocator: &'a Allocator, source_text: &'a str, source_type: SourceType, options: ParseOptions, + marker: PhantomData, } -impl<'a> Parser<'a> { - /// Create a new [`Parser`] +impl<'a> Parser<'a, StandardParserConfig> { + /// Create a new [`Parser`] using the standard configuration. /// /// # Parameters /// - `allocator`: [Memory arena](oxc_allocator::Allocator) for allocating AST nodes /// - `source_text`: Source code to parse /// - `source_type`: Source type (e.g. JavaScript, TypeScript, JSX, ESM Module, Script) + #[inline] pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self { + Self { + allocator, + source_text, + source_type, + options: ParseOptions::default(), + marker: PhantomData, + } + } +} + +impl<'a, C: ParserConfig> Parser<'a, C> { + /// Create a new [`Parser`] + /// + /// # Parameters + /// - `allocator`: [Memory arena](oxc_allocator::Allocator) for allocating AST nodes + /// - `source_text`: Source code to parse + /// - `source_type`: Source type (e.g. JavaScript, TypeScript, JSX, ESM Module, Script) + pub fn new_with_config( + allocator: &'a Allocator, + source_text: &'a str, + source_type: SourceType, + ) -> Self { let options = ParseOptions::default(); - Self { allocator, source_text, source_type, options } + Self { allocator, source_text, source_type, options, marker: PhantomData } } /// Set parse options @@ -299,7 +334,7 @@ mod parser_parse { } } - impl<'a> Parser<'a> { + impl<'a, C: ParserConfig> Parser<'a, C> { /// Main entry point /// /// Returns an empty `Program` on unrecoverable error, @@ -354,10 +389,11 @@ use parser_parse::UniquePromise; /// Implementation of parser. /// `Parser` is just a public wrapper, the guts of the implementation is in this type. -struct ParserImpl<'a> { +struct ParserImpl<'a, C: ParserConfig = StandardParserConfig> { options: ParseOptions, - pub(crate) lexer: Lexer<'a>, + // TODO: investigate whether this needs to be `pub(crate)` + pub(crate) lexer: Lexer<'a, C>, /// SourceType: JavaScript or TypeScript, Script or Module, jsx support? source_type: SourceType, @@ -393,7 +429,7 @@ struct ParserImpl<'a> { is_ts: bool, } -impl<'a> ParserImpl<'a> { +impl<'a> ParserImpl<'a, StandardParserConfig> { /// Create a new `ParserImpl`. /// /// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist @@ -405,6 +441,23 @@ impl<'a> ParserImpl<'a> { source_type: SourceType, options: ParseOptions, unique: UniquePromise, + ) -> Self { + Self::new_with_config(allocator, source_text, source_type, options, unique) + } +} + +impl<'a, C: ParserConfig> ParserImpl<'a, C> { + /// Create a new `ParserImpl`. + /// + /// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist + /// on a single thread at one time. + #[inline] + pub fn new_with_config( + allocator: &'a Allocator, + source_text: &'a str, + source_type: SourceType, + options: ParseOptions, + unique: UniquePromise, ) -> Self { Self { options, diff --git a/crates/oxc_parser/src/modifiers.rs b/crates/oxc_parser/src/modifiers.rs index e9f98912eb929..ed2d48a476c29 100644 --- a/crates/oxc_parser/src/modifiers.rs +++ b/crates/oxc_parser/src/modifiers.rs @@ -8,7 +8,7 @@ use oxc_diagnostics::OxcDiagnostic; use oxc_span::Span; use crate::{ - ParserImpl, diagnostics, + ParserConfig, ParserImpl, diagnostics, lexer::{Kind, Token}, }; @@ -317,7 +317,7 @@ impl std::fmt::Display for ModifierKind { } } -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { pub(crate) fn eat_modifiers_before_declaration(&mut self) -> Modifiers<'a> { if !self.at_modifier() { return Modifiers::empty(); @@ -522,8 +522,8 @@ impl<'a> ParserImpl<'a> { // Also `#[inline(never)]` to help `verify_modifiers` to get inlined. #[cold] #[inline(never)] - fn report<'a, F>( - parser: &mut ParserImpl<'a>, + fn report<'a, C: ParserConfig, F>( + parser: &mut ParserImpl<'a, C>, modifiers: &Modifiers<'a>, allowed: ModifierFlags, strict: bool, diff --git a/crates/oxc_parser/src/ts/statement.rs b/crates/oxc_parser/src/ts/statement.rs index b26a0d757aa10..edf30b00d9010 100644 --- a/crates/oxc_parser/src/ts/statement.rs +++ b/crates/oxc_parser/src/ts/statement.rs @@ -3,7 +3,7 @@ use oxc_ast::ast::*; use oxc_span::GetSpan; use crate::{ - ParserImpl, diagnostics, + ParserConfig, ParserImpl, diagnostics, js::{FunctionKind, VariableDeclarationParent}, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, @@ -15,7 +15,7 @@ pub(super) enum CallOrConstructorSignature { Constructor, } -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { /* ------------------- Enum ------------------ */ /// `https://www.typescriptlang.org/docs/handbook/enums.html` pub(crate) fn parse_ts_enum_declaration( diff --git a/crates/oxc_parser/src/ts/types.rs b/crates/oxc_parser/src/ts/types.rs index 73016609372db..921169901af80 100644 --- a/crates/oxc_parser/src/ts/types.rs +++ b/crates/oxc_parser/src/ts/types.rs @@ -4,14 +4,14 @@ use oxc_span::GetSpan; use oxc_syntax::operator::UnaryOperator; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig, ParserImpl, diagnostics, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, }; use super::{super::js::FunctionKind, statement::CallOrConstructorSignature}; -impl<'a> ParserImpl<'a> { +impl<'a, C: ParserConfig> ParserImpl<'a, C> { pub(crate) fn parse_ts_type(&mut self) -> TSType<'a> { if self.is_start_of_function_type_or_constructor_type() { return self.parse_function_or_constructor_type();