diff --git a/crates/oxc_formatter/src/service/mod.rs b/crates/oxc_formatter/src/service/mod.rs index 010ca3527a65e..2ade92da4fccd 100644 --- a/crates/oxc_formatter/src/service/mod.rs +++ b/crates/oxc_formatter/src/service/mod.rs @@ -11,7 +11,6 @@ pub fn get_parse_options() -> ParseOptions { allow_v8_intrinsics: true, // `oxc_formatter` expects this to be `false`, otherwise panics preserve_parens: false, - collect_tokens: false, } } diff --git a/crates/oxc_parser/src/config.rs b/crates/oxc_parser/src/config.rs new file mode 100644 index 0000000000000..e13b09eb5dfa3 --- /dev/null +++ b/crates/oxc_parser/src/config.rs @@ -0,0 +1,166 @@ +// All methods are `#[inline(always)]` to ensure compiler removes dead code resulting from static values +#![expect(clippy::inline_always)] + +use std::ops::Index; + +use crate::lexer::{ByteHandler, ByteHandlers, byte_handler_tables}; + +/// Parser config. +/// +/// The purpose of parser config (as opposed to `ParseOptions`) is to allow setting options at either +/// compile time or runtime. +/// +/// 3 configs are provided: +/// * [`NoTokensParserConfig`]: Parse without tokens, static (default) +/// * [`TokensParserConfig`]: Parse with tokens, static +/// * [`RuntimeParserConfig`]: Parse with or without tokens, decided at runtime +/// +/// The trade-off is: +/// +/// * The 2 static configs will produce better performance, because compiler can remove code that relates +/// to the other option as dead code, and remove branches. +/// +/// * The runtime config will produce a smaller binary than using 2 different configs in the same application, +/// which would cause 2 polymorphic variants of the parser to be compiled. +/// +/// Advised usage: +/// * If your application uses only a specific set of options, use a static config. +/// * If your application uses multiple sets of options, probably a runtime config is preferable. +/// +/// At present the only option controlled by `ParserConfig` is whether to parse with or without tokens. 
+/// Other options will be added in future. +/// +/// You can also create your own config by implementing [`ParserConfig`] on a type. +pub trait ParserConfig: Default { + type LexerConfig: LexerConfig; + + fn lexer_config(&self) -> Self::LexerConfig; +} + +/// Parser config for parsing without tokens (default). +/// +/// See [`ParserConfig`] for more details. +#[derive(Copy, Clone, Default)] +pub struct NoTokensParserConfig; + +impl ParserConfig for NoTokensParserConfig { + type LexerConfig = NoTokensLexerConfig; + + #[inline(always)] + fn lexer_config(&self) -> NoTokensLexerConfig { + NoTokensLexerConfig + } +} + +/// Parser config for parsing with tokens. +/// +/// See [`ParserConfig`] for more details. +#[derive(Copy, Clone, Default)] +pub struct TokensParserConfig; + +impl ParserConfig for TokensParserConfig { + type LexerConfig = TokensLexerConfig; + + #[inline(always)] + fn lexer_config(&self) -> TokensLexerConfig { + TokensLexerConfig + } +} + +/// Parser config for parsing with/without tokens, decided at runtime. +/// +/// See [`ParserConfig`] for more details. +#[derive(Copy, Clone, Default)] +#[repr(transparent)] +pub struct RuntimeParserConfig { + lexer_config: RuntimeLexerConfig, +} + +impl RuntimeParserConfig { + #[inline(always)] + pub fn new(tokens: bool) -> Self { + Self { lexer_config: RuntimeLexerConfig::new(tokens) } + } +} + +impl ParserConfig for RuntimeParserConfig { + type LexerConfig = RuntimeLexerConfig; + + #[inline(always)] + fn lexer_config(&self) -> RuntimeLexerConfig { + self.lexer_config + } +} + +/// Lexer config. +pub trait LexerConfig: Default { + type ByteHandlers: Index>; + + fn tokens(&self) -> bool; + + fn byte_handlers(&self) -> &Self::ByteHandlers; +} + +/// Lexer config for lexing without tokens. 
+#[derive(Copy, Clone, Default)] +pub struct NoTokensLexerConfig; + +impl LexerConfig for NoTokensLexerConfig { + type ByteHandlers = ByteHandlers; + + #[inline(always)] + fn tokens(&self) -> bool { + false + } + + #[inline(always)] + fn byte_handlers(&self) -> &Self::ByteHandlers { + &byte_handler_tables::NO_TOKENS + } +} + +/// Lexer config for parsing with tokens. +#[derive(Copy, Clone, Default)] +pub struct TokensLexerConfig; + +impl LexerConfig for TokensLexerConfig { + type ByteHandlers = ByteHandlers; + + #[inline(always)] + fn tokens(&self) -> bool { + true + } + + #[inline(always)] + fn byte_handlers(&self) -> &Self::ByteHandlers { + &byte_handler_tables::WITH_TOKENS + } +} + +/// Lexer config for lexing with/without tokens, decided at runtime. +#[derive(Copy, Clone, Default)] +#[repr(transparent)] +pub struct RuntimeLexerConfig { + tokens: bool, +} + +impl RuntimeLexerConfig { + #[inline(always)] + pub fn new(tokens: bool) -> Self { + Self { tokens } + } +} + +impl LexerConfig for RuntimeLexerConfig { + type ByteHandlers = ByteHandlers; + + #[inline(always)] + fn tokens(&self) -> bool { + self.tokens + } + + #[inline(always)] + fn byte_handlers(&self) -> &Self::ByteHandlers { + &byte_handler_tables::RUNTIME_TOKENS + } +} diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index 48b500a213272..df7189dba0965 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -6,7 +6,7 @@ use oxc_diagnostics::OxcDiagnostic; use oxc_span::{GetSpan, Span}; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig as Config, ParserImpl, diagnostics, error_handler::FatalError, lexer::{Kind, LexerCheckpoint, LexerContext, Token}, }; @@ -20,7 +20,7 @@ pub struct ParserCheckpoint<'a> { fatal_error: Option, } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { #[inline] pub(crate) fn start_span(&self) -> u32 { self.token.start() @@ -327,7 +327,7 @@ impl<'a> ParserImpl<'a> { pub(crate) fn 
try_parse( &mut self, - func: impl FnOnce(&mut ParserImpl<'a>) -> T, + func: impl FnOnce(&mut ParserImpl<'a, C>) -> T, ) -> Option { let checkpoint = self.checkpoint_with_error_recovery(); let ctx = self.ctx; @@ -341,7 +341,7 @@ impl<'a> ParserImpl<'a> { } } - pub(crate) fn lookahead(&mut self, predicate: impl Fn(&mut ParserImpl<'a>) -> U) -> U { + pub(crate) fn lookahead(&mut self, predicate: impl Fn(&mut ParserImpl<'a, C>) -> U) -> U { let checkpoint = self.checkpoint(); let answer = predicate(self); self.rewind(checkpoint); diff --git a/crates/oxc_parser/src/error_handler.rs b/crates/oxc_parser/src/error_handler.rs index 29e444134fd0f..29da0f92f9339 100644 --- a/crates/oxc_parser/src/error_handler.rs +++ b/crates/oxc_parser/src/error_handler.rs @@ -4,7 +4,7 @@ use oxc_allocator::Dummy; use oxc_diagnostics::OxcDiagnostic; use oxc_span::Span; -use crate::{ParserImpl, diagnostics, lexer::Kind}; +use crate::{ParserConfig as Config, ParserImpl, diagnostics, lexer::Kind}; /// Fatal parsing error. #[derive(Debug, Clone)] @@ -15,7 +15,7 @@ pub struct FatalError { pub errors_len: usize, } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { #[cold] pub(crate) fn set_unexpected(&mut self) { // The lexer should have reported a more meaningful diagnostic @@ -105,7 +105,7 @@ impl<'a> ParserImpl<'a> { // error, we detect these patterns and provide helpful guidance on how to resolve the conflict. // // Inspired by rust-lang/rust#106242 -impl ParserImpl<'_> { +impl ParserImpl<'_, C> { /// Check if the current position looks like a merge conflict marker. 
/// /// Detects the following Git conflict markers: diff --git a/crates/oxc_parser/src/js/arrow.rs b/crates/oxc_parser/src/js/arrow.rs index 0ef09bf6993fa..b7419df8e7fb6 100644 --- a/crates/oxc_parser/src/js/arrow.rs +++ b/crates/oxc_parser/src/js/arrow.rs @@ -4,7 +4,7 @@ use oxc_span::{FileExtension, GetSpan}; use oxc_syntax::precedence::Precedence; use super::{FunctionKind, Tristate}; -use crate::{Context, ParserImpl, diagnostics, lexer::Kind}; +use crate::{Context, ParserConfig as Config, ParserImpl, diagnostics, lexer::Kind}; struct ArrowFunctionHead<'a> { type_parameters: Option>>, @@ -14,7 +14,7 @@ struct ArrowFunctionHead<'a> { span: u32, } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { pub(super) fn try_parse_parenthesized_arrow_function_expression( &mut self, allow_return_type_in_arrow_function: bool, diff --git a/crates/oxc_parser/src/js/binding.rs b/crates/oxc_parser/src/js/binding.rs index 0a227336abcd8..24d2670ada0c6 100644 --- a/crates/oxc_parser/src/js/binding.rs +++ b/crates/oxc_parser/src/js/binding.rs @@ -2,9 +2,9 @@ use oxc_allocator::Box; use oxc_ast::ast::*; use oxc_span::GetSpan; -use crate::{Context, ParserImpl, diagnostics, lexer::Kind}; +use crate::{Context, ParserConfig as Config, ParserImpl, diagnostics, lexer::Kind}; -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { /// `BindingElement` /// `SingleNameBinding` /// `BindingPattern`[?Yield, ?Await] `Initializer`[+In, ?Yield, ?Await]opt diff --git a/crates/oxc_parser/src/js/class.rs b/crates/oxc_parser/src/js/class.rs index 97f4bff014fcb..33d4b4600d3fa 100644 --- a/crates/oxc_parser/src/js/class.rs +++ b/crates/oxc_parser/src/js/class.rs @@ -4,7 +4,7 @@ use oxc_ecmascript::PropName; use oxc_span::{GetSpan, Span}; use crate::{ - Context, ParserImpl, StatementContext, diagnostics, + Context, ParserConfig as Config, ParserImpl, StatementContext, diagnostics, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, }; @@ -14,7 +14,7 @@ use 
super::FunctionKind; type ImplementsWithKeywordSpan<'a> = (Span, Vec<'a, TSClassImplements<'a>>); /// Section 15.7 Class Definitions -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { // `start_span` points at the start of all decoractors and `class` keyword. pub(crate) fn parse_class_statement( &mut self, diff --git a/crates/oxc_parser/src/js/declaration.rs b/crates/oxc_parser/src/js/declaration.rs index 3b7323379e59e..de18134af89e7 100644 --- a/crates/oxc_parser/src/js/declaration.rs +++ b/crates/oxc_parser/src/js/declaration.rs @@ -3,9 +3,9 @@ use oxc_ast::ast::*; use oxc_span::GetSpan; use super::VariableDeclarationParent; -use crate::{ParserImpl, StatementContext, diagnostics, lexer::Kind}; +use crate::{ParserConfig as Config, ParserImpl, StatementContext, diagnostics, lexer::Kind}; -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { pub(crate) fn parse_let(&mut self, stmt_ctx: StatementContext) -> Statement<'a> { let span = self.start_span(); diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index e89b0db64e5cb..1d61b87bf02e5 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -17,12 +17,12 @@ use super::{ }, }; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig as Config, ParserImpl, diagnostics, lexer::{Kind, parse_big_int, parse_float, parse_int}, modifiers::Modifiers, }; -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { pub(crate) fn parse_paren_expression(&mut self) -> Expression<'a> { let opening_span = self.cur_token().span(); self.expect(Kind::LParen); diff --git a/crates/oxc_parser/src/js/function.rs b/crates/oxc_parser/src/js/function.rs index 940014f4a41c6..624537c500f8c 100644 --- a/crates/oxc_parser/src/js/function.rs +++ b/crates/oxc_parser/src/js/function.rs @@ -4,7 +4,7 @@ use oxc_span::{GetSpan, Span}; use super::FunctionKind; use crate::{ - Context, ParserImpl, StatementContext, 
diagnostics, + Context, ParserConfig as Config, ParserImpl, StatementContext, diagnostics, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, }; @@ -19,7 +19,7 @@ impl FunctionKind { } } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { pub(crate) fn at_function_with_async(&mut self) -> bool { self.at(Kind::Function) || self.at(Kind::Async) && { diff --git a/crates/oxc_parser/src/js/grammar.rs b/crates/oxc_parser/src/js/grammar.rs index 0692416a1e366..0a1fa549dbddd 100644 --- a/crates/oxc_parser/src/js/grammar.rs +++ b/crates/oxc_parser/src/js/grammar.rs @@ -3,14 +3,14 @@ use oxc_ast::ast::*; use oxc_span::GetSpan; -use crate::{ParserImpl, diagnostics}; +use crate::{ParserConfig as Config, ParserImpl, diagnostics}; -pub trait CoverGrammar<'a, T>: Sized { - fn cover(value: T, p: &mut ParserImpl<'a>) -> Self; +pub trait CoverGrammar<'a, T, C: Config>: Sized { + fn cover(value: T, p: &mut ParserImpl<'a, C>) -> Self; } -impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTarget<'a> { - fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: Config> CoverGrammar<'a, Expression<'a>, C> for AssignmentTarget<'a> { + fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a, C>) -> Self { match expr { Expression::ArrayExpression(array_expr) => { let pat = ArrayAssignmentTarget::cover(array_expr.unbox(), p); @@ -25,8 +25,8 @@ impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, Expression<'a>> for SimpleAssignmentTarget<'a> { - fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: Config> CoverGrammar<'a, Expression<'a>, C> for SimpleAssignmentTarget<'a> { + fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a, C>) -> Self { match expr { Expression::Identifier(ident) => { SimpleAssignmentTarget::AssignmentTargetIdentifier(ident) @@ -90,8 +90,8 @@ impl<'a> CoverGrammar<'a, Expression<'a>> for SimpleAssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, 
ArrayExpression<'a>> for ArrayAssignmentTarget<'a> { - fn cover(expr: ArrayExpression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: Config> CoverGrammar<'a, ArrayExpression<'a>, C> for ArrayAssignmentTarget<'a> { + fn cover(expr: ArrayExpression<'a>, p: &mut ParserImpl<'a, C>) -> Self { let mut elements = p.ast.vec(); let mut rest = None; @@ -136,8 +136,8 @@ impl<'a> CoverGrammar<'a, ArrayExpression<'a>> for ArrayAssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTargetMaybeDefault<'a> { - fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: Config> CoverGrammar<'a, Expression<'a>, C> for AssignmentTargetMaybeDefault<'a> { + fn cover(expr: Expression<'a>, p: &mut ParserImpl<'a, C>) -> Self { match expr { Expression::AssignmentExpression(assignment_expr) => { if assignment_expr.operator != AssignmentOperator::Assign { @@ -156,14 +156,16 @@ impl<'a> CoverGrammar<'a, Expression<'a>> for AssignmentTargetMaybeDefault<'a> { } } -impl<'a> CoverGrammar<'a, AssignmentExpression<'a>> for AssignmentTargetWithDefault<'a> { - fn cover(expr: AssignmentExpression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: Config> CoverGrammar<'a, AssignmentExpression<'a>, C> + for AssignmentTargetWithDefault<'a> +{ + fn cover(expr: AssignmentExpression<'a>, p: &mut ParserImpl<'a, C>) -> Self { p.ast.assignment_target_with_default(expr.span, expr.left, expr.right) } } -impl<'a> CoverGrammar<'a, ObjectExpression<'a>> for ObjectAssignmentTarget<'a> { - fn cover(expr: ObjectExpression<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: Config> CoverGrammar<'a, ObjectExpression<'a>, C> for ObjectAssignmentTarget<'a> { + fn cover(expr: ObjectExpression<'a>, p: &mut ParserImpl<'a, C>) -> Self { let mut properties = p.ast.vec(); let mut rest = None; @@ -203,8 +205,8 @@ impl<'a> CoverGrammar<'a, ObjectExpression<'a>> for ObjectAssignmentTarget<'a> { } } -impl<'a> CoverGrammar<'a, ObjectProperty<'a>> for AssignmentTargetProperty<'a> { 
- fn cover(property: ObjectProperty<'a>, p: &mut ParserImpl<'a>) -> Self { +impl<'a, C: Config> CoverGrammar<'a, ObjectProperty<'a>, C> for AssignmentTargetProperty<'a> { + fn cover(property: ObjectProperty<'a>, p: &mut ParserImpl<'a, C>) -> Self { if property.shorthand { let binding = match property.key { PropertyKey::StaticIdentifier(ident) => { diff --git a/crates/oxc_parser/src/js/module.rs b/crates/oxc_parser/src/js/module.rs index 40c68e68d6e33..428b49ba5b08c 100644 --- a/crates/oxc_parser/src/js/module.rs +++ b/crates/oxc_parser/src/js/module.rs @@ -5,7 +5,7 @@ use rustc_hash::FxHashMap; use super::FunctionKind; use crate::{ - ParserImpl, diagnostics, + ParserConfig as Config, ParserImpl, diagnostics, lexer::Kind, modifiers::{Modifier, ModifierFlags, ModifierKind, Modifiers}, }; @@ -26,7 +26,7 @@ enum ImportOrExportSpecifier<'a> { Export(ExportSpecifier<'a>), } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { /// [Import Call](https://tc39.es/ecma262/#sec-import-calls) /// `ImportCall` : import ( `AssignmentExpression` ) pub(crate) fn parse_import_expression( diff --git a/crates/oxc_parser/src/js/object.rs b/crates/oxc_parser/src/js/object.rs index 983f39ef3dc61..d5f6da9219081 100644 --- a/crates/oxc_parser/src/js/object.rs +++ b/crates/oxc_parser/src/js/object.rs @@ -3,14 +3,14 @@ use oxc_ast::ast::*; use oxc_syntax::operator::AssignmentOperator; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig as Config, ParserImpl, diagnostics, lexer::Kind, modifiers::{ModifierFlags, Modifiers}, }; use super::FunctionKind; -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { /// [Object Expression](https://tc39.es/ecma262/#sec-object-initializer) /// `ObjectLiteral`[Yield, Await] : /// { } diff --git a/crates/oxc_parser/src/js/statement.rs b/crates/oxc_parser/src/js/statement.rs index bcd3c07860970..42b2a4a6bcbda 100644 --- a/crates/oxc_parser/src/js/statement.rs +++ b/crates/oxc_parser/src/js/statement.rs @@ -4,12 
+4,12 @@ use oxc_span::{Atom, GetSpan, Span}; use super::{VariableDeclarationParent, grammar::CoverGrammar}; use crate::{ - Context, ParserImpl, StatementContext, diagnostics, + Context, ParserConfig as Config, ParserImpl, StatementContext, diagnostics, lexer::Kind, modifiers::{Modifier, ModifierFlags, ModifierKind, Modifiers}, }; -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { // Section 12 // The InputElementHashbangOrRegExp goal is used at the start of a Script // or Module. diff --git a/crates/oxc_parser/src/jsx/mod.rs b/crates/oxc_parser/src/jsx/mod.rs index da2ab25d520cf..d6ed161b47738 100644 --- a/crates/oxc_parser/src/jsx/mod.rs +++ b/crates/oxc_parser/src/jsx/mod.rs @@ -4,7 +4,7 @@ use oxc_allocator::{Allocator, Box, Dummy, Vec}; use oxc_ast::ast::*; use oxc_span::{Atom, GetSpan, Span}; -use crate::{ParserImpl, diagnostics, lexer::Kind}; +use crate::{ParserConfig as Config, ParserImpl, diagnostics, lexer::Kind}; /// Represents either a closing JSX element or fragment. enum JSXClosing<'a> { @@ -20,7 +20,7 @@ impl<'a> Dummy<'a> for JSXClosing<'a> { } } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { pub(crate) fn parse_jsx_expression(&mut self) -> Expression<'a> { let span = self.start_span(); self.bump_any(); // bump `<` diff --git a/crates/oxc_parser/src/lexer/byte_handlers.rs b/crates/oxc_parser/src/lexer/byte_handlers.rs index fa88842794504..ff3efe1668d57 100644 --- a/crates/oxc_parser/src/lexer/byte_handlers.rs +++ b/crates/oxc_parser/src/lexer/byte_handlers.rs @@ -1,50 +1,67 @@ use oxc_data_structures::assert_unchecked; -use crate::diagnostics; +use crate::{ + config::{LexerConfig as Config, NoTokensLexerConfig, RuntimeLexerConfig, TokensLexerConfig}, + diagnostics, +}; use super::{Kind, Lexer}; -impl Lexer<'_> { +impl Lexer<'_, C> { /// Handle next byte of source. /// /// # SAFETY /// /// * Lexer must not be at end of file. 
/// * `byte` must be next byte of source code, corresponding to current position of `lexer.source`. - /// * Only `BYTE_HANDLERS` for ASCII characters may use the `ascii_byte_handler!()` macro. + /// * Only byte handlers for ASCII characters may use the `ascii_byte_handler!()` macro. // `#[inline(always)]` to ensure is inlined into `read_next_token` #[expect(clippy::inline_always)] #[inline(always)] pub(super) unsafe fn handle_byte(&mut self, byte: u8) -> Kind { + let byte_handlers = self.config.byte_handlers(); // SAFETY: Caller guarantees to uphold safety invariants - unsafe { BYTE_HANDLERS[byte as usize](self) } + unsafe { byte_handlers[byte as usize](self) } } } -type ByteHandler = unsafe fn(&mut Lexer<'_>) -> Kind; +pub type ByteHandler = unsafe fn(&mut Lexer<'_, C>) -> Kind; +pub type ByteHandlers = [ByteHandler; 256]; -/// Lookup table mapping any incoming byte to a handler function defined below. +/// Macro to create a lookup table mapping any incoming byte to a handler function defined below. 
/// #[rustfmt::skip] -static BYTE_HANDLERS: [ByteHandler; 256] = [ -// 0 1 2 3 4 5 6 7 8 9 A B C D E F // - ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, SPS, LIN, ISP, ISP, LIN, ERR, ERR, // 0 - ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1 - SPS, EXL, QOD, HAS, IDT, PRC, AMP, QOS, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2 - ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, GTR, QST, // 3 - AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4 - IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, ESC, BTC, CRT, IDT, // 5 - TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, IDT, L_I, IDT, L_K, L_L, L_M, L_N, L_O, // 6 - L_P, IDT, L_R, L_S, L_T, L_U, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 7 - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 8 - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 9 - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // A - UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // B - UER, UER, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C - UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D - UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E - UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // F -]; +macro_rules! 
byte_handlers { + () => { + [ + // 0 1 2 3 4 5 6 7 8 9 A B C D E F // + ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, SPS, LIN, ISP, ISP, LIN, ERR, ERR, // 0 + ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1 + SPS, EXL, QOD, HAS, IDT, PRC, AMP, QOS, PNO, PNC, ATR, PLS, COM, MIN, PRD, SLH, // 2 + ZER, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, COL, SEM, LSS, EQL, GTR, QST, // 3 + AT_, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, // 4 + IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, IDT, BTO, ESC, BTC, CRT, IDT, // 5 + TPL, L_A, L_B, L_C, L_D, L_E, L_F, L_G, IDT, L_I, IDT, L_K, L_L, L_M, L_N, L_O, // 6 + L_P, IDT, L_R, L_S, L_T, L_U, L_V, L_W, IDT, L_Y, IDT, BEO, PIP, BEC, TLD, ERR, // 7 + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 8 + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // 9 + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // A + UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // B + UER, UER, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E + UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // F + ] + }; +} + +pub mod byte_handler_tables { + use super::*; + + pub static NO_TOKENS: ByteHandlers = byte_handlers!(); + pub static WITH_TOKENS: ByteHandlers = byte_handlers!(); + pub static RUNTIME_TOKENS: ByteHandlers = byte_handlers!(); +} /// Macro for defining byte handler for an ASCII character. /// @@ -55,7 +72,7 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [ /// next char is ASCII, and it uses that information to optimize the rest of the handler. /// e.g. `lexer.consume_char()` becomes just a single assembly instruction. 
/// Without the assertions, the compiler is unable to deduce the next char is ASCII, due to -/// the indirection of the `BYTE_HANDLERS` jump table. +/// the indirection of the byte handlers jump table. /// /// These assertions are unchecked (i.e. won't panic) and will cause UB if they're incorrect. /// @@ -73,7 +90,7 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [ /// /// ``` /// #[expect(non_snake_case)] -/// fn SPS(lexer: &mut Lexer) { +/// fn SPS(lexer: &mut Lexer<'_, C>) -> Kind { /// // SAFETY: This macro is only used for ASCII characters /// unsafe { /// assert_unchecked!(!lexer.source.is_eof()); @@ -88,7 +105,7 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [ macro_rules! ascii_byte_handler { ($id:ident($lex:ident) $body:expr) => { #[expect(non_snake_case)] - fn $id($lex: &mut Lexer) -> Kind { + fn $id($lex: &mut Lexer<'_, C>) -> Kind { // SAFETY: This macro is only used for ASCII characters unsafe { assert_unchecked!(!$lex.source.is_eof()); @@ -123,7 +140,7 @@ macro_rules! ascii_byte_handler { /// /// ``` /// #[expect(non_snake_case)] -/// fn L_G(lexer: &mut Lexer) -> Kind { +/// fn L_G(lexer: &mut Lexer<'_, C>) -> Kind { /// // SAFETY: This macro is only used for ASCII characters /// let id_without_first_char = unsafe { lexer.identifier_name_handler() }; /// match id_without_first_char { @@ -136,7 +153,7 @@ macro_rules! ascii_byte_handler { macro_rules! ascii_identifier_handler { ($id:ident($str:ident) $body:expr) => { #[expect(non_snake_case)] - fn $id(lexer: &mut Lexer) -> Kind { + fn $id(lexer: &mut Lexer<'_, C>) -> Kind { // SAFETY: This macro is only used for ASCII characters let $str = unsafe { lexer.identifier_name_handler() }; $body @@ -653,7 +670,7 @@ ascii_identifier_handler!(L_Y(id_without_first_char) match id_without_first_char // // Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars. 
#[expect(non_snake_case)] -fn UNI(lexer: &mut Lexer) -> Kind { +fn UNI(lexer: &mut Lexer<'_, C>) -> Kind { lexer.unicode_char_handler() } @@ -665,6 +682,6 @@ fn UNI(lexer: &mut Lexer) -> Kind { // // Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes. #[expect(non_snake_case)] -fn UER(_lexer: &mut Lexer) -> Kind { +fn UER(_lexer: &mut Lexer<'_, C>) -> Kind { unreachable!(); } diff --git a/crates/oxc_parser/src/lexer/comment.rs b/crates/oxc_parser/src/lexer/comment.rs index aad0e5a1d503e..6cd49f47eb6a7 100644 --- a/crates/oxc_parser/src/lexer/comment.rs +++ b/crates/oxc_parser/src/lexer/comment.rs @@ -3,7 +3,7 @@ use memchr::memmem::Finder; use oxc_ast::CommentKind; use oxc_syntax::line_terminator::is_line_terminator; -use crate::diagnostics; +use crate::{config::LexerConfig as Config, diagnostics}; use super::{ Kind, Lexer, cold_branch, @@ -22,7 +22,7 @@ static LINE_BREAK_TABLE: SafeByteMatchTable = static MULTILINE_COMMENT_START_TABLE: SafeByteMatchTable = safe_byte_match_table!(|b| matches!(b, b'*' | b'\r' | b'\n' | LS_OR_PS_FIRST)); -impl<'a> Lexer<'a> { +impl<'a, C: Config> Lexer<'a, C> { /// Section 12.4 Single Line Comment pub(super) fn skip_single_line_comment(&mut self) -> Kind { byte_search! { diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs index c08957cddede4..6c1149718bae1 100644 --- a/crates/oxc_parser/src/lexer/identifier.rs +++ b/crates/oxc_parser/src/lexer/identifier.rs @@ -6,7 +6,7 @@ use oxc_syntax::identifier::{ is_identifier_part, is_identifier_part_unicode, is_identifier_start_unicode, }; -use crate::diagnostics; +use crate::{config::LexerConfig as Config, diagnostics}; use super::{ Kind, Lexer, SourcePosition, cold_branch, @@ -26,7 +26,7 @@ fn is_identifier_start_ascii_byte(byte: u8) -> bool { ASCII_ID_START_TABLE.matches(byte) } -impl<'a> Lexer<'a> { +impl<'a, C: Config> Lexer<'a, C> { /// Handle identifier with ASCII start character. 
 /// Returns text of the identifier, minus its first char.
 ///
diff --git a/crates/oxc_parser/src/lexer/jsx.rs b/crates/oxc_parser/src/lexer/jsx.rs
index db828ceb3f08b..eb2f08b58a3b1 100644
--- a/crates/oxc_parser/src/lexer/jsx.rs
+++ b/crates/oxc_parser/src/lexer/jsx.rs
@@ -3,7 +3,7 @@ use memchr::memchr;
 use oxc_span::Span;
 use oxc_syntax::identifier::is_identifier_part;
 
-use crate::diagnostics;
+use crate::{config::LexerConfig as Config, diagnostics};
 
 use super::{
     Kind, Lexer, Token, cold_branch,
@@ -26,7 +26,7 @@ static JSX_CHILD_END_TABLE: SafeByteMatchTable =
 /// `JSXStringCharacter` but not '
 /// `JSXStringCharacter` ::
 /// `SourceCharacter` but not one of `HTMLCharacterReference`
-impl Lexer<'_> {
+impl<C: Config> Lexer<'_, C> {
     /// Read JSX string literal.
     /// # SAFETY
     /// * `delimiter` must be an ASCII character.
diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
index f963a3518fef5..0f56ad7d16706 100644
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -12,7 +12,7 @@ use oxc_ast::ast::RegExpFlags;
 use oxc_diagnostics::OxcDiagnostic;
 use oxc_span::{SourceType, Span};
 
-use crate::{UniquePromise, diagnostics};
+use crate::{UniquePromise, config::LexerConfig as Config, diagnostics};
 
 mod byte_handlers;
 mod comment;
@@ -33,6 +33,7 @@ mod typescript;
 mod unicode;
 mod whitespace;
 
+pub(crate) use byte_handlers::{ByteHandler, ByteHandlers, byte_handler_tables};
 pub use kind::Kind;
 pub use number::{parse_big_int, parse_float, parse_int};
 pub use token::Token;
@@ -64,7 +65,7 @@ pub enum LexerContext {
     JsxAttributeValue,
 }
 
-pub struct Lexer<'a> {
+pub struct Lexer<'a, C: Config> {
     allocator: &'a Allocator,
 
     // Wrapper around source text. Must not be changed after initialization.
@@ -100,11 +101,11 @@ pub struct Lexer<'a> {
     /// Collected tokens in source order.
     tokens: ArenaVec<'a, Token>,
 
-    /// Whether to collect tokens.
-    collect_tokens: bool,
+    /// Config
+    pub(crate) config: C,
 }
 
-impl<'a> Lexer<'a> {
+impl<'a, C: Config> Lexer<'a, C> {
     /// Create new `Lexer`.
     ///
     /// Requiring a `UniquePromise` to be provided guarantees only 1 `Lexer` can exist
@@ -113,7 +114,7 @@ impl<'a> Lexer<'a> {
         allocator: &'a Allocator,
         source_text: &'a str,
         source_type: SourceType,
-        collect_tokens: bool,
+        config: C,
         unique: UniquePromise,
     ) -> Self {
         let source = Source::new(source_text, unique);
@@ -133,7 +134,7 @@ impl<'a> Lexer<'a> {
             escaped_templates: FxHashMap::default(),
             multi_line_comment_end_finder: None,
             tokens: ArenaVec::new_in(allocator),
-            collect_tokens,
+            config,
         }
     }
 
@@ -144,9 +145,10 @@ impl<'a> Lexer<'a> {
         allocator: &'a Allocator,
         source_text: &'a str,
         source_type: SourceType,
+        config: C,
     ) -> Self {
         let unique = UniquePromise::new_for_tests_and_benchmarks();
-        Self::new(allocator, source_text, source_type, false, unique)
+        Self::new(allocator, source_text, source_type, config, unique)
     }
 
     /// Get errors.
@@ -268,7 +270,7 @@ impl<'a> Lexer<'a> {
         self.token.set_kind(kind);
         self.token.set_end(self.offset());
         let token = self.token;
-        if self.collect_tokens && !matches!(token.kind(), Kind::Eof | Kind::HashbangComment) {
+        if self.config.tokens() && !matches!(token.kind(), Kind::Eof | Kind::HashbangComment) {
             if REPLACE_SAME_START {
                 debug_assert!(self.tokens.last().is_some_and(|last| last.start() == token.start()));
                 let last = self.tokens.last_mut().unwrap();
diff --git a/crates/oxc_parser/src/lexer/numeric.rs b/crates/oxc_parser/src/lexer/numeric.rs
index 1c2bedd66cb31..351b8b406d599 100644
--- a/crates/oxc_parser/src/lexer/numeric.rs
+++ b/crates/oxc_parser/src/lexer/numeric.rs
@@ -1,10 +1,10 @@
 use oxc_syntax::identifier::{is_identifier_part_ascii, is_identifier_start};
 
-use crate::diagnostics;
+use crate::{config::LexerConfig as Config, diagnostics};
 
 use super::{Kind, Lexer, Span};
 
-impl Lexer<'_> {
+impl<C: Config> Lexer<'_, C> {
     /// 12.9.3 Numeric Literals with `0` prefix
     pub(super) fn
read_zero(&mut self) -> Kind {
         match self.peek_byte() {
diff --git a/crates/oxc_parser/src/lexer/punctuation.rs b/crates/oxc_parser/src/lexer/punctuation.rs
index a6586edc7d30c..99033fb3cd3f5 100644
--- a/crates/oxc_parser/src/lexer/punctuation.rs
+++ b/crates/oxc_parser/src/lexer/punctuation.rs
@@ -1,9 +1,10 @@
 use oxc_span::Span;
 
+use crate::{config::LexerConfig as Config, diagnostics};
+
 use super::{Kind, Lexer, Token};
-use crate::diagnostics;
 
-impl Lexer<'_> {
+impl<C: Config> Lexer<'_, C> {
     /// Section 12.8 Punctuators
     pub(super) fn read_dot(&mut self) -> Kind {
         if self.peek_2_bytes() == Some([b'.', b'.']) {
diff --git a/crates/oxc_parser/src/lexer/regex.rs b/crates/oxc_parser/src/lexer/regex.rs
index 2e19bad160cda..b0e5cb7f51b62 100644
--- a/crates/oxc_parser/src/lexer/regex.rs
+++ b/crates/oxc_parser/src/lexer/regex.rs
@@ -1,10 +1,10 @@
 use oxc_syntax::line_terminator::is_line_terminator;
 
-use crate::diagnostics;
+use crate::{config::LexerConfig as Config, diagnostics};
 
 use super::{Kind, Lexer, RegExpFlags, Token};
 
-impl Lexer<'_> {
+impl<C: Config> Lexer<'_, C> {
     /// Re-tokenize the current `/` or `/=` and return `RegExp`
     /// See Section 12:
     /// The `InputElementRegExp` goal symbol is used in all syntactic grammar contexts
diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs
index 74a8b7aed16bb..cf4b7c37d9f05 100644
--- a/crates/oxc_parser/src/lexer/string.rs
+++ b/crates/oxc_parser/src/lexer/string.rs
@@ -2,7 +2,7 @@ use std::cmp::max;
 
 use oxc_allocator::StringBuilder;
 
-use crate::diagnostics;
+use crate::{config::LexerConfig as Config, diagnostics};
 
 use super::{
     Kind, Lexer, LexerContext, Span, Token, cold_branch,
@@ -202,7 +202,7 @@ macro_rules! handle_string_literal_escape {
 }
 
 /// 12.9.4 String Literals
-impl<'a> Lexer<'a> {
+impl<'a, C: Config> Lexer<'a, C> {
     /// Read string literal delimited with `"`.
     /// # SAFETY
     /// Next character must be `"`.
diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs
index b8cc64edcb7ec..0deee04066fc7 100644
--- a/crates/oxc_parser/src/lexer/template.rs
+++ b/crates/oxc_parser/src/lexer/template.rs
@@ -2,7 +2,7 @@ use std::{cmp::max, str};
 
 use oxc_allocator::StringBuilder;
 
-use crate::diagnostics;
+use crate::{config::LexerConfig as Config, diagnostics};
 
 use super::{
     Kind, Lexer, SourcePosition, Token, cold_branch,
@@ -33,7 +33,7 @@ static TEMPLATE_LITERAL_ESCAPED_MATCH_TABLE: SafeByteMatchTable = safe_byte_matc
 );
 
 /// 12.8.6 Template Literal Lexical Components
-impl<'a> Lexer<'a> {
+impl<'a, C: Config> Lexer<'a, C> {
     /// Read template literal component.
     ///
     /// This function handles the common case where template contains no escapes or `\r` characters
@@ -409,6 +409,8 @@ mod test {
     use oxc_allocator::Allocator;
     use oxc_span::SourceType;
 
+    use crate::config::NoTokensLexerConfig;
+
     use super::super::{Kind, Lexer, UniquePromise};
 
     #[test]
@@ -442,8 +444,13 @@ mod test {
     fn run_test(source_text: String, expected_escaped: String, is_only_part: bool) {
         let allocator = Allocator::default();
         let unique = UniquePromise::new_for_tests_and_benchmarks();
-        let mut lexer =
-            Lexer::new(&allocator, &source_text, SourceType::default(), false, unique);
+        let mut lexer = Lexer::new(
+            &allocator,
+            &source_text,
+            SourceType::default(),
+            NoTokensLexerConfig,
+            unique,
+        );
         let token = lexer.next_token();
         assert_eq!(
             token.kind(),
diff --git a/crates/oxc_parser/src/lexer/typescript.rs b/crates/oxc_parser/src/lexer/typescript.rs
index a04731ce08875..7595964f1b07c 100644
--- a/crates/oxc_parser/src/lexer/typescript.rs
+++ b/crates/oxc_parser/src/lexer/typescript.rs
@@ -1,6 +1,8 @@
+use crate::config::LexerConfig as Config;
+
 use super::{Kind, Lexer, Token};
 
-impl Lexer<'_> {
+impl<C: Config> Lexer<'_, C> {
     /// Re-tokenize '<<' or '<=' or '<<=' to '<'
     pub(crate) fn re_lex_as_typescript_l_angle(&mut self, offset: u32) -> Token {
         self.token.set_start(self.offset() -
offset);
diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs
index 15894b5f290dc..dabf07ac04910 100644
--- a/crates/oxc_parser/src/lexer/unicode.rs
+++ b/crates/oxc_parser/src/lexer/unicode.rs
@@ -2,7 +2,7 @@ use std::{borrow::Cow, fmt::Write};
 
 use cow_utils::CowUtils;
 
-use crate::diagnostics;
+use crate::{config::LexerConfig as Config, diagnostics};
 use oxc_allocator::StringBuilder;
 use oxc_syntax::{
     identifier::{
@@ -29,7 +29,7 @@ enum UnicodeEscape {
     LoneSurrogate(u32),
 }
 
-impl<'a> Lexer<'a> {
+impl<'a, C: Config> Lexer<'a, C> {
     pub(super) fn unicode_char_handler(&mut self) -> Kind {
         let c = self.peek_char().unwrap();
         match c {
diff --git a/crates/oxc_parser/src/lexer/whitespace.rs b/crates/oxc_parser/src/lexer/whitespace.rs
index a770362a37b18..bc73e88580d19 100644
--- a/crates/oxc_parser/src/lexer/whitespace.rs
+++ b/crates/oxc_parser/src/lexer/whitespace.rs
@@ -1,3 +1,5 @@
+use crate::config::LexerConfig as Config;
+
 use super::{
     Kind, Lexer,
     search::{SafeByteMatchTable, byte_search, safe_byte_match_table},
@@ -6,7 +8,7 @@ use super::{
 static NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE: SafeByteMatchTable =
     safe_byte_match_table!(|b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'));
 
-impl Lexer<'_> {
+impl<C: Config> Lexer<'_, C> {
     pub(super) fn line_break_handler(&mut self) -> Kind {
         self.token.set_is_on_new_line(true);
         self.trivia_builder.handle_newline();
diff --git a/crates/oxc_parser/src/lib.rs b/crates/oxc_parser/src/lib.rs
index 9b8b91fa7317e..320ee6b6725ae 100644
--- a/crates/oxc_parser/src/lib.rs
+++ b/crates/oxc_parser/src/lib.rs
@@ -64,6 +64,7 @@
 //!
 //!
See [full linter example](https://github.com/Boshen/oxc/blob/ab2ef4f89ba3ca50c68abb2ca43e36b7793f3673/crates/oxc_linter/examples/linter.rs#L38-L39) +pub mod config; mod context; mod cursor; mod error_handler; @@ -95,6 +96,7 @@ use oxc_syntax::module_record::ModuleRecord; pub use crate::lexer::{Kind, Token}; use crate::{ + config::{LexerConfig, NoTokensParserConfig, ParserConfig}, context::{Context, StatementContext}, error_handler::FatalError, lexer::Lexer, @@ -169,7 +171,7 @@ pub struct ParserReturn<'a> { /// Lexed tokens in source order. /// - /// Tokens are only collected when [`ParseOptions::collect_tokens`] is enabled. + /// Tokens are only collected when tokens are enabled in [`ParserConfig`]. pub tokens: oxc_allocator::Vec<'a, Token>, /// Whether the parser panicked and terminated early. @@ -225,11 +227,6 @@ pub struct ParseOptions { /// /// [`V8IntrinsicExpression`]: oxc_ast::ast::V8IntrinsicExpression pub allow_v8_intrinsics: bool, - - /// Collect lexer tokens and return them in [`ParserReturn::tokens`]. - /// - /// Default: `false` - pub collect_tokens: bool, } impl Default for ParseOptions { @@ -240,7 +237,6 @@ impl Default for ParseOptions { allow_return_outside_function: false, preserve_parens: true, allow_v8_intrinsics: false, - collect_tokens: false, } } } @@ -248,11 +244,12 @@ impl Default for ParseOptions { /// Recursive Descent Parser for ECMAScript and TypeScript /// /// See [`Parser::parse`] for entry function. -pub struct Parser<'a> { +pub struct Parser<'a, C: ParserConfig = NoTokensParserConfig> { allocator: &'a Allocator, source_text: &'a str, source_type: SourceType, options: ParseOptions, + config: C, } impl<'a> Parser<'a> { @@ -264,15 +261,31 @@ impl<'a> Parser<'a> { /// - `source_type`: Source type (e.g. 
JavaScript, TypeScript, JSX, ESM Module, Script)
     pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
         let options = ParseOptions::default();
-        Self { allocator, source_text, source_type, options }
+        Self { allocator, source_text, source_type, options, config: NoTokensParserConfig }
     }
+}
 
+impl<'a, C: ParserConfig> Parser<'a, C> {
     /// Set parse options
     #[must_use]
     pub fn with_options(mut self, options: ParseOptions) -> Self {
         self.options = options;
         self
     }
+
+    /// Set parser config.
+    ///
+    /// See [`ParserConfig`] for more details.
+    #[must_use]
+    pub fn with_config<Config: ParserConfig>(self, config: Config) -> Parser<'a, Config> {
+        Parser {
+            allocator: self.allocator,
+            source_text: self.source_text,
+            source_type: self.source_type,
+            options: self.options,
+            config,
+        }
+    }
 }
 
 mod parser_parse {
@@ -309,7 +322,7 @@ mod parser_parse {
         }
     }
 
-    impl<'a> Parser<'a> {
+    impl<'a, C: ParserConfig> Parser<'a, C> {
         /// Main entry point
         ///
         /// Returns an empty `Program` on unrecoverable error,
@@ -323,6 +336,7 @@ mod parser_parse {
                 self.source_text,
                 self.source_type,
                 self.options,
+                self.config,
                 unique,
             );
             parser.parse()
@@ -354,6 +368,7 @@ mod parser_parse {
                 self.source_text,
                 self.source_type,
                 self.options,
+                self.config,
                 unique,
             );
             parser.parse_expression()
@@ -364,10 +379,11 @@ use parser_parse::UniquePromise;
 
 /// Implementation of parser.
 /// `Parser` is just a public wrapper, the guts of the implementation is in this type.
-struct ParserImpl<'a> {
+struct ParserImpl<'a, C: ParserConfig> {
+    /// Options
     options: ParseOptions,
 
-    pub(crate) lexer: Lexer<'a>,
+    pub(crate) lexer: Lexer<'a, C::LexerConfig>,
 
     /// SourceType: JavaScript or TypeScript, Script or Module, jsx support?
     source_type: SourceType,
@@ -410,22 +426,24 @@ struct ParserImpl<'a> {
     is_ts: bool,
 }
 
-impl<'a> ParserImpl<'a> {
+impl<'a, C: ParserConfig> ParserImpl<'a, C> {
     /// Create a new `ParserImpl`.
/// /// Requiring a `UniquePromise` to be provided guarantees only 1 `ParserImpl` can exist /// on a single thread at one time. #[inline] + #[expect(clippy::needless_pass_by_value)] pub fn new( allocator: &'a Allocator, source_text: &'a str, source_type: SourceType, options: ParseOptions, + config: C, unique: UniquePromise, ) -> Self { Self { options, - lexer: Lexer::new(allocator, source_text, source_type, options.collect_tokens, unique), + lexer: Lexer::new(allocator, source_text, source_type, config.lexer_config(), unique), source_type, source_text, errors: vec![], @@ -580,7 +598,7 @@ impl<'a> ParserImpl<'a> { // Token stream is already complete from the first parse. // Reparsing here is only to patch AST nodes, so keep the original token stream. let original_tokens = - if self.options.collect_tokens { Some(self.lexer.take_tokens()) } else { None }; + if self.lexer.config.tokens() { Some(self.lexer.take_tokens()) } else { None }; let checkpoints = std::mem::take(&mut self.state.potential_await_reparse); for (stmt_index, checkpoint) in checkpoints { diff --git a/crates/oxc_parser/src/modifiers.rs b/crates/oxc_parser/src/modifiers.rs index fa29b164b3345..bbeec83d0a236 100644 --- a/crates/oxc_parser/src/modifiers.rs +++ b/crates/oxc_parser/src/modifiers.rs @@ -8,7 +8,7 @@ use oxc_diagnostics::OxcDiagnostic; use oxc_span::Span; use crate::{ - ParserImpl, diagnostics, + ParserConfig as Config, ParserImpl, diagnostics, lexer::{Kind, Token}, }; @@ -313,7 +313,7 @@ impl std::fmt::Display for ModifierKind { } } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { pub(crate) fn eat_modifiers_before_declaration(&mut self) -> Modifiers<'a> { if !self.at_modifier() { return Modifiers::empty(); @@ -624,8 +624,8 @@ impl<'a> ParserImpl<'a> { // Also `#[inline(never)]` to help `verify_modifiers` to get inlined. 
#[cold] #[inline(never)] - fn report<'a, F>( - parser: &mut ParserImpl<'a>, + fn report<'a, C: Config, F>( + parser: &mut ParserImpl<'a, C>, modifiers: &Modifiers<'a>, allowed: ModifierFlags, strict: bool, diff --git a/crates/oxc_parser/src/ts/statement.rs b/crates/oxc_parser/src/ts/statement.rs index b9c9e549d981f..0629d7145d271 100644 --- a/crates/oxc_parser/src/ts/statement.rs +++ b/crates/oxc_parser/src/ts/statement.rs @@ -3,7 +3,7 @@ use oxc_ast::ast::*; use oxc_span::{FileExtension, GetSpan}; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig as Config, ParserImpl, diagnostics, js::{FunctionKind, VariableDeclarationParent}, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, @@ -15,7 +15,7 @@ pub(super) enum CallOrConstructorSignature { Constructor, } -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { /* ------------------- Enum ------------------ */ /// `https://www.typescriptlang.org/docs/handbook/enums.html` pub(crate) fn parse_ts_enum_declaration( diff --git a/crates/oxc_parser/src/ts/types.rs b/crates/oxc_parser/src/ts/types.rs index d7ee5e40148b5..cc9c560cc5967 100644 --- a/crates/oxc_parser/src/ts/types.rs +++ b/crates/oxc_parser/src/ts/types.rs @@ -4,14 +4,14 @@ use oxc_span::GetSpan; use oxc_syntax::operator::UnaryOperator; use crate::{ - Context, ParserImpl, diagnostics, + Context, ParserConfig as Config, ParserImpl, diagnostics, lexer::Kind, modifiers::{ModifierFlags, ModifierKind, Modifiers}, }; use super::{super::js::FunctionKind, statement::CallOrConstructorSignature}; -impl<'a> ParserImpl<'a> { +impl<'a, C: Config> ParserImpl<'a, C> { pub(crate) fn parse_ts_type(&mut self) -> TSType<'a> { if self.is_start_of_function_type_or_constructor_type() { return self.parse_function_or_constructor_type(); diff --git a/napi/playground/src/lib.rs b/napi/playground/src/lib.rs index 35209f60c29dd..260236f3158f4 100644 --- a/napi/playground/src/lib.rs +++ b/napi/playground/src/lib.rs @@ -222,7 +222,6 @@ impl 
Oxc { allow_return_outside_function: parser_options.allow_return_outside_function, preserve_parens: parser_options.preserve_parens, allow_v8_intrinsics: parser_options.allow_v8_intrinsics, - collect_tokens: false, }; let ParserReturn { program, errors, module_record, .. } = Parser::new(allocator, source_text, source_type).with_options(parser_options).parse(); diff --git a/tasks/benchmark/benches/lexer.rs b/tasks/benchmark/benches/lexer.rs index ed85f2922d972..c32befeaa51b4 100644 --- a/tasks/benchmark/benches/lexer.rs +++ b/tasks/benchmark/benches/lexer.rs @@ -8,6 +8,7 @@ use oxc_ast_visit::Visit; use oxc_benchmark::{BenchmarkId, Criterion, criterion_group, criterion_main}; use oxc_parser::{ Parser, + config::{LexerConfig, NoTokensLexerConfig}, lexer::{Kind, Lexer}, }; use oxc_span::SourceType; @@ -51,7 +52,7 @@ fn bench_lexer(criterion: &mut Criterion) { // so we do the same here. let mut allocator = Allocator::default(); b.iter(|| { - lex_whole_file(&allocator, source_text, source_type); + lex_whole_file(&allocator, source_text, source_type, NoTokensLexerConfig); allocator.reset(); }); }); @@ -66,12 +67,13 @@ criterion_main!(lexer); // It's also used in `SourceCleaner` below. #[expect(clippy::inline_always)] #[inline(always)] -fn lex_whole_file<'a>( +fn lex_whole_file<'a, C: LexerConfig>( allocator: &'a Allocator, source_text: &'a str, source_type: SourceType, -) -> Lexer<'a> { - let mut lexer = Lexer::new_for_benchmarks(allocator, source_text, source_type); + config: C, +) -> Lexer<'a, C> { + let mut lexer = Lexer::new_for_benchmarks(allocator, source_text, source_type, config); if lexer.first_token().kind() != Kind::Eof { // Use `next_token_for_benchmarks` instead of `next_token`, to work around problem // where `next_token` wasn't inlined here. 
@@ -119,7 +121,7 @@ fn clean<'a>(source_text: &'a str, source_type: SourceType, allocator: &'a Alloc clean_source_text.push_str(&source_text[last_index..]); // Check lexer can lex it without any errors - let lexer = lex_whole_file(allocator, &clean_source_text, source_type); + let lexer = lex_whole_file(allocator, &clean_source_text, source_type, NoTokensLexerConfig); assert!(lexer.errors().is_empty()); clean_source_text diff --git a/tasks/coverage/src/tools.rs b/tasks/coverage/src/tools.rs index d5fe0dd277eb2..370d6baf9db4b 100644 --- a/tasks/coverage/src/tools.rs +++ b/tasks/coverage/src/tools.rs @@ -7,7 +7,7 @@ use oxc::{ ast_visit::utf8_to_utf16::Utf8ToUtf16, diagnostics::{GraphicalReportHandler, GraphicalTheme, NamedSource, OxcDiagnostic}, minifier::CompressOptions, - parser::{ParseOptions, Parser, ParserReturn}, + parser::{ParseOptions, Parser, ParserReturn, config::RuntimeParserConfig}, span::{ModuleKind, SourceType, Span}, transformer::{JsxOptions, JsxRuntime, TransformOptions}, }; @@ -838,8 +838,9 @@ pub fn run_estree_test262_tokens(files: &[Test262File]) -> Vec { let is_module = f.meta.flags.contains(&TestFlag::Module); let source_type = SourceType::script().with_module(is_module); let allocator = Allocator::new(); - let options = ParseOptions { collect_tokens: true, ..ParseOptions::default() }; - let ret = Parser::new(&allocator, &f.code, source_type).with_options(options).parse(); + let ret = Parser::new(&allocator, &f.code, source_type) + .with_config(RuntimeParserConfig::new(true)) + .parse(); if ret.panicked || !ret.errors.is_empty() { let error = @@ -887,7 +888,7 @@ pub fn run_estree_acorn_jsx_tokens(files: &[AcornJsxFile]) -> Vec Vec