Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/oxlint/src/js_plugins/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ unsafe fn parse_raw_impl(
.with_options(ParseOptions {
parse_regular_expression: true,
allow_return_outside_function: true,
collect_tokens: true,
..ParseOptions::default()
})
.parse();
Expand Down
1 change: 1 addition & 0 deletions crates/oxc_formatter/src/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub fn get_parse_options() -> ParseOptions {
allow_v8_intrinsics: true,
// `oxc_formatter` expects this to be `false`, otherwise panics
preserve_parens: false,
collect_tokens: false,
}
}

Expand Down
41 changes: 39 additions & 2 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
//! * [v8](https://v8.dev/blog/scanner)

use rustc_hash::FxHashMap;
use std::fmt::Debug;

use oxc_allocator::Allocator;
use oxc_allocator::{Allocator, Vec as ArenaVec};
use oxc_ast::ast::RegExpFlags;
use oxc_diagnostics::OxcDiagnostic;
use oxc_span::{SourceType, Span};
Expand Down Expand Up @@ -45,6 +46,7 @@ pub struct LexerCheckpoint<'a> {
source_position: SourcePosition<'a>,
token: Token,
errors_snapshot: ErrorSnapshot,
tokens_len: usize,
}

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -86,6 +88,11 @@ pub struct Lexer<'a> {

/// `memchr` Finder for end of multi-line comments. Created lazily when first used.
multi_line_comment_end_finder: Option<memchr::memmem::Finder<'static>>,

/// Tokens collected from the lexer.
///
/// If `collect_tokens` is `false`, this will be `None`.
tokens: Option<ArenaVec<'a, Token>>,
}

impl<'a> Lexer<'a> {
Expand All @@ -97,6 +104,7 @@ impl<'a> Lexer<'a> {
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
collect_tokens: bool,
unique: UniquePromise,
) -> Self {
let source = Source::new(source_text, unique);
Expand All @@ -114,6 +122,7 @@ impl<'a> Lexer<'a> {
escaped_strings: FxHashMap::default(),
escaped_templates: FxHashMap::default(),
multi_line_comment_end_finder: None,
tokens: if collect_tokens { Some(ArenaVec::new_in(allocator)) } else { None },
}
}

Expand All @@ -126,7 +135,7 @@ impl<'a> Lexer<'a> {
source_type: SourceType,
) -> Self {
let unique = UniquePromise::new_for_tests_and_benchmarks();
Self::new(allocator, source_text, source_type, unique)
Self::new(allocator, source_text, source_type, false, unique)
}

/// Get errors.
Expand All @@ -149,10 +158,15 @@ impl<'a> Lexer<'a> {
} else {
ErrorSnapshot::Count(self.errors.len())
};
let tokens_len = match &self.tokens {
Some(tokens) => tokens.len(),
None => 0,
};
LexerCheckpoint {
source_position: self.source.position(),
token: self.token,
errors_snapshot,
tokens_len,
}
}

Expand All @@ -168,6 +182,10 @@ impl<'a> Lexer<'a> {
source_position: self.source.position(),
token: self.token,
errors_snapshot,
tokens_len: match &self.tokens {
Some(tokens) => tokens.len(),
None => 0,
},
}
}

Expand All @@ -180,6 +198,9 @@ impl<'a> Lexer<'a> {
}
self.source.set_position(checkpoint.source_position);
self.token = checkpoint.token;
if let Some(tokens) = self.tokens.as_mut() {
tokens.truncate(checkpoint.tokens_len);
}
}

pub fn peek_token(&mut self) -> Token {
Expand Down Expand Up @@ -229,6 +250,9 @@ impl<'a> Lexer<'a> {
self.token.set_end(self.offset());
let token = self.token;
self.trivia_builder.handle_token(token);
if let Some(tokens) = self.tokens.as_mut() {
tokens.push(token);
}
self.token = Token::default();
token
}
Expand All @@ -239,6 +263,19 @@ impl<'a> Lexer<'a> {
self.source.advance_to_end();
}

/// Retrieve collected tokens.
/// This should only be called once per source text, as it consumes the tokens vec.
/// Panics if the lexer was not configured to collect tokens.
#[inline]
pub fn tokens(&mut self) -> ArenaVec<'a, Token> {
if let Some(tokens) = self.tokens.take() {
tokens
} else {
let backtrace = std::backtrace::Backtrace::capture();
panic!("Can't retrieve tokens because they were not collected\n{backtrace}");
Comment on lines +274 to +275
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've never seen Backtrace used before. I think panic! automatically produces a backtrace, so it's not required. Any reason why you added this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't seeing the names of the methods in the call stack until I added this. It's possible I missed something.

We don't have to keep this; I only needed it for debugging.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

@overlookmotel overlookmotel Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see. Usually running with RUST_BACKTRACE=1 gives you stack traces.

Turning on debug temporarily can also help sometimes:

oxc/Cargo.toml

Lines 247 to 255 in 5a7fcd1

[profile.dev]
# Disabling debug info speeds up local and CI builds,
# and we don't rely on it for debugging that much.
debug = false
[profile.test]
# Disabling debug info speeds up local and CI builds,
# and we don't rely on it for debugging that much.
debug = false

Let me know if neither of those works.

}
}

// ---------- Private Methods ---------- //
fn error(&mut self, error: OxcDiagnostic) {
self.errors.push(error);
Expand Down
3 changes: 2 additions & 1 deletion crates/oxc_parser/src/lexer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,8 @@ mod test {
fn run_test(source_text: String, expected_escaped: String, is_only_part: bool) {
let allocator = Allocator::default();
let unique = UniquePromise::new_for_tests_and_benchmarks();
let mut lexer = Lexer::new(&allocator, &source_text, SourceType::default(), unique);
let mut lexer =
Lexer::new(&allocator, &source_text, SourceType::default(), false, unique);
let token = lexer.next_token();
assert_eq!(
token.kind(),
Expand Down
23 changes: 20 additions & 3 deletions crates/oxc_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,16 @@ mod ts;

mod diagnostics;

pub use lexer::{Kind, Token};

// Expose lexer only in benchmarks
#[cfg(not(feature = "benchmarking"))]
mod lexer;
#[cfg(feature = "benchmarking")]
#[doc(hidden)]
pub mod lexer;

use oxc_allocator::{Allocator, Box as ArenaBox, Dummy};
use oxc_allocator::{Allocator, Box as ArenaBox, Dummy, Vec as ArenaVec};
use oxc_ast::{
AstBuilder,
ast::{Expression, Program},
Expand All @@ -98,7 +100,7 @@ use oxc_syntax::module_record::ModuleRecord;
use crate::{
context::{Context, StatementContext},
error_handler::FatalError,
lexer::{Lexer, Token},
lexer::Lexer,
module_record::ModuleRecordBuilder,
state::ParserState,
};
Expand Down Expand Up @@ -180,6 +182,11 @@ pub struct ParserReturn<'a> {

/// Whether the file is [flow](https://flow.org).
pub is_flow_language: bool,

/// All tokens collected from the lexer, in source order.
///
/// If `ParseOptions::collect_tokens` is set to `false`, this will be `None`.
pub tokens: Option<ArenaVec<'a, Token>>,
}

/// Parse options
Expand Down Expand Up @@ -221,6 +228,13 @@ pub struct ParseOptions {
///
/// [`V8IntrinsicExpression`]: oxc_ast::ast::V8IntrinsicExpression
pub allow_v8_intrinsics: bool,

/// Whether the parser should collect all tokens from the lexer and return them in the `tokens` field of the `ParserReturn` struct.
///
/// If this option is set to `false`, the `tokens` field of `ParserReturn` will be `None`.
///
/// Default: `false`
pub collect_tokens: bool,
}

impl Default for ParseOptions {
Expand All @@ -231,6 +245,7 @@ impl Default for ParseOptions {
allow_return_outside_function: false,
preserve_parens: true,
allow_v8_intrinsics: false,
collect_tokens: false,
}
}
}
Expand Down Expand Up @@ -408,7 +423,7 @@ impl<'a> ParserImpl<'a> {
) -> Self {
Self {
options,
lexer: Lexer::new(allocator, source_text, source_type, unique),
lexer: Lexer::new(allocator, source_text, source_type, options.collect_tokens, unique),
source_type,
source_text,
errors: vec![],
Expand Down Expand Up @@ -465,6 +480,7 @@ impl<'a> ParserImpl<'a> {
errors.push(error);
}
let (module_record, module_record_errors) = self.module_record_builder.build();
let tokens = if self.options.collect_tokens { Some(self.lexer.tokens()) } else { None };
if errors.len() != 1 {
errors.reserve(self.lexer.errors.len() + self.errors.len());
errors.extend(self.lexer.errors);
Expand Down Expand Up @@ -493,6 +509,7 @@ impl<'a> ParserImpl<'a> {
irregular_whitespaces,
panicked,
is_flow_language,
tokens,
}
}

Expand Down
1 change: 1 addition & 0 deletions napi/playground/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ impl Oxc {
) -> (Program<'a>, oxc::syntax::module_record::ModuleRecord<'a>) {
let parser_options = ParseOptions {
parse_regular_expression: true,
collect_tokens: false,
allow_return_outside_function: parser_options.allow_return_outside_function,
preserve_parens: parser_options.preserve_parens,
allow_v8_intrinsics: parser_options.allow_v8_intrinsics,
Expand Down
1 change: 1 addition & 0 deletions tasks/benchmark/benches/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ fn bench_parser_with_tokens(criterion: &mut Criterion) {
Parser::new(&allocator, source_text, source_type)
.with_options(ParseOptions {
parse_regular_expression: true,
collect_tokens: true,
..ParseOptions::default()
})
.parse();
Expand Down
Loading