Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 7 additions & 43 deletions crates/oxc_parser/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,48 +61,6 @@ impl<'a> ParserImpl<'a> {
self.lexer.get_template_string(self.token.start())
}

/// Peek the next token without consuming it.
///
/// Returns an EOF token for the final peek (i.e. when lookahead runs past
/// the end of input), so callers never need to handle a "no token" case.
#[inline]
pub(crate) fn peek_token(&mut self) -> Token {
self.lexer.lookahead(1)
}

/// Peek the kind of the next token without consuming it.
///
/// Yields `Kind::Eof` once lookahead runs past the end of input.
#[inline]
#[expect(dead_code)]
pub(crate) fn peek_kind(&mut self) -> Kind {
self.lexer.lookahead(1).kind()
}

/// Check whether the next (peeked) token has the given `kind`.
#[inline]
pub(crate) fn peek_at(&mut self, kind: Kind) -> bool {
let next = self.peek_token();
next.kind() == kind
}

/// Peek the token `n` positions ahead.
///
/// `n == 0` yields the current token; any other `n` defers to the lexer's
/// lookahead machinery.
#[inline]
pub(crate) fn nth(&mut self, n: u8) -> Token {
if n > 0 { self.lexer.lookahead(n) } else { self.cur_token() }
}

/// Check whether the token `n` positions ahead has the given `kind`.
#[inline]
#[expect(dead_code)]
pub(crate) fn nth_at(&mut self, n: u8, kind: Kind) -> bool {
let token = self.nth(n);
token.kind() == kind
}

/// Peek the kind of the token `n` positions ahead.
///
/// `n == 0` reports the current token's kind.
#[inline]
#[expect(dead_code)]
pub(crate) fn nth_kind(&mut self, n: u8) -> Kind {
match n {
0 => self.cur_token().kind(),
_ => self.lexer.lookahead(n).kind(),
}
}

/// Checks if the current index has token `Kind`
#[inline]
pub(crate) fn at(&self, kind: Kind) -> bool {
Expand Down Expand Up @@ -384,7 +342,13 @@ impl<'a> ParserImpl<'a> {
if first {
first = false;
} else {
if !trailing_separator && self.at(separator) && self.peek_at(close) {
if !trailing_separator
&& self.at(separator)
&& self.lookahead(|p| {
p.bump_any();
p.at(close)
})
{
Comment on lines +345 to +351
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest splitting this change out into a separate PR. Presumably these lines are where the parser perf regression is coming from. Removing the VecDeque from the lexer should be a gain in both lexer and parser benchmarks.

Then in this block of code, I think we can refactor to avoid using lookahead at all. I think that should be possible since we're immediately bumping on to the next token anyway in the next line, self.expect(separator). If we can do that, hopefully it'll remove the perf regression.

But it'd be useful to benchmark that in isolation without the change of removing the VecDeque also in the mix - hence why I think it'd be better in a separate PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@overlookmotel got it, I can make an intermediate PR to benchmark this specifically. I was planning to write a follow-up PR to improve the performance in this function and hopefully remove the lookahead.

break;
}
self.expect(separator);
Expand Down
3 changes: 0 additions & 3 deletions crates/oxc_parser/src/lexer/jsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,6 @@ impl Lexer<'_> {
}
self.consume_char();

// Clear the current lookahead `Minus` Token
self.lookahead.clear();

// Consume bytes which are part of identifier tail
let next_byte = byte_search! {
lexer: self,
Expand Down
49 changes: 0 additions & 49 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
//! * [rustc](https://github.com/rust-lang/rust/blob/1.82.0/compiler/rustc_lexer/src)
//! * [v8](https://v8.dev/blog/scanner)

use std::collections::VecDeque;

use rustc_hash::FxHashMap;

use oxc_allocator::Allocator;
Expand Down Expand Up @@ -59,12 +57,6 @@ pub enum LexerContext {
JsxAttributeValue,
}

/// A buffered lookahead entry: a token that has been lexed ahead of the
/// parser's current position, together with the source position the lexer
/// reached immediately after lexing it.
#[derive(Debug, Clone, Copy)]
struct Lookahead<'a> {
// Source position just *after* `token`; restored when this entry is consumed.
position: SourcePosition<'a>,
// The pre-lexed token itself.
token: Token,
}

pub struct Lexer<'a> {
allocator: &'a Allocator,

Expand All @@ -77,8 +69,6 @@ pub struct Lexer<'a> {

pub(crate) errors: Vec<OxcDiagnostic>,

lookahead: VecDeque<Lookahead<'a>>,

context: LexerContext,

pub(crate) trivia_builder: TriviaBuilder,
Expand Down Expand Up @@ -115,7 +105,6 @@ impl<'a> Lexer<'a> {
source_type,
token,
errors: vec![],
lookahead: VecDeque::with_capacity(4), // 4 is the maximum lookahead for TypeScript
context: LexerContext::Regular,
trivia_builder: TriviaBuilder::default(),
escaped_strings: FxHashMap::default(),
Expand Down Expand Up @@ -163,40 +152,6 @@ impl<'a> Lexer<'a> {
self.errors.truncate(checkpoint.errors_pos);
self.source.set_position(checkpoint.position);
self.token = checkpoint.token;
self.lookahead.clear();
}

/// Find the nth lookahead token lazily.
///
/// Returns the token `n` positions ahead of the current one (`n` must be
/// `> 0`). Already-buffered lookahead entries are reused; any missing ones
/// are lexed on demand and appended to the `self.lookahead` queue. The
/// lexer's source position is saved and restored, so observable lexer state
/// is unchanged apart from the grown lookahead buffer.
pub fn lookahead(&mut self, n: u8) -> Token {
let n = n as usize;
debug_assert!(n > 0);

// Fast path: this lookahead position was already lexed and buffered.
if let Some(lookahead) = self.lookahead.get(n - 1) {
return lookahead.token;
}

// Remember where we are so we can rewind after lexing ahead.
let position = self.source.position();

// Resume lexing from just after the last buffered lookahead token,
// not from the current position, to avoid re-lexing buffered tokens.
if let Some(lookahead) = self.lookahead.back() {
self.source.set_position(lookahead.position);
}

// Lex and buffer tokens until the nth lookahead entry exists.
for _i in self.lookahead.len()..n {
let kind = self.read_next_token();
let peeked = self.finish_next(kind);
self.lookahead.push_back(Lookahead { position: self.source.position(), token: peeked });
}

// Call to `finish_next` in loop above leaves `self.token = Token::default()`.
// Only circumstance in which `self.token` wouldn't have been default at start of this
// function is if we were at very start of file, before any tokens have been read, when
// `token.is_on_new_line` is `true`. But `lookahead` isn't called before the first token is
// read, so that's not possible. So no need to restore `self.token` here.
// It's already in same state as it was at start of this function.

self.source.set_position(position);

self.lookahead[n - 1].token
}

/// Set context
Expand All @@ -206,10 +161,6 @@ impl<'a> Lexer<'a> {

/// Main entry point: advance the lexer and return the next token.
///
/// If a token was already lexed ahead by `lookahead`, consume it from the
/// front of the buffer (restoring the source position recorded with it)
/// instead of re-lexing; otherwise lex a fresh token from the source.
pub fn next_token(&mut self) -> Token {
if let Some(lookahead) = self.lookahead.pop_front() {
self.source.set_position(lookahead.position);
return lookahead.token;
}
let kind = self.read_next_token();
self.finish_next(kind)
}
Expand Down
3 changes: 0 additions & 3 deletions crates/oxc_parser/src/lexer/punctuation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ impl Lexer<'_> {
pub(crate) fn re_lex_right_angle(&mut self) -> Token {
self.token.set_start(self.offset());
let kind = self.read_right_angle();
if kind != Kind::RAngle {
self.lookahead.clear();
}
self.finish_next(kind)
}

Expand Down
1 change: 0 additions & 1 deletion crates/oxc_parser/src/lexer/regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ impl Lexer<'_> {
},
);
let (pattern_end, flags, flags_error) = self.read_regex();
self.lookahead.clear();
let token = self.finish_next(Kind::RegExp);
(token, pattern_end, flags, flags_error)
}
Expand Down
1 change: 0 additions & 1 deletion crates/oxc_parser/src/lexer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,6 @@ impl<'a> Lexer<'a> {
pub(crate) fn next_template_substitution_tail(&mut self) -> Token {
self.token.set_start(self.offset() - 1);
let kind = self.read_template_literal(Kind::TemplateMiddle, Kind::TemplateTail);
self.lookahead.clear();
self.finish_next(kind)
}

Expand Down
2 changes: 0 additions & 2 deletions crates/oxc_parser/src/lexer/typescript.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ impl Lexer<'_> {
self.token.set_start(self.offset() - offset);
self.source.back(offset as usize - 1);
let kind = Kind::LAngle;
self.lookahead.clear();
self.finish_next(kind)
}

Expand All @@ -25,7 +24,6 @@ impl Lexer<'_> {
self.token.set_start(self.offset() - offset);
self.source.back(offset as usize - 1);
let kind = Kind::RAngle;
self.lookahead.clear();
self.finish_next(kind)
}
}
Loading