Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/oxc_parser/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl<'a> ParserImpl<'a> {

/// Get current template string
pub(crate) fn cur_template_string(&self) -> Option<&'a str> {
self.lexer.get_template_string(self.token.start())
self.lexer.get_template_string(self.token)
}

/// Checks if the current index has token `Kind`
Expand Down
15 changes: 7 additions & 8 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
//! * [rustc](https://github.com/rust-lang/rust/blob/1.82.0/compiler/rustc_lexer/src)
//! * [v8](https://v8.dev/blog/scanner)

use rustc_hash::FxHashMap;

use oxc_allocator::Allocator;
use oxc_ast::ast::RegExpFlags;
use oxc_diagnostics::OxcDiagnostic;
Expand Down Expand Up @@ -77,12 +75,13 @@ pub struct Lexer<'a> {

pub(crate) trivia_builder: TriviaBuilder,

/// Data store for escaped strings, indexed by [Token::start] when [Token::escaped] is true
pub escaped_strings: FxHashMap<u32, &'a str>,
/// Data store for escaped strings.
/// A non-zero `Token.escape_index` is a 1-based index into this Vec.
pub escaped_strings: Vec<&'a str>,

/// Data store for escaped templates, indexed by [Token::start] when [Token::escaped] is true
/// Data store for escaped templates.
/// `Token.escape_index` points into this Vec.
/// `None` is saved when the string contains an invalid escape sequence.
pub escaped_templates: FxHashMap<u32, Option<&'a str>>,
pub escaped_templates: Vec<Option<&'a str>>,

/// `memchr` Finder for end of multi-line comments. Created lazily when first used.
multi_line_comment_end_finder: Option<memchr::memmem::Finder<'static>>,
Expand Down Expand Up @@ -111,8 +110,8 @@ impl<'a> Lexer<'a> {
errors: vec![],
context: LexerContext::Regular,
trivia_builder: TriviaBuilder::default(),
escaped_strings: FxHashMap::default(),
escaped_templates: FxHashMap::default(),
escaped_strings: vec![],
escaped_templates: vec![],
multi_line_comment_end_finder: None,
}
}
Expand Down
12 changes: 8 additions & 4 deletions crates/oxc_parser/src/lexer/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,13 +246,17 @@ impl<'a> Lexer<'a> {

#[cold]
fn save_escaped_string(&mut self, s: &'a str) {
self.escaped_strings.insert(self.token.start(), s);
self.token.set_escaped(true);
self.escaped_strings.push(s);
// Truncation is fine: the theoretical maximum index is ~860 million, which fits in a `u32`.
#[expect(clippy::cast_possible_truncation)]
let index = self.escaped_strings.len() as u32;
self.token.set_escape_index(index);
}

pub(crate) fn get_string(&self, token: Token) -> &'a str {
if token.escaped() {
return self.escaped_strings[&token.start()];
let escape_index = token.escape_index();
if escape_index != 0 {
return self.escaped_strings[escape_index as usize - 1];
}

let raw = &self.source.whole()[token.start() as usize..token.end() as usize];
Expand Down
15 changes: 10 additions & 5 deletions crates/oxc_parser/src/lexer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,12 +395,17 @@ impl<'a> Lexer<'a> {

/// Save escaped template string
fn save_template_string(&mut self, is_valid_escape_sequence: bool, s: &'a str) {
self.escaped_templates.insert(self.token.start(), is_valid_escape_sequence.then_some(s));
self.token.set_escaped(true);
self.escaped_templates.push(is_valid_escape_sequence.then_some(s));
#[expect(clippy::cast_possible_truncation)]
self.token.set_escape_index(self.escaped_templates.len() as u32);
}

pub(crate) fn get_template_string(&self, span_start: u32) -> Option<&'a str> {
self.escaped_templates[&span_start]
pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> {
let escape_index = token.escape_index();
if escape_index == 0 {
return None;
}
self.escaped_templates[escape_index as usize - 1]
}
}

Expand Down Expand Up @@ -448,7 +453,7 @@ mod test {
token.kind(),
if is_only_part { Kind::NoSubstitutionTemplate } else { Kind::TemplateHead }
);
let escaped = lexer.escaped_templates[&token.start()];
let escaped = lexer.get_template_string(token);
assert_eq!(escaped, Some(expected_escaped.as_str()));
}

Expand Down
83 changes: 51 additions & 32 deletions crates/oxc_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@ use super::kind::Kind;
// - Bits 32-63 (32 bits): `end` (`u32`)
// - Bits 64-71 (8 bits): `kind` (`Kind`)
// - Bits 72-79 (8 bits): `is_on_new_line` (`bool`)
// - Bits 80-87 (8 bits): `escaped` (`bool`)
// - Bits 88-95 (8 bits): `lone_surrogates` (`bool`)
// - Bits 96-103 (8 bits): `has_separator` (`bool`)
// - Bits 104-127 (24 bits): unused
// - Bits 80-87 (8 bits): `lone_surrogates` (`bool`)
// - Bits 88-95 (8 bits): `has_separator` (`bool`)
// - Bits 96-127 (32 bits): `escape_index` (`u32`)

const START_SHIFT: usize = 0;
const END_SHIFT: usize = 32;
const KIND_SHIFT: usize = 64;
const IS_ON_NEW_LINE_SHIFT: usize = 72;
const ESCAPED_SHIFT: usize = 80;
const LONE_SURROGATES_SHIFT: usize = 88;
const HAS_SEPARATOR_SHIFT: usize = 96;
const LONE_SURROGATES_SHIFT: usize = 80;
const HAS_SEPARATOR_SHIFT: usize = 88;
const ESCAPE_INDEX_SHIFT: usize = 96;

const START_MASK: u128 = 0xFFFF_FFFF; // 32 bits
const END_MASK: u128 = 0xFFFF_FFFF; // 32 bits
const KIND_MASK: u128 = 0xFF; // 8 bits
const ESCAPE_INDEX_MASK: u128 = 0xFFFF_FFFF; // 32 bits
const BOOL_MASK: u128 = 0xFF; // 8 bits

const _: () = {
Expand All @@ -36,7 +36,6 @@ const _: () = {
}

assert!(is_valid_shift(IS_ON_NEW_LINE_SHIFT));
assert!(is_valid_shift(ESCAPED_SHIFT));
assert!(is_valid_shift(LONE_SURROGATES_SHIFT));
assert!(is_valid_shift(HAS_SEPARATOR_SHIFT));
};
Expand All @@ -53,9 +52,9 @@ impl Default for Token {
// end: 0,
// kind: Kind::default(),
// is_on_new_line: false,
// escaped: false,
// lone_surrogates: false,
// has_separator: false,
// escape_index: 0,
const _: () = assert!(Kind::Eof as u8 == 0);
Self(0)
}
Expand All @@ -68,9 +67,9 @@ impl fmt::Debug for Token {
.field("start", &self.start())
.field("end", &self.end())
.field("is_on_new_line", &self.is_on_new_line())
.field("escaped", &self.escaped())
.field("lone_surrogates", &self.lone_surrogates())
.field("has_separator", &self.has_separator())
.field("escape_index", &self.escape_index())
.finish()
}
}
Expand Down Expand Up @@ -149,18 +148,24 @@ impl Token {
self.0 |= u128::from(value) << IS_ON_NEW_LINE_SHIFT;
}

/// Returns true if this token has an escape sequence (i.e., escape_index > 0)
#[inline]
pub fn escaped(&self) -> bool {
// Use a pointer read rather than arithmetic as it produces less instructions.
// SAFETY: 8 bits starting at `ESCAPED_SHIFT` are only set in `Token::default` and
// `Token::set_escaped`. Both only set these bits to 0 or 1, so valid to read as a `bool`.
unsafe { self.read_bool(ESCAPED_SHIFT) }
self.escape_index() != 0
}

/// Returns `0` if token has no escape sequences.
/// Returns `index + 1` (to allow for `0` to mean no escapes)
/// of the escaped string otherwise.
Comment on lines +158 to +159
Copy link

Copilot AI Nov 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The documentation is misleading. It should clarify that this returns a 1-based index into the escaped strings/templates vectors. Consider rewording to:

/// Returns `0` if token has no escape sequences.
/// Returns a 1-based index into the escaped strings/templates vector otherwise.

This makes it clearer that the returned value is itself a 1-based position (subtract 1 to get the actual vector index), rather than leaving the reader to work out what "index + 1" refers to.

Suggested change
/// Returns `index + 1` (to allow for `0` to mean no escapes)
/// of the escaped string otherwise.
/// Returns a 1-based index into the escaped strings/templates vector otherwise.

Copilot uses AI. Check for mistakes.
#[inline]
pub(crate) fn set_escaped(&mut self, escaped: bool) {
self.0 &= !(BOOL_MASK << ESCAPED_SHIFT); // Clear current `escaped` bits
self.0 |= u128::from(escaped) << ESCAPED_SHIFT;
pub fn escape_index(&self) -> u32 {
((self.0 >> ESCAPE_INDEX_SHIFT) & ESCAPE_INDEX_MASK) as u32
}

/// Store the escape index for this token.
///
/// `index` is written into the 32 bits starting at `ESCAPE_INDEX_SHIFT`; no other bits of
/// the packed token are modified. `0` means the token has no escape sequences; a non-zero
/// value is a 1-based position (the lexer passes the store's length right after pushing).
#[inline]
pub(crate) fn set_escape_index(&mut self, index: u32) {
self.0 &= !(ESCAPE_INDEX_MASK << ESCAPE_INDEX_SHIFT); // Clear current `escape_index` bits
// `index` is a `u32`, so after widening and shifting it only occupies the cleared bits.
self.0 |= u128::from(index) << ESCAPE_INDEX_SHIFT;
}

#[inline]
Expand Down Expand Up @@ -196,7 +201,7 @@ impl Token {
/// # SAFETY
///
/// `shift` must be the location of a valid boolean "field" in [`Token`]
/// e.g. `ESCAPED_SHIFT`. The caller must guarantee that the 8 bits at
/// e.g. `IS_ON_NEW_LINE_SHIFT`. The caller must guarantee that the 8 bits at
/// `shift` contain only 0 or 1, making it safe to read as a `bool`.
///
/// # Performance analysis
Expand Down Expand Up @@ -256,6 +261,7 @@ mod test {
assert_eq!(token.end(), 0);
assert_eq!(token.kind(), Kind::Eof); // Kind::default() is Eof
assert!(!token.is_on_new_line());
assert_eq!(token.escape_index(), 0);
assert!(!token.escaped());
assert!(!token.lone_surrogates());
assert!(!token.has_separator());
Expand All @@ -268,6 +274,7 @@ mod test {
assert_eq!(token.end(), 0);
assert_eq!(token.kind(), Kind::Eof);
assert!(token.is_on_new_line());
assert_eq!(token.escape_index(), 0);
assert!(!token.escaped());
assert!(!token.lone_surrogates());
assert!(!token.has_separator());
Expand All @@ -279,7 +286,7 @@ mod test {
let start = 100u32;
let end = start + 5u32;
let is_on_new_line = true;
let escaped = false;
let escape_index = 5u32;
let lone_surrogates = true;
let has_separator = false;

Expand All @@ -288,7 +295,7 @@ mod test {
token.set_start(start);
token.set_end(end);
token.set_is_on_new_line(is_on_new_line);
token.set_escaped(escaped);
token.set_escape_index(escape_index);
token.set_lone_surrogates(lone_surrogates);
if has_separator {
// Assuming set_has_separator is not always called if false
Expand All @@ -299,7 +306,8 @@ mod test {
assert_eq!(token.start(), start);
assert_eq!(token.end(), end);
assert_eq!(token.is_on_new_line(), is_on_new_line);
assert_eq!(token.escaped(), escaped);
assert_eq!(token.escape_index(), escape_index);
assert!(token.escaped());
assert_eq!(token.lone_surrogates(), lone_surrogates);
assert_eq!(token.has_separator(), has_separator);
}
Expand All @@ -310,9 +318,10 @@ mod test {
token.set_kind(Kind::Ident);
token.set_start(10);
token.set_end(15);
// is_on_new_line, escaped, lone_surrogates, has_separator are false by default from Token::default()
// is_on_new_line, escape_index, lone_surrogates, has_separator are 0/false by default from Token::default()

assert_eq!(token.start(), 10);
assert_eq!(token.escape_index(), 0);
assert!(!token.escaped());
assert!(!token.is_on_new_line());
assert!(!token.lone_surrogates());
Expand All @@ -334,34 +343,37 @@ mod test {
token_with_flags.set_start(30);
token_with_flags.set_end(33);
token_with_flags.set_is_on_new_line(true);
token_with_flags.set_escaped(true);
token_with_flags.set_escape_index(42);
token_with_flags.set_lone_surrogates(true);
token_with_flags.set_has_separator(true);

token_with_flags.set_start(40);
assert_eq!(token_with_flags.start(), 40);
assert!(token_with_flags.is_on_new_line());
assert_eq!(token_with_flags.escape_index(), 42);
assert!(token_with_flags.escaped());
assert!(token_with_flags.lone_surrogates());
assert!(token_with_flags.has_separator());

// Test that other flags are not affected by set_escaped
// Test that other flags are not affected by set_escape_index
let mut token_with_flags2 = Token::default();
token_with_flags2.set_kind(Kind::Str);
token_with_flags2.set_start(50);
token_with_flags2.set_end(52);
token_with_flags2.set_is_on_new_line(true);
// escaped is false by default
// escape_index is 0 by default
token_with_flags2.set_lone_surrogates(true);
token_with_flags2.set_has_separator(true);

token_with_flags2.set_escaped(true);
token_with_flags2.set_escape_index(10);
assert_eq!(token_with_flags2.start(), 50);
assert!(token_with_flags2.is_on_new_line());
assert_eq!(token_with_flags2.escape_index(), 10);
assert!(token_with_flags2.escaped());
assert!(token_with_flags2.lone_surrogates());
assert!(token_with_flags2.has_separator());
token_with_flags2.set_escaped(false);
token_with_flags2.set_escape_index(0);
assert_eq!(token_with_flags2.escape_index(), 0);
assert!(!token_with_flags2.escaped());
assert!(token_with_flags2.is_on_new_line()); // Check again
assert!(token_with_flags2.lone_surrogates()); // Check again
Expand All @@ -373,18 +385,20 @@ mod test {
token_flags_test_newline.set_start(60);
token_flags_test_newline.set_end(62);
// is_on_new_line is false by default
token_flags_test_newline.set_escaped(true);
token_flags_test_newline.set_escape_index(7);
token_flags_test_newline.set_lone_surrogates(true);
token_flags_test_newline.set_has_separator(true);

token_flags_test_newline.set_is_on_new_line(true);
assert!(token_flags_test_newline.is_on_new_line());
assert_eq!(token_flags_test_newline.start(), 60);
assert_eq!(token_flags_test_newline.escape_index(), 7);
assert!(token_flags_test_newline.escaped());
assert!(token_flags_test_newline.lone_surrogates());
assert!(token_flags_test_newline.has_separator());
token_flags_test_newline.set_is_on_new_line(false);
assert!(!token_flags_test_newline.is_on_new_line());
assert_eq!(token_flags_test_newline.escape_index(), 7);
assert!(token_flags_test_newline.escaped());
assert!(token_flags_test_newline.lone_surrogates());
assert!(token_flags_test_newline.has_separator());
Expand All @@ -395,19 +409,21 @@ mod test {
token_flags_test_lone_surrogates.set_start(70);
token_flags_test_lone_surrogates.set_end(72);
token_flags_test_lone_surrogates.set_is_on_new_line(true);
token_flags_test_lone_surrogates.set_escaped(true);
token_flags_test_lone_surrogates.set_escape_index(3);
// lone_surrogates is false by default
token_flags_test_lone_surrogates.set_has_separator(true);

token_flags_test_lone_surrogates.set_lone_surrogates(true);
assert!(token_flags_test_lone_surrogates.lone_surrogates());
assert_eq!(token_flags_test_lone_surrogates.start(), 70);
assert!(token_flags_test_lone_surrogates.is_on_new_line());
assert_eq!(token_flags_test_lone_surrogates.escape_index(), 3);
assert!(token_flags_test_lone_surrogates.escaped());
assert!(token_flags_test_lone_surrogates.has_separator());
token_flags_test_lone_surrogates.set_lone_surrogates(false);
assert!(!token_flags_test_lone_surrogates.lone_surrogates());
assert!(token_flags_test_lone_surrogates.is_on_new_line());
assert_eq!(token_flags_test_lone_surrogates.escape_index(), 3);
assert!(token_flags_test_lone_surrogates.escaped());
assert!(token_flags_test_lone_surrogates.has_separator());
}
Expand All @@ -423,12 +439,15 @@ mod test {
}

#[test]
fn escaped() {
fn escape_index() {
let mut token = Token::default();
assert_eq!(token.escape_index(), 0);
assert!(!token.escaped());
token.set_escaped(true);
token.set_escape_index(5);
assert_eq!(token.escape_index(), 5);
assert!(token.escaped());
token.set_escaped(false);
token.set_escape_index(0);
assert_eq!(token.escape_index(), 0);
assert!(!token.escaped());
}

Expand Down
6 changes: 3 additions & 3 deletions tasks/track_memory_allocations/allocs_parser.snap
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
File | File size || Sys allocs | Sys reallocs || Arena allocs | Arena reallocs | Arena bytes
-------------------------------------------------------------------------------------------------------------------------------------------
checker.ts | 2.92 MB || 9672 | 21 || 267681 | 22847
checker.ts | 2.92 MB || 9671 | 22 || 267681 | 22847

cal.com.tsx | 1.06 MB || 2212 | 62 || 138162 | 13699

RadixUIAdoptionSection.jsx | 2.52 kB || 1 | 0 || 365 | 66

pdf.mjs | 567.30 kB || 703 | 75 || 90678 | 8148
pdf.mjs | 567.30 kB || 699 | 78 || 90678 | 8148

antd.js | 6.69 MB || 7132 | 235 || 528505 | 55357
antd.js | 6.69 MB || 7127 | 240 || 528505 | 55357

binder.ts | 193.08 kB || 530 | 7 || 16791 | 1467

Loading