From 14c7d4a6b1f32663edc9af901d43a60f74cca2b6 Mon Sep 17 00:00:00 2001 From: Arsh <69170106+lilnasy@users.noreply.github.com> Date: Sun, 30 Nov 2025 02:17:18 +0530 Subject: [PATCH 1/7] perf(lexer): store escaped identifiers in a `Vec` --- crates/oxc_parser/src/cursor.rs | 2 +- crates/oxc_parser/src/lexer/identifier.rs | 2 +- crates/oxc_parser/src/lexer/mod.rs | 17 ++--- crates/oxc_parser/src/lexer/string.rs | 15 +++-- crates/oxc_parser/src/lexer/template.rs | 16 +++-- crates/oxc_parser/src/lexer/token.rs | 79 ++++++++++++++--------- 6 files changed, 80 insertions(+), 51 deletions(-) diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index 0216002e49423..79a855aed759c 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -76,7 +76,7 @@ impl<'a> ParserImpl<'a> { /// Get current template string pub(crate) fn cur_template_string(&self) -> Option<&'a str> { - self.lexer.get_template_string(self.token.start()) + self.lexer.get_template_string(self.token) } /// Checks if the current index has token `Kind` diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs index c08957cddede4..38cad8066a88d 100644 --- a/crates/oxc_parser/src/lexer/identifier.rs +++ b/crates/oxc_parser/src/lexer/identifier.rs @@ -204,7 +204,7 @@ impl<'a> Lexer<'a> { } } - // Convert `str` to arena slice and save to `escaped_strings` + // Convert `str` to arena slice and push to `escaped_strings` Vec let id = str.into_str(); self.save_string(true, id); id diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index ce91d1b015dbb..0d8d5b5d8422b 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -5,8 +5,6 @@ //! * [rustc](https://github.com/rust-lang/rust/blob/1.82.0/compiler/rustc_lexer/src) //! 
* [v8](https://v8.dev/blog/scanner) -use rustc_hash::FxHashMap; - use oxc_allocator::Allocator; use oxc_ast::ast::RegExpFlags; use oxc_diagnostics::OxcDiagnostic; @@ -77,12 +75,14 @@ pub struct Lexer<'a> { pub(crate) trivia_builder: TriviaBuilder, - /// Data store for escaped strings, indexed by [Token::start] when [Token::escaped] is true - pub escaped_strings: FxHashMap<u32, &'a str>, + /// Data store for escaped strings. + /// Index 0 is always an empty string, meaning not escaped. + pub escaped_strings: Vec<&'a str>, - /// Data store for escaped templates, indexed by [Token::start] when [Token::escaped] is true + /// Data store for escaped templates. Index 0 is always `None`, meaning not escaped. + /// Token.escape_index points into this Vec. /// `None` is saved when the string contains an invalid escape sequence. - pub escaped_templates: FxHashMap<u32, Option<&'a str>>, + pub escaped_templates: Vec<Option<&'a str>>, /// `memchr` Finder for end of multi-line comments. Created lazily when first used. multi_line_comment_end_finder: Option>, @@ -111,8 +111,9 @@ impl<'a> Lexer<'a> { errors: vec![], context: LexerContext::Regular, trivia_builder: TriviaBuilder::default(), - escaped_strings: FxHashMap::default(), - escaped_templates: FxHashMap::default(), + // Initialize with sentinel dummy entry at index 0 + escaped_strings: vec![""], + escaped_templates: vec![None], multi_line_comment_end_finder: None, } } diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs index 9bfacdb040d61..6a720a596a93f 100644 --- a/crates/oxc_parser/src/lexer/string.rs +++ b/crates/oxc_parser/src/lexer/string.rs @@ -194,7 +194,7 @@ macro_rules! 
handle_string_literal_escape { return Kind::Undetermined; } - // Convert `str` to arena slice and save to `escaped_strings` + // Convert `str` to arena slice and push to `escaped_strings` Vec $lexer.save_string(true, str.into_str()); Kind::Str @@ -246,13 +246,18 @@ impl<'a> Lexer<'a> { #[cold] fn save_escaped_string(&mut self, s: &'a str) { - self.escaped_strings.insert(self.token.start(), s); - self.token.set_escaped(true); + self.escaped_strings.push(s); + // We are _probably_ not going to have to deal with more than 4.3 billion escaped + // identifiers in a single file + #[expect(clippy::cast_possible_truncation)] + let index = (self.escaped_strings.len() - 1) as u32; + self.token.set_escape_index(index); } pub(crate) fn get_string(&self, token: Token) -> &'a str { - if token.escaped() { - return self.escaped_strings[&token.start()]; + let escape_index = token.escape_index(); + if escape_index != 0 { + return self.escaped_strings[escape_index as usize]; } let raw = &self.source.whole()[token.start() as usize..token.end() as usize]; diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs index 41d93e8d300d8..047a7f35c732a 100644 --- a/crates/oxc_parser/src/lexer/template.rs +++ b/crates/oxc_parser/src/lexer/template.rs @@ -395,12 +395,18 @@ impl<'a> Lexer<'a> { /// Save escaped template string fn save_template_string(&mut self, is_valid_escape_sequence: bool, s: &'a str) { - self.escaped_templates.insert(self.token.start(), is_valid_escape_sequence.then_some(s)); - self.token.set_escaped(true); + self.escaped_templates.push(is_valid_escape_sequence.then_some(s)); + #[expect(clippy::cast_possible_truncation)] + let index = (self.escaped_templates.len() - 1) as u32; + self.token.set_escape_index(index); } - pub(crate) fn get_template_string(&self, span_start: u32) -> Option<&'a str> { - self.escaped_templates[&span_start] + pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> { + let escape_index = 
token.escape_index(); + if escape_index == 0 { + return None; + } + self.escaped_templates[escape_index as usize] } } @@ -448,7 +454,7 @@ mod test { token.kind(), if is_only_part { Kind::NoSubstitutionTemplate } else { Kind::TemplateHead } ); - let escaped = lexer.escaped_templates[&token.start()]; + let escaped = lexer.get_template_string(token); assert_eq!(escaped, Some(expected_escaped.as_str())); } diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index 0f1e01d3602a7..3a6fea31841f2 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -11,22 +11,22 @@ use super::kind::Kind; // - Bits 32-63 (32 bits): `end` (`u32`) // - Bits 64-71 (8 bits): `kind` (`Kind`) // - Bits 72-79 (8 bits): `is_on_new_line` (`bool`) -// - Bits 80-87 (8 bits): `escaped` (`bool`) -// - Bits 88-95 (8 bits): `lone_surrogates` (`bool`) -// - Bits 96-103 (8 bits): `has_separator` (`bool`) -// - Bits 104-127 (24 bits): unused +// - Bits 80-111 (32 bits): `escape_index` (`u32`) +// - Bits 112-119 (8 bits): `lone_surrogates` (`bool`) +// - Bits 120-127 (8 bits): `has_separator` (`bool`) const START_SHIFT: usize = 0; const END_SHIFT: usize = 32; const KIND_SHIFT: usize = 64; const IS_ON_NEW_LINE_SHIFT: usize = 72; -const ESCAPED_SHIFT: usize = 80; -const LONE_SURROGATES_SHIFT: usize = 88; -const HAS_SEPARATOR_SHIFT: usize = 96; +const ESCAPE_INDEX_SHIFT: usize = 80; +const LONE_SURROGATES_SHIFT: usize = 112; +const HAS_SEPARATOR_SHIFT: usize = 120; const START_MASK: u128 = 0xFFFF_FFFF; // 32 bits const END_MASK: u128 = 0xFFFF_FFFF; // 32 bits const KIND_MASK: u128 = 0xFF; // 8 bits +const ESCAPE_INDEX_MASK: u128 = 0xFFFF_FFFF; // 32 bits const BOOL_MASK: u128 = 0xFF; // 8 bits const _: () = { @@ -36,7 +36,7 @@ const _: () = { } assert!(is_valid_shift(IS_ON_NEW_LINE_SHIFT)); - assert!(is_valid_shift(ESCAPED_SHIFT)); + assert!(is_valid_shift(ESCAPE_INDEX_SHIFT)); assert!(is_valid_shift(LONE_SURROGATES_SHIFT)); 
assert!(is_valid_shift(HAS_SEPARATOR_SHIFT)); }; @@ -53,7 +53,7 @@ impl Default for Token { // end: 0, // kind: Kind::default(), // is_on_new_line: false, - // escaped: false, + // escape_index: 0, // lone_surrogates: false, // has_separator: false, const _: () = assert!(Kind::Eof as u8 == 0); @@ -68,7 +68,7 @@ impl fmt::Debug for Token { .field("start", &self.start()) .field("end", &self.end()) .field("is_on_new_line", &self.is_on_new_line()) - .field("escaped", &self.escaped()) + .field("escape_index", &self.escape_index()) .field("lone_surrogates", &self.lone_surrogates()) .field("has_separator", &self.has_separator()) .finish() @@ -149,18 +149,21 @@ impl Token { self.0 |= u128::from(value) << IS_ON_NEW_LINE_SHIFT; } + /// Returns true if this token has an escape sequence (i.e., escape_index > 0) #[inline] pub fn escaped(&self) -> bool { - // Use a pointer read rather than arithmetic as it produces less instructions. - // SAFETY: 8 bits starting at `ESCAPED_SHIFT` are only set in `Token::default` and - // `Token::set_escaped`. Both only set these bits to 0 or 1, so valid to read as a `bool`. 
- unsafe { self.read_bool(ESCAPED_SHIFT) } + self.escape_index() != 0 } #[inline] - pub(crate) fn set_escaped(&mut self, escaped: bool) { - self.0 &= !(BOOL_MASK << ESCAPED_SHIFT); // Clear current `escaped` bits - self.0 |= u128::from(escaped) << ESCAPED_SHIFT; + pub fn escape_index(&self) -> u32 { + ((self.0 >> ESCAPE_INDEX_SHIFT) & ESCAPE_INDEX_MASK) as u32 + } + + #[inline] + pub(crate) fn set_escape_index(&mut self, index: u32) { + self.0 &= !(ESCAPE_INDEX_MASK << ESCAPE_INDEX_SHIFT); // Clear current `escape_index` bits + self.0 |= u128::from(index) << ESCAPE_INDEX_SHIFT; } #[inline] @@ -256,6 +259,7 @@ mod test { assert_eq!(token.end(), 0); assert_eq!(token.kind(), Kind::Eof); // Kind::default() is Eof assert!(!token.is_on_new_line()); + assert_eq!(token.escape_index(), 0); assert!(!token.escaped()); assert!(!token.lone_surrogates()); assert!(!token.has_separator()); @@ -268,6 +272,7 @@ mod test { assert_eq!(token.end(), 0); assert_eq!(token.kind(), Kind::Eof); assert!(token.is_on_new_line()); + assert_eq!(token.escape_index(), 0); assert!(!token.escaped()); assert!(!token.lone_surrogates()); assert!(!token.has_separator()); @@ -279,7 +284,7 @@ mod test { let start = 100u32; let end = start + 5u32; let is_on_new_line = true; - let escaped = false; + let escape_index = 5u32; let lone_surrogates = true; let has_separator = false; @@ -288,7 +293,7 @@ mod test { token.set_start(start); token.set_end(end); token.set_is_on_new_line(is_on_new_line); - token.set_escaped(escaped); + token.set_escape_index(escape_index); token.set_lone_surrogates(lone_surrogates); if has_separator { // Assuming set_has_separator is not always called if false @@ -299,7 +304,8 @@ mod test { assert_eq!(token.start(), start); assert_eq!(token.end(), end); assert_eq!(token.is_on_new_line(), is_on_new_line); - assert_eq!(token.escaped(), escaped); + assert_eq!(token.escape_index(), escape_index); + assert!(token.escaped()); assert_eq!(token.lone_surrogates(), lone_surrogates); 
assert_eq!(token.has_separator(), has_separator); } @@ -310,9 +316,10 @@ mod test { token.set_kind(Kind::Ident); token.set_start(10); token.set_end(15); - // is_on_new_line, escaped, lone_surrogates, has_separator are false by default from Token::default() + // is_on_new_line, escape_index, lone_surrogates, has_separator are 0/false by default from Token::default() assert_eq!(token.start(), 10); + assert_eq!(token.escape_index(), 0); assert!(!token.escaped()); assert!(!token.is_on_new_line()); assert!(!token.lone_surrogates()); @@ -334,34 +341,37 @@ mod test { token_with_flags.set_start(30); token_with_flags.set_end(33); token_with_flags.set_is_on_new_line(true); - token_with_flags.set_escaped(true); + token_with_flags.set_escape_index(42); token_with_flags.set_lone_surrogates(true); token_with_flags.set_has_separator(true); token_with_flags.set_start(40); assert_eq!(token_with_flags.start(), 40); assert!(token_with_flags.is_on_new_line()); + assert_eq!(token_with_flags.escape_index(), 42); assert!(token_with_flags.escaped()); assert!(token_with_flags.lone_surrogates()); assert!(token_with_flags.has_separator()); - // Test that other flags are not affected by set_escaped + // Test that other flags are not affected by set_escape_index let mut token_with_flags2 = Token::default(); token_with_flags2.set_kind(Kind::Str); token_with_flags2.set_start(50); token_with_flags2.set_end(52); token_with_flags2.set_is_on_new_line(true); - // escaped is false by default + // escape_index is 0 by default token_with_flags2.set_lone_surrogates(true); token_with_flags2.set_has_separator(true); - token_with_flags2.set_escaped(true); + token_with_flags2.set_escape_index(10); assert_eq!(token_with_flags2.start(), 50); assert!(token_with_flags2.is_on_new_line()); + assert_eq!(token_with_flags2.escape_index(), 10); assert!(token_with_flags2.escaped()); assert!(token_with_flags2.lone_surrogates()); assert!(token_with_flags2.has_separator()); - token_with_flags2.set_escaped(false); + 
token_with_flags2.set_escape_index(0); + assert_eq!(token_with_flags2.escape_index(), 0); assert!(!token_with_flags2.escaped()); assert!(token_with_flags2.is_on_new_line()); // Check again assert!(token_with_flags2.lone_surrogates()); // Check again @@ -373,18 +383,20 @@ mod test { token_flags_test_newline.set_start(60); token_flags_test_newline.set_end(62); // is_on_new_line is false by default - token_flags_test_newline.set_escaped(true); + token_flags_test_newline.set_escape_index(7); token_flags_test_newline.set_lone_surrogates(true); token_flags_test_newline.set_has_separator(true); token_flags_test_newline.set_is_on_new_line(true); assert!(token_flags_test_newline.is_on_new_line()); assert_eq!(token_flags_test_newline.start(), 60); + assert_eq!(token_flags_test_newline.escape_index(), 7); assert!(token_flags_test_newline.escaped()); assert!(token_flags_test_newline.lone_surrogates()); assert!(token_flags_test_newline.has_separator()); token_flags_test_newline.set_is_on_new_line(false); assert!(!token_flags_test_newline.is_on_new_line()); + assert_eq!(token_flags_test_newline.escape_index(), 7); assert!(token_flags_test_newline.escaped()); assert!(token_flags_test_newline.lone_surrogates()); assert!(token_flags_test_newline.has_separator()); @@ -395,7 +407,7 @@ mod test { token_flags_test_lone_surrogates.set_start(70); token_flags_test_lone_surrogates.set_end(72); token_flags_test_lone_surrogates.set_is_on_new_line(true); - token_flags_test_lone_surrogates.set_escaped(true); + token_flags_test_lone_surrogates.set_escape_index(3); // lone_surrogates is false by default token_flags_test_lone_surrogates.set_has_separator(true); @@ -403,11 +415,13 @@ mod test { assert!(token_flags_test_lone_surrogates.lone_surrogates()); assert_eq!(token_flags_test_lone_surrogates.start(), 70); assert!(token_flags_test_lone_surrogates.is_on_new_line()); + assert_eq!(token_flags_test_lone_surrogates.escape_index(), 3); assert!(token_flags_test_lone_surrogates.escaped()); 
assert!(token_flags_test_lone_surrogates.has_separator()); token_flags_test_lone_surrogates.set_lone_surrogates(false); assert!(!token_flags_test_lone_surrogates.lone_surrogates()); assert!(token_flags_test_lone_surrogates.is_on_new_line()); + assert_eq!(token_flags_test_lone_surrogates.escape_index(), 3); assert!(token_flags_test_lone_surrogates.escaped()); assert!(token_flags_test_lone_surrogates.has_separator()); } @@ -423,12 +437,15 @@ mod test { } #[test] - fn escaped() { + fn escape_index() { let mut token = Token::default(); + assert_eq!(token.escape_index(), 0); assert!(!token.escaped()); - token.set_escaped(true); + token.set_escape_index(5); + assert_eq!(token.escape_index(), 5); assert!(token.escaped()); - token.set_escaped(false); + token.set_escape_index(0); + assert_eq!(token.escape_index(), 0); assert!(!token.escaped()); } From 3c7da7dcd989f77e9bf483b3f5950650daee05fe Mon Sep 17 00:00:00 2001 From: Arsh <69170106+lilnasy@users.noreply.github.com> Date: Sun, 30 Nov 2025 12:52:48 +0530 Subject: [PATCH 2/7] `just allocs` --- tasks/track_memory_allocations/allocs_parser.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/track_memory_allocations/allocs_parser.snap b/tasks/track_memory_allocations/allocs_parser.snap index f69e9354c6605..79423a34638c9 100644 --- a/tasks/track_memory_allocations/allocs_parser.snap +++ b/tasks/track_memory_allocations/allocs_parser.snap @@ -1,14 +1,14 @@ File | File size || Sys allocs | Sys reallocs || Arena allocs | Arena reallocs | Arena bytes ------------------------------------------------------------------------------------------------------------------------------------------- -checker.ts | 2.92 MB || 9672 | 21 || 267681 | 22847 +checker.ts | 2.92 MB || 9671 | 22 || 267681 | 22847 cal.com.tsx | 1.06 MB || 2212 | 62 || 138162 | 13699 RadixUIAdoptionSection.jsx | 2.52 kB || 1 | 0 || 365 | 66 -pdf.mjs | 567.30 kB || 703 | 75 || 90678 | 8148 +pdf.mjs | 567.30 kB || 699 | 78 || 90678 | 8148 
-antd.js | 6.69 MB || 7132 | 235 || 528505 | 55357 +antd.js | 6.69 MB || 7127 | 240 || 528505 | 55357 binder.ts | 193.08 kB || 530 | 7 || 16791 | 1467 From ca33ccfb64b1d75bfb69cea897a836d73a8269d7 Mon Sep 17 00:00:00 2001 From: Arsh <69170106+lilnasy@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:14:37 +0530 Subject: [PATCH 3/7] avoid sentinel values --- crates/oxc_parser/src/lexer/mod.rs | 10 ++++------ crates/oxc_parser/src/lexer/string.rs | 9 ++++----- crates/oxc_parser/src/lexer/template.rs | 5 ++--- crates/oxc_parser/src/lexer/token.rs | 3 +++ 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 0d8d5b5d8422b..65afef755210a 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -76,11 +76,10 @@ pub struct Lexer<'a> { pub(crate) trivia_builder: TriviaBuilder, /// Data store for escaped strings. - /// Index 0 is always an empty string, meaning not escaped. pub escaped_strings: Vec<&'a str>, - /// Data store for escaped templates. Index 0 is always `None`, meaning not escaped. - /// Token.escape_index points into this Vec. + /// Data store for escaped templates. + /// `Token.escape_index` points into this Vec. /// `None` is saved when the string contains an invalid escape sequence. pub escaped_templates: Vec<Option<&'a str>>, @@ -111,9 +110,8 @@ impl<'a> Lexer<'a> { errors: vec![], context: LexerContext::Regular, trivia_builder: TriviaBuilder::default(), - // Initialize with sentinel dummy entry at index 0 - escaped_strings: vec![""], - escaped_templates: vec![None], + escaped_strings: vec![], + escaped_templates: vec![], multi_line_comment_end_finder: None, } } diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs index 6a720a596a93f..de3f533492e8f 100644 --- a/crates/oxc_parser/src/lexer/string.rs +++ b/crates/oxc_parser/src/lexer/string.rs @@ -194,7 +194,7 @@ macro_rules! 
handle_string_literal_escape { return Kind::Undetermined; } - // Convert `str` to arena slice and push to `escaped_strings` Vec + // Convert `str` to arena slice and save to `escaped_strings` $lexer.save_string(true, str.into_str()); Kind::Str @@ -247,17 +247,16 @@ impl<'a> Lexer<'a> { #[cold] fn save_escaped_string(&mut self, s: &'a str) { self.escaped_strings.push(s); - // We are _probably_ not going to have to deal with more than 4.3 billion escaped - // identifiers in a single file + // Truncation is fine, the theoretical maximum index is ~860 million. #[expect(clippy::cast_possible_truncation)] - let index = (self.escaped_strings.len() - 1) as u32; + let index = self.escaped_strings.len() as u32; self.token.set_escape_index(index); } pub(crate) fn get_string(&self, token: Token) -> &'a str { let escape_index = token.escape_index(); if escape_index != 0 { - return self.escaped_strings[escape_index as usize]; + return self.escaped_strings[escape_index as usize - 1]; } let raw = &self.source.whole()[token.start() as usize..token.end() as usize]; diff --git a/crates/oxc_parser/src/lexer/template.rs b/crates/oxc_parser/src/lexer/template.rs index 047a7f35c732a..b66f0cfdb7333 100644 --- a/crates/oxc_parser/src/lexer/template.rs +++ b/crates/oxc_parser/src/lexer/template.rs @@ -397,8 +397,7 @@ impl<'a> Lexer<'a> { fn save_template_string(&mut self, is_valid_escape_sequence: bool, s: &'a str) { self.escaped_templates.push(is_valid_escape_sequence.then_some(s)); #[expect(clippy::cast_possible_truncation)] - let index = (self.escaped_templates.len() - 1) as u32; - self.token.set_escape_index(index); + self.token.set_escape_index(self.escaped_templates.len() as u32); } pub(crate) fn get_template_string(&self, token: Token) -> Option<&'a str> { @@ -406,7 +405,7 @@ impl<'a> Lexer<'a> { if escape_index == 0 { return None; } - self.escaped_templates[escape_index as usize] + self.escaped_templates[escape_index as usize - 1] } } diff --git 
a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index 3a6fea31841f2..8363b1e53df34 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -155,6 +155,9 @@ impl Token { self.escape_index() != 0 } + /// Returns `0` if token has no escape sequences. + /// Returns `index + 1` (to allow for `0` to mean no escapes) + /// of the escaped string otherwise. #[inline] pub fn escape_index(&self) -> u32 { ((self.0 >> ESCAPE_INDEX_SHIFT) & ESCAPE_INDEX_MASK) as u32 From 9a2170774c795b13d9b4f785412b9114c573a823 Mon Sep 17 00:00:00 2001 From: Arsh <69170106+lilnasy@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:21:14 +0530 Subject: [PATCH 4/7] move `escape_index` to the last 32 bits in the token memory layout --- crates/oxc_parser/src/lexer/token.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index 8363b1e53df34..375fb1fea7fc1 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -11,17 +11,17 @@ use super::kind::Kind; // - Bits 32-63 (32 bits): `end` (`u32`) // - Bits 64-71 (8 bits): `kind` (`Kind`) // - Bits 72-79 (8 bits): `is_on_new_line` (`bool`) -// - Bits 80-111 (32 bits): `escape_index` (`u32`) -// - Bits 112-119 (8 bits): `lone_surrogates` (`bool`) -// - Bits 120-127 (8 bits): `has_separator` (`bool`) +// - Bits 80-87 (8 bits): `lone_surrogates` (`bool`) +// - Bits 88-95 (8 bits): `has_separator` (`bool`) +// - Bits 96-127 (32 bits): `escape_index` (`u32`) const START_SHIFT: usize = 0; const END_SHIFT: usize = 32; const KIND_SHIFT: usize = 64; const IS_ON_NEW_LINE_SHIFT: usize = 72; -const ESCAPE_INDEX_SHIFT: usize = 80; -const LONE_SURROGATES_SHIFT: usize = 112; -const HAS_SEPARATOR_SHIFT: usize = 120; +const LONE_SURROGATES_SHIFT: usize = 80; +const HAS_SEPARATOR_SHIFT: usize = 88; +const ESCAPE_INDEX_SHIFT: usize = 96; const START_MASK: u128 
= 0xFFFF_FFFF; // 32 bits const END_MASK: u128 = 0xFFFF_FFFF; // 32 bits @@ -202,7 +202,7 @@ impl Token { /// # SAFETY /// /// `shift` must be the location of a valid boolean "field" in [`Token`] - /// e.g. `ESCAPED_SHIFT`. The caller must guarantee that the 8 bits at + /// e.g. `IS_ON_NEW_LINE_SHIFT`. The caller must guarantee that the 8 bits at /// `shift` contain only 0 or 1, making it safe to read as a `bool`. /// /// # Performance analysis From 547d85d1a45c6129a6f646590765137c6e3033cb Mon Sep 17 00:00:00 2001 From: Arsh <69170106+lilnasy@users.noreply.github.com> Date: Sun, 30 Nov 2025 14:43:47 +0530 Subject: [PATCH 5/7] undo unimportant comment diff --- crates/oxc_parser/src/lexer/identifier.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs index 38cad8066a88d..c08957cddede4 100644 --- a/crates/oxc_parser/src/lexer/identifier.rs +++ b/crates/oxc_parser/src/lexer/identifier.rs @@ -204,7 +204,7 @@ impl<'a> Lexer<'a> { } } - // Convert `str` to arena slice and push to `escaped_strings` Vec + // Convert `str` to arena slice and save to `escaped_strings` let id = str.into_str(); self.save_string(true, id); id From 63bfece54d085532bfba7e5937d495a9014c6069 Mon Sep 17 00:00:00 2001 From: Arsh <69170106+lilnasy@users.noreply.github.com> Date: Mon, 1 Dec 2025 07:50:40 +0530 Subject: [PATCH 6/7] move comment and debug fields in layout order --- crates/oxc_parser/src/lexer/token.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index 375fb1fea7fc1..f93ea0beb5780 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -53,9 +53,9 @@ impl Default for Token { // end: 0, // kind: Kind::default(), // is_on_new_line: false, - // escape_index: 0, // lone_surrogates: false, // has_separator: false, + // escape_index: 0, const _: () = 
assert!(Kind::Eof as u8 == 0); Self(0) } @@ -68,9 +68,9 @@ impl fmt::Debug for Token { .field("start", &self.start()) .field("end", &self.end()) .field("is_on_new_line", &self.is_on_new_line()) - .field("escape_index", &self.escape_index()) .field("lone_surrogates", &self.lone_surrogates()) .field("has_separator", &self.has_separator()) + .field("escape_index", &self.escape_index()) .finish() } } From 0373f168c415c2fe6452af40527a28a9a9174930 Mon Sep 17 00:00:00 2001 From: Arsh <69170106+lilnasy@users.noreply.github.com> Date: Mon, 1 Dec 2025 07:52:14 +0530 Subject: [PATCH 7/7] remove `is_valid_shift` assertion for `ESCAPE_INDEX_SHIFT` --- crates/oxc_parser/src/lexer/token.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index f93ea0beb5780..eceea9aa9c68f 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -36,7 +36,6 @@ const _: () = { } assert!(is_valid_shift(IS_ON_NEW_LINE_SHIFT)); - assert!(is_valid_shift(ESCAPE_INDEX_SHIFT)); assert!(is_valid_shift(LONE_SURROGATES_SHIFT)); assert!(is_valid_shift(HAS_SEPARATOR_SHIFT)); };