From 9eb16b33b6cc7c3a91fe8490dd94216a795e7107 Mon Sep 17 00:00:00 2001
From: Boshen <boshenc@gmail.com>
Date: Sat, 7 Feb 2026 01:28:54 +0000
Subject: [PATCH] perf(syntax): pack ASCII identifier tables into single
 bitflag table (#19088)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

- Replace two `[bool; 128]` tables (`ASCII_START`, `ASCII_CONTINUE` — 256 bytes, 4 cache lines) with one `[u8; 128]` bitflag table (`ASCII_ID_FLAGS` — 128 bytes, 2 cache lines)
- Each byte packs `ID_START` (bit 0) and `ID_CONTINUE` (bit 1), so both `is_identifier_start_ascii` and `is_identifier_part_ascii` hit the same cache line for any given character
- Remove `unsafe { assert_unchecked! }` calls in the bulk `is_identifier_name` by indexing the flags table directly with byte values

🤖 Generated with [Claude Code](https://claude.com/claude-code)
---
 Cargo.lock                          |  1 -
 crates/oxc_syntax/Cargo.toml        |  1 -
 crates/oxc_syntax/src/identifier.rs | 60 ++++++++++-------------------
 3 files changed, 21 insertions(+), 41 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index ad6e055181040..dfb9f793a914a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2354,7 +2354,6 @@ dependencies = [
  "nonmax",
  "oxc_allocator",
  "oxc_ast_macros",
- "oxc_data_structures",
  "oxc_estree",
  "oxc_index",
  "oxc_span",
diff --git a/crates/oxc_syntax/Cargo.toml b/crates/oxc_syntax/Cargo.toml
index c7947f42ccef5..ecb075823aabe 100644
--- a/crates/oxc_syntax/Cargo.toml
+++ b/crates/oxc_syntax/Cargo.toml
@@ -22,7 +22,6 @@ doctest = false
 [dependencies]
 oxc_allocator = { workspace = true }
 oxc_ast_macros = { workspace = true }
-oxc_data_structures = { workspace = true, features = ["assert_unchecked"] }
 oxc_estree = { workspace = true }
 oxc_index = { workspace = true }
 oxc_span = { workspace = true }
diff --git a/crates/oxc_syntax/src/identifier.rs b/crates/oxc_syntax/src/identifier.rs
index 5c35377cd4305..f8348db954fb0 100644
--- a/crates/oxc_syntax/src/identifier.rs
+++ b/crates/oxc_syntax/src/identifier.rs
@@ -2,8 +2,6 @@
 
 use unicode_id_start::{is_id_continue_unicode, is_id_start_unicode};
 
-use oxc_data_structures::assert_unchecked;
-
 use crate::line_terminator::{CR, LF, LS, PS};
 
 pub const EOF: char = '\0';
@@ -83,38 +81,26 @@ pub fn is_white_space_single_line(c: char) -> bool {
     matches!(c, SP | TAB) || is_irregular_whitespace(c)
 }
 
-const XX: bool = true;
-const __: bool = false;
+const ID_START: u8 = 1;
+const ID_CONTINUE: u8 = 2;
 
 #[repr(C, align(64))]
 pub struct Align64<T>(pub(crate) T);
 
-// `a`-`z`, `A`-`Z`, `$` (0x24), `_` (0x5F)
-#[rustfmt::skip]
-pub static ASCII_START: Align64<[bool; 128]> = Align64([
-//  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F   //
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
-    __, __, __, __, XX, __, __, __, __, __, __, __, __, __, __, __, // 2
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
-    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 4
-    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, XX, // 5
-    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 6
-    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 7
-]);
-
-// `ASCII_START` + `0`-`9`
+// Packed: ID_START | ID_CONTINUE per ASCII byte.
+// `a`-`z`, `A`-`Z`, `$`, `_` get ID_START | ID_CONTINUE (3).
+// `0`-`9` get ID_CONTINUE only (2).
 #[rustfmt::skip]
-pub static ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
-//  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F   //
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
-    __, __, __, __, XX, __, __, __, __, __, __, __, __, __, __, __, // 2
-    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, __, // 3
-    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 4
-    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, XX, // 5
-    __, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 6
-    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, __, __, __, __, __, // 7
+pub static ASCII_ID_FLAGS: Align64<[u8; 128]> = Align64([
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F  //
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
+    0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2  $ = 3
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, // 3  0-9 = 2
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 4  A-Z = 3
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, // 5  _ = 3
+    0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 6  a-z = 3
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, // 7
 ]);
 
 /// Section 12.7 Detect `IdentifierStartChar`
@@ -128,7 +114,7 @@ pub fn is_identifier_start(c: char) -> bool {
 
 #[inline]
 pub fn is_identifier_start_ascii(c: char) -> bool {
-    ASCII_START.0[c as usize]
+    ASCII_ID_FLAGS.0[c as usize] & ID_START != 0
 }
 
 #[inline]
@@ -148,7 +134,7 @@ pub fn is_identifier_part(c: char) -> bool {
 
 #[inline]
 pub fn is_identifier_part_ascii(c: char) -> bool {
-    ASCII_CONTINUE.0[c as usize]
+    ASCII_ID_FLAGS.0[c as usize] & ID_CONTINUE != 0
 }
 
 #[inline]
@@ -171,7 +157,7 @@ pub fn is_identifier_name(name: &str) -> bool {
 
     let mut chars = if first_byte.is_ascii() {
         // First byte is ASCII
-        if !is_identifier_start_ascii(first_byte as char) {
+        if ASCII_ID_FLAGS.0[first_byte as usize] & ID_START == 0 {
             return false;
         }
 
@@ -196,9 +182,7 @@ pub fn is_identifier_name(name: &str) -> bool {
 
                 let next8 = next8_as_u64.to_ne_bytes();
                 for b in next8 {
-                    // SAFETY: We just checked all these bytes are ASCII
-                    unsafe { assert_unchecked!(b.is_ascii()) };
-                    if !is_identifier_part_ascii(b as char) {
+                    if ASCII_ID_FLAGS.0[b as usize] & ID_CONTINUE == 0 {
                         return false;
                     }
                 }
@@ -221,9 +205,7 @@ pub fn is_identifier_name(name: &str) -> bool {
 
                 let next4 = next4_as_u32.to_ne_bytes();
                 for b in next4 {
-                    // SAFETY: We just checked all these bytes are ASCII
-                    unsafe { assert_unchecked!(b.is_ascii()) };
-                    if !is_identifier_part_ascii(b as char) {
+                    if ASCII_ID_FLAGS.0[b as usize] & ID_CONTINUE == 0 {
                         return false;
                     }
                 }
@@ -237,7 +219,7 @@ pub fn is_identifier_name(name: &str) -> bool {
                     };
 
                     if b.is_ascii() {
-                        if !is_identifier_part_ascii(b as char) {
+                        if ASCII_ID_FLAGS.0[b as usize] & ID_CONTINUE == 0 {
                             return false;
                         }
                     } else {