oxc-project · Boshen · Jan 23, 2026
diff --git a/crates/oxc_parser/src/lexer/identifier.rs b/crates/oxc_parser/src/lexer/identifier.rs
@@ -8,6 +8,8 @@ use oxc_syntax::identifier::{
 
 use crate::diagnostics;
 
+use oxc_span::IncrementalIdentHasher;
+
 use super::{
     Kind, Lexer, SourcePosition, cold_branch,
     search::{SafeByteMatchTable, byte_search, safe_byte_match_table},
@@ -48,16 +50,26 @@ impl<'a> Lexer<'a> {
     /// * `self.source` must not be exhausted (at least 1 char remaining).
     /// * Next char must be ASCII.
     pub(super) unsafe fn identifier_name_handler(&mut self) -> &'a str {
+        // Reset hasher and hash the first byte.
+        // SAFETY: Caller guarantees not at EOF.
+        let first_byte = unsafe { self.source.position().read() };
+        self.identifier_hasher = IncrementalIdentHasher::new();
+        self.identifier_hasher.write_byte(first_byte);
+
         // Advance past 1st byte.
         // SAFETY: Caller guarantees not at EOF, and next byte is ASCII.
         let after_first = unsafe { self.source.position().add(1) };
 
-        // Consume bytes which are part of identifier
+        // Consume bytes which are part of identifier, hashing as we go
         let next_byte = byte_search! {
             lexer: self,
             table: NOT_ASCII_ID_CONTINUE_TABLE,
             start: after_first,
+            hash_identifier: true,
             handle_eof: {
+                // Hash remaining bytes before returning
+                let remaining = self.source.str_from_pos_to_current(after_first);
+                self.identifier_hasher.write_bytes(remaining.as_bytes());
                 // Return identifier minus its first char.
                 // SAFETY: `lexer.source` is positioned at EOF, so there is no valid value
                 // of `after_first` which could be after current position.
@@ -74,15 +86,15 @@ impl<'a> Lexer<'a> {
                 // SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
                 // makes `start_pos` `source`'s position as it was at start of this function
                 let start_pos = unsafe { after_first.sub(1) };
-                &self.identifier_tail_unicode(start_pos)[1..]
+                self.identifier_tail_unicode_with_hash(start_pos)
             });
         }
         if next_byte == b'\\' {
             return cold_branch(|| {
                 // SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
                 // makes `start_pos` `source`'s position as it was at start of this function
                 let start_pos = unsafe { after_first.sub(1) };
-                &self.identifier_backslash(start_pos, false)[1..]
+                self.identifier_backslash_with_hash(start_pos)
             });
         }
 
@@ -93,6 +105,29 @@ impl<'a> Lexer<'a> {
         unsafe { self.source.str_from_pos_to_current_unchecked(after_first) }
     }
 
+    /// Handle rest of identifier after first byte of a multi-byte Unicode char found.
+    /// Continues hashing from current position. Returns identifier minus its first char.
+    fn identifier_tail_unicode_with_hash(&mut self, start_pos: SourcePosition<'a>) -> &'a str {
+        // Save position - bytes before this were already hashed by byte_search
+        let hash_start = self.source.position();
+        let id = self.identifier_tail_unicode(start_pos);
+        // Hash only the new bytes (from unicode char onwards)
+        let new_bytes = self.source.str_from_pos_to_current(hash_start);
+        self.identifier_hasher.write_bytes(new_bytes.as_bytes());
+        &id[1..]
+    }
+
+    /// Handle rest of identifier after a `\` escape is found.
+    /// Must recompute hash because escape sequences decode to different bytes.
+    /// Returns identifier minus its first char.
+    fn identifier_backslash_with_hash(&mut self, start_pos: SourcePosition<'a>) -> &'a str {
+        let id = self.identifier_backslash(start_pos, false);
+        // Must recompute: source has `\u0041` but string has `A`
+        self.identifier_hasher = IncrementalIdentHasher::new();
+        self.identifier_hasher.write_bytes(id.as_bytes());
+        &id[1..]
+    }
+
     /// Handle rest of identifier after first byte of a multi-byte Unicode char found.
     /// Any number of characters can have already been consumed from `self.source` prior to it.
     /// `self.source` should be positioned at start of Unicode character.
@@ -140,6 +175,11 @@ impl<'a> Lexer<'a> {
 
         // Process escape and get rest of identifier
         let id = self.identifier_on_backslash(str, true);
+
+        // Hash the unescaped identifier
+        self.identifier_hasher = IncrementalIdentHasher::new();
+        self.identifier_hasher.write_bytes(id.as_bytes());
+
         Kind::match_keyword(id)
     }
 

diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
@@ -10,7 +10,7 @@ use rustc_hash::FxHashMap;
 use oxc_allocator::Allocator;
 use oxc_ast::ast::RegExpFlags;
 use oxc_diagnostics::OxcDiagnostic;
-use oxc_span::{SourceType, Span};
+use oxc_span::{IncrementalIdentHasher, SourceType, Span};
 
 use crate::{UniquePromise, diagnostics};
 
@@ -47,6 +47,7 @@ pub struct LexerCheckpoint<'a> {
     errors_snapshot: ErrorSnapshot,
     has_pure_comment: bool,
     has_no_side_effects_comment: bool,
+    identifier_hasher: IncrementalIdentHasher,
 }
 
 #[derive(Debug, Clone)]
@@ -95,6 +96,10 @@ pub struct Lexer<'a> {
 
     /// `memchr` Finder for end of multi-line comments. Created lazily when first used.
     multi_line_comment_end_finder: Option<memchr::memmem::Finder<'static>>,
+
+    /// Incremental hasher for current identifier.
+    /// Used to compute hash during lexing for efficient `Ident` creation.
+    pub(crate) identifier_hasher: IncrementalIdentHasher,
 }
 
 impl<'a> Lexer<'a> {
@@ -124,6 +129,7 @@ impl<'a> Lexer<'a> {
             escaped_strings: FxHashMap::default(),
             escaped_templates: FxHashMap::default(),
             multi_line_comment_end_finder: None,
+            identifier_hasher: IncrementalIdentHasher::new(),
         }
     }
 
@@ -165,6 +171,7 @@ impl<'a> Lexer<'a> {
             errors_snapshot,
             has_pure_comment: self.trivia_builder.has_pure_comment,
             has_no_side_effects_comment: self.trivia_builder.has_no_side_effects_comment,
+            identifier_hasher: self.identifier_hasher,
         }
     }
 
@@ -182,6 +189,7 @@ impl<'a> Lexer<'a> {
             errors_snapshot,
             has_pure_comment: self.trivia_builder.has_pure_comment,
             has_no_side_effects_comment: self.trivia_builder.has_no_side_effects_comment,
+            identifier_hasher: self.identifier_hasher,
         }
     }
 
@@ -196,6 +204,7 @@ impl<'a> Lexer<'a> {
         self.token = checkpoint.token;
         self.trivia_builder.has_pure_comment = checkpoint.has_pure_comment;
         self.trivia_builder.has_no_side_effects_comment = checkpoint.has_no_side_effects_comment;
+        self.identifier_hasher = checkpoint.identifier_hasher;
     }
 
     pub fn peek_token(&mut self) -> Token {

diff --git a/crates/oxc_parser/src/lexer/search.rs b/crates/oxc_parser/src/lexer/search.rs
@@ -414,6 +414,29 @@ macro_rules! byte_search {
         }
     };
 
+    // With provided `start` position and identifier hashing.
+    // Delegates to main implementation, then hashes the scanned bytes.
+    (
+        lexer: $lexer:ident,
+        table: $table:ident,
+        start: $start:ident,
+        hash_identifier: true,
+        handle_eof: $eof_handler:expr,
+    ) => {{
+        let hash_start = $start;
+        let result = byte_search! {
+            lexer: $lexer,
+            table: $table,
+            start: $start,
+            continue_if: (byte, pos) false,
+            handle_eof: $eof_handler,
+        };
+        // Hash the bytes that were scanned (from start to current position)
+        let scanned = $lexer.source.str_from_pos_to_current(hash_start);
+        $lexer.identifier_hasher.write_bytes(scanned.as_bytes());
+        result
+    }};
+
     // Actual implementation - with both `start` and `continue_if`
     (
         lexer: $lexer:ident,

diff --git a/crates/oxc_parser/src/lexer/string.rs b/crates/oxc_parser/src/lexer/string.rs
@@ -277,6 +277,8 @@ impl<'a> Lexer<'a> {
     /// Get the current identifier with precomputed hash.
     #[inline]
     pub(crate) fn get_ident(&self, token: Token) -> Ident<'a> {
-        Ident::new(self.get_string(token))
+        let s = self.get_string(token);
+        let hash = self.identifier_hasher.finish();
+        Ident::new_with_hash(s, hash)
     }
 }
diff --git a/crates/oxc_parser/src/lexer/unicode.rs b/crates/oxc_parser/src/lexer/unicode.rs
@@ -12,6 +12,8 @@ use oxc_syntax::{
     line_terminator::{CR, LF, LS, PS, is_irregular_line_terminator},
 };
 
+use oxc_span::IncrementalIdentHasher;
+
 use super::{Kind, Lexer, Span};
 
 /// A Unicode escape sequence.
@@ -36,7 +38,10 @@ impl<'a> Lexer<'a> {
             c if is_identifier_start_unicode(c) => {
                 let start_pos = self.source.position();
                 self.consume_char();
-                self.identifier_tail_after_unicode(start_pos);
+                let id = self.identifier_tail_after_unicode(start_pos);
+                // Hash the full identifier for get_ident()
+                self.identifier_hasher = IncrementalIdentHasher::new();
+                self.identifier_hasher.write_bytes(id.as_bytes());
                 Kind::Ident
             }
             c if is_irregular_whitespace(c) => self.handle_irregular_whitespace(c),

diff --git a/crates/oxc_span/src/lib.rs b/crates/oxc_span/src/lib.rs
@@ -11,7 +11,8 @@ mod span;
 pub use cmp::ContentEq;
 pub use oxc_str::{
     ArenaIdentHashMap, Atom, CompactStr, Ident, IdentHashMap, IdentHashSet, IdentHasher,
-    MAX_INLINE_LEN as ATOM_MAX_INLINE_LEN, format_atom, format_compact_str, format_ident,
+    IncrementalIdentHasher, MAX_INLINE_LEN as ATOM_MAX_INLINE_LEN, format_atom, format_compact_str,
+    format_ident,
 };
 pub use source_type::{
     FileExtension, Language, LanguageVariant, ModuleKind, SourceType, UnknownExtension,