From fdfec2112863ab2a2e994433f7be69886dad4bef Mon Sep 17 00:00:00 2001
From: overlookmotel <557937+overlookmotel@users.noreply.github.com>
Date: Wed, 13 Aug 2025 15:33:20 +0000
Subject: [PATCH] refactor(lexer): simplify byte handler macros (#13057)

Pure refactor. Simplify the byte handler macros in the lexer in 3 ways:

#### 1. Remove the `const BLAH: ByteHandler = { ... };` wrappers from generated code

I don't think they're necessary, and I'm not sure why I put them there in the first place.

Before:

```rs
const UNI: ByteHandler = {
    #[expect(non_snake_case)]
    fn UNI(lexer: &mut Lexer) -> Kind {
        lexer.unicode_char_handler()
    }
    UNI
};
```

After:

```rs
#[expect(non_snake_case)]
fn UNI(lexer: &mut Lexer) -> Kind {
    lexer.unicode_char_handler()
}
```

Each byte handler still has a useful name in CodSpeed flame graphs after this change.

#### 2. Remove the `byte_handler!` macro

The `ascii_byte_handler!` and `ascii_identifier_handler!` macros remain, but they no longer use the `byte_handler!` macro internally.

Removing the macro-within-macro pattern may make this code easier for AI to understand (and probably ditto for humans!).

#### 3. Shorten macro expansion

Hoist `use oxc_data_structures::assert_unchecked;` to the top of the file, so it doesn't need to be repeated in each macro expansion.
---
 crates/oxc_parser/src/lexer/byte_handlers.rs | 112 +++++++------------
 1 file changed, 38 insertions(+), 74 deletions(-)

diff --git a/crates/oxc_parser/src/lexer/byte_handlers.rs b/crates/oxc_parser/src/lexer/byte_handlers.rs
index e2c0b0a7853fa..901806bc7ad19 100644
--- a/crates/oxc_parser/src/lexer/byte_handlers.rs
+++ b/crates/oxc_parser/src/lexer/byte_handlers.rs
@@ -1,3 +1,5 @@
+use oxc_data_structures::assert_unchecked;
+
 use crate::diagnostics;
 
 use super::{Kind, Lexer};
@@ -41,50 +43,14 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [
     UNI, UNI, UNI, UNI, UNI, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, UER, // F
 ];
 
-/// Macro for defining a byte handler.
-///
-/// Use `ascii_byte_handler!` macro for ASCII characters, which adds optimizations for ASCII.
-///
-/// Handlers are defined as functions instead of closures, so they have names in flame graphs.
-///
-/// ```
-/// byte_handler!(UNI(lexer) {
-///   lexer.unicode_char_handler()
-/// });
-/// ```
-///
-/// expands to:
-///
-/// ```
-/// const UNI: ByteHandler = {
-///   #[expect(non_snake_case)]
-///   fn UNI(lexer: &mut Lexer) -> Kind {
-///     lexer.unicode_char_handler()
-///   }
-///   UNI
-/// };
-/// ```
-macro_rules! byte_handler {
-    ($id:ident($lex:ident) $body:expr) => {
-        const $id: ByteHandler = {
-            #[expect(non_snake_case)]
-            fn $id($lex: &mut Lexer) -> Kind {
-                $body
-            }
-            $id
-        };
-    };
-}
-
 /// Macro for defining byte handler for an ASCII character.
 ///
-/// In addition to defining a `const` for the handler, it also asserts that lexer
-/// is not at end of file, and that next char is ASCII.
+/// Asserts that lexer is not at end of file, and that next char is ASCII.
 /// Where the handler is for an ASCII character, these assertions are self-evidently true.
 ///
 /// These assertions produce no runtime code, but hint to the compiler that it can assume that
 /// next char is ASCII, and it uses that information to optimize the rest of the handler.
-/// e.g. `lexer.consume_char()` becomes just a single assembler instruction.
+/// e.g. `lexer.consume_char()` becomes just a single assembly instruction.
 /// Without the assertions, the compiler is unable to deduce the next char is ASCII, due to
 /// the indirection of the `BYTE_HANDLERS` jump table.
 ///
@@ -95,42 +61,38 @@ macro_rules! byte_handler {
 ///
 /// ```
 /// ascii_byte_handler!(SPS(lexer) {
-///   lexer.consume_char();
-///   Kind::WhiteSpace
+///     lexer.consume_char();
+///     Kind::WhiteSpace
 /// });
 /// ```
 ///
 /// expands to:
 ///
 /// ```
-/// const SPS: ByteHandler = {
-///   #[expect(non_snake_case)]
-///   fn SPS(lexer: &mut Lexer) {
+/// #[expect(non_snake_case)]
+/// fn SPS(lexer: &mut Lexer) -> Kind {
 ///     // SAFETY: This macro is only used for ASCII characters
 ///     unsafe {
-///       use oxc_data_structures::assert_unchecked;
-///       assert_unchecked!(!lexer.source.is_eof());
-///       assert_unchecked!(lexer.source.peek_byte_unchecked() < 128);
+///         assert_unchecked!(!lexer.source.is_eof());
+///         assert_unchecked!(lexer.source.peek_byte_unchecked() < 128);
 ///     }
 ///     {
-///       lexer.consume_char();
-///       Kind::WhiteSpace
+///         lexer.consume_char();
+///         Kind::WhiteSpace
 ///     }
-///   }
-///   SPS
-/// };
+/// }
 /// ```
 macro_rules! ascii_byte_handler {
     ($id:ident($lex:ident) $body:expr) => {
-        byte_handler!($id($lex) {
+        #[expect(non_snake_case)]
+        fn $id($lex: &mut Lexer) -> Kind {
             // SAFETY: This macro is only used for ASCII characters
             unsafe {
-                use oxc_data_structures::assert_unchecked;
                 assert_unchecked!(!$lex.source.is_eof());
                 assert_unchecked!($lex.source.peek_byte_unchecked() < 128);
             }
             $body
-        });
+        }
     };
 }
 
@@ -148,36 +110,34 @@ macro_rules! ascii_byte_handler {
 ///
 /// ```
 /// ascii_identifier_handler!(L_G(id_without_first_char) match id_without_first_char {
-///   "et" => Kind::Get,
-///   "lobal" => Kind::Global,
-///   _ => Kind::Ident,
+///     "et" => Kind::Get,
+///     "lobal" => Kind::Global,
+///     _ => Kind::Ident,
 /// });
 /// ```
 ///
 /// expands to:
 ///
 /// ```
-/// const L_G: ByteHandler = {
-///   #[expect(non_snake_case)]
-///   fn L_G(lexer: &mut Lexer) -> Kind {
+/// #[expect(non_snake_case)]
+/// fn L_G(lexer: &mut Lexer) -> Kind {
 ///     // SAFETY: This macro is only used for ASCII characters
 ///     let id_without_first_char = unsafe { lexer.identifier_name_handler() };
 ///     match id_without_first_char {
-///       "et" => Kind::Get,
-///       "lobal" => Kind::Global,
-///       _ => Kind::Ident,
+///         "et" => Kind::Get,
+///         "lobal" => Kind::Global,
+///         _ => Kind::Ident,
 ///     }
-///   }
-///   L_G
-/// };
+/// }
 /// ```
 macro_rules! ascii_identifier_handler {
     ($id:ident($str:ident) $body:expr) => {
-        byte_handler!($id(lexer) {
+        #[expect(non_snake_case)]
+        fn $id(lexer: &mut Lexer) -> Kind {
             // SAFETY: This macro is only used for ASCII characters
             let $str = unsafe { lexer.identifier_name_handler() };
             $body
-        });
+        }
     };
 }
 
@@ -687,17 +647,21 @@ ascii_identifier_handler!(L_Y(id_without_first_char) match id_without_first_char
 });
 
 // Non-ASCII characters.
-// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars.
-byte_handler!(UNI(lexer) {
+//
+// Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII chars.
+#[expect(non_snake_case)]
+fn UNI(lexer: &mut Lexer) -> Kind {
     lexer.unicode_char_handler()
-});
+}
 
 // UTF-8 continuation bytes (0x80 - 0xBF) (i.e. middle of a multi-byte UTF-8 sequence)
 // + and byte values which are not legal in UTF-8 strings (0xC0, 0xC1, 0xF5 - 0xFF).
 // `handle_byte()` should only be called with 1st byte of a valid UTF-8 character,
 // so something has gone wrong if we get here.
 // https://datatracker.ietf.org/doc/html/rfc3629
-// NB: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes.
-byte_handler!(UER(_lexer) {
+//
+// Note: Must not use `ascii_byte_handler!` macro, as this handler is for non-ASCII bytes.
+#[expect(non_snake_case)]
+fn UER(_lexer: &mut Lexer) -> Kind {
     unreachable!();
-});
+}