From 1aed99b66d2e8bab0632643035af60a9c4c36bc1 Mon Sep 17 00:00:00 2001
From: overlookmotel <557937+overlookmotel@users.noreply.github.com>
Date: Wed, 14 May 2025 04:49:57 +0000
Subject: [PATCH] perf(transformer/jsx): use `memchr` for parsing JSX pragma
 comments (#11001)

Use `memchr` for finding `@` when parsing JSX pragmas from comments.

This wins back most (but not all) of the perf loss of #10983 on the `antd.js` benchmark, and preserves the perf gain of #10983 on the `cal.com.tsx` benchmark.

Interestingly, using `memchr` to search just for `@`, and then separately checking that it is followed by `jsx`, is measurably faster than using `memchr::memmem::Finder` to search for `@jsx`.
---
 Cargo.lock                                 |  1 +
 crates/oxc_transformer/Cargo.toml          |  1 +
 crates/oxc_transformer/src/jsx/comments.rs | 32 ++++++++++++++--------
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 2e0dae79ae026..e41651543e896 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2178,6 +2178,7 @@ dependencies = [
  "indexmap",
  "insta",
  "itoa",
+ "memchr",
  "oxc-browserslist",
  "oxc_allocator",
  "oxc_ast",
diff --git a/crates/oxc_transformer/Cargo.toml b/crates/oxc_transformer/Cargo.toml
index f6dfa3956f9e8..263d8d9611094 100644
--- a/crates/oxc_transformer/Cargo.toml
+++ b/crates/oxc_transformer/Cargo.toml
@@ -40,6 +40,7 @@ compact_str = { workspace = true }
 cow-utils = { workspace = true }
 indexmap = { workspace = true }
 itoa = { workspace = true }
+memchr = { workspace = true }
 rustc-hash = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
diff --git a/crates/oxc_transformer/src/jsx/comments.rs b/crates/oxc_transformer/src/jsx/comments.rs
index 8cc62e2864d5f..0959005f83d20 100644
--- a/crates/oxc_transformer/src/jsx/comments.rs
+++ b/crates/oxc_transformer/src/jsx/comments.rs
@@ -1,5 +1,7 @@
 use std::borrow::Cow;
 
+use memchr::memchr;
+
 use oxc_ast::Comment;
 
 use crate::{JsxOptions, JsxRuntime, TransformCtx, TypeScriptOptions};
@@ -96,20 +98,26 @@ enum PragmaType {
 fn find_jsx_pragma(mut comment_str: &str) -> Option<(PragmaType, &str, &str)> {
     let pragma_type;
     loop {
-        // Search for `@jsx`.
-        let mut at_sign_index = None;
-        for (index, next4) in comment_str.as_bytes().windows(4).enumerate() {
-            if next4 == b"@jsx" {
-                at_sign_index = Some(index);
-                break;
-            }
+        // Search for `@`.
+        // Note: Using `memchr::memmem::Finder` to search for `@jsx` is slower than only using `memchr`
+        // to find `@` characters, and then checking if `@` is followed by `jsx` separately.
+        let at_sign_index = memchr(b'@', comment_str.as_bytes())?;
+
+        // Check `@` is start of `@jsx`.
+        // Note: Checking 4 bytes including leading `@` is faster than checking the 3 bytes after `@`,
+        // because 4 bytes is a `u32`.
+        let next4 = comment_str.as_bytes().get(at_sign_index..at_sign_index + 4)?;
+        if next4 != b"@jsx" {
+            // Not `@jsx`. Trim off up to and including `@` and search again.
+            // SAFETY: Byte at `at_sign_index` is `@`, so `at_sign_index + 1` is either within string
+            // or end of string, and on a UTF-8 char boundary.
+            comment_str = unsafe { comment_str.get_unchecked(at_sign_index + 1..) };
+            continue;
         }
-        // Exit if not found
-        let at_sign_index = at_sign_index?;
 
-        // Trim `@jsx` from start of `comment_str`.
+        // Trim `@jsx` and everything before it from start of `comment_str`.
         // SAFETY: 4 bytes starting at `at_sign_index` are `@jsx`, so `at_sign_index + 4` is within string
-        // or end of string, and must be on a UTF-8 character boundary
+        // or end of string, and must be on a UTF-8 character boundary.
         comment_str = unsafe { comment_str.get_unchecked(at_sign_index + 4..) };
 
         // Get rest of keyword e.g. `Runtime` in `@jsxRuntime`
@@ -230,6 +238,8 @@ mod tests {
             ("@jsxX @jsx h @jsxX", &[(PragmaType::Jsx, "h")]),
             ("@jsxMoon @jsx h @jsxMoon", &[(PragmaType::Jsx, "h")]),
             ("@jsx @jsx h", &[(PragmaType::Jsx, "@jsx")]),
+            // Multiple `@` signs
+            ("@@@@@jsx h", &[(PragmaType::Jsx, "h")]),
         ];
 
         let prefixes = ["", "    ", "\n\n", "*\n* "];