From 1aed99b66d2e8bab0632643035af60a9c4c36bc1 Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Wed, 14 May 2025 04:49:57 +0000 Subject: [PATCH] perf(transformer/jsx): use `memchr` for parsing JSX pragma comments (#11001) Use `memchr` for finding `@` when parsing JSX pragmas from comments. This wins back most (but not all) of the perf loss of #10983 on `antd.js` benchmark, and preserves the perf gain of #10983 on `cal.com.tsx` benchmark. Interestingly, using `memchr` to search just for `@` and then checking next 3 bytes are `jsx` separately is measurably faster than using `memchr::memmem::Finder` to search for `@jsx`. --- Cargo.lock | 1 + crates/oxc_transformer/Cargo.toml | 1 + crates/oxc_transformer/src/jsx/comments.rs | 32 ++++++++++++++-------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2e0dae79ae026..e41651543e896 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2178,6 +2178,7 @@ dependencies = [ "indexmap", "insta", "itoa", + "memchr", "oxc-browserslist", "oxc_allocator", "oxc_ast", diff --git a/crates/oxc_transformer/Cargo.toml b/crates/oxc_transformer/Cargo.toml index f6dfa3956f9e8..263d8d9611094 100644 --- a/crates/oxc_transformer/Cargo.toml +++ b/crates/oxc_transformer/Cargo.toml @@ -40,6 +40,7 @@ compact_str = { workspace = true } cow-utils = { workspace = true } indexmap = { workspace = true } itoa = { workspace = true } +memchr = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } diff --git a/crates/oxc_transformer/src/jsx/comments.rs b/crates/oxc_transformer/src/jsx/comments.rs index 8cc62e2864d5f..0959005f83d20 100644 --- a/crates/oxc_transformer/src/jsx/comments.rs +++ b/crates/oxc_transformer/src/jsx/comments.rs @@ -1,5 +1,7 @@ use std::borrow::Cow; +use memchr::memchr; + use oxc_ast::Comment; use crate::{JsxOptions, JsxRuntime, TransformCtx, TypeScriptOptions}; @@ -96,20 +98,26 @@ enum PragmaType { fn find_jsx_pragma(mut comment_str: &str) -> Option<(PragmaType, &str, &str)> { let pragma_type; loop { - // Search for `@jsx`. - let mut at_sign_index = None; - for (index, next4) in comment_str.as_bytes().windows(4).enumerate() { - if next4 == b"@jsx" { - at_sign_index = Some(index); - break; - } + // Search for `@`. + // Note: Using `memchr::memmem::Finder` to search for `@jsx` is slower than only using `memchr` + // to find `@` characters, and then checking if `@` is followed by `jsx` separately. + let at_sign_index = memchr(b'@', comment_str.as_bytes())?; + + // Check `@` is start of `@jsx`. + // Note: Checking 4 bytes including leading `@` is faster than checking the 3 bytes after `@`, + // because 4 bytes is a `u32`. + let next4 = comment_str.as_bytes().get(at_sign_index..at_sign_index + 4)?; + if next4 != b"@jsx" { + // Not `@jsx`. Trim off up to and including `@` and search again. + // SAFETY: Byte at `at_sign_index` is `@`, so `at_sign_index + 1` is either within string + // or end of string, and on a UTF-8 char boundary. + comment_str = unsafe { comment_str.get_unchecked(at_sign_index + 1..) }; + continue; } - // Exit if not found - let at_sign_index = at_sign_index?; - // Trim `@jsx` from start of `comment_str`. + // Trim `@jsx` and everything before it from start of `comment_str`. // SAFETY: 4 bytes starting at `at_sign_index` are `@jsx`, so `at_sign_index + 4` is within string - // or end of string, and must be on a UTF-8 character boundary + // or end of string, and must be on a UTF-8 character boundary. comment_str = unsafe { comment_str.get_unchecked(at_sign_index + 4..) }; // Get rest of keyword e.g. `Runtime` in `@jsxRuntime` @@ -230,6 +238,8 @@ mod tests { ("@jsxX @jsx h @jsxX", &[(PragmaType::Jsx, "h")]), ("@jsxMoon @jsx h @jsxMoon", &[(PragmaType::Jsx, "h")]), ("@jsx @jsx h", &[(PragmaType::Jsx, "@jsx")]), + // Multiple `@` signs + ("@@@@@jsx h", &[(PragmaType::Jsx, "h")]), ]; let prefixes = ["", " ", "\n\n", "*\n* "];