From 267922b3b43d199ba3117a1085e6977801872851 Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Wed, 9 Apr 2025 09:16:59 +0000 Subject: [PATCH] perf(transformer/jsx): speed up decoding `JSXText` strings (#9741) close: #10249 Speed up transforming `JSXText` nodes by: 1. Avoiding allocations (construct strings directly in arena, with no intermediate `String`s). 2. Where no HTML entity decoding or string concatenation is required, reuse a slice of source text, rather than generating a new `Atom` and copying string data. --- crates/oxc_transformer/src/jsx/jsx_impl.rs | 134 ++++++++++++++++++--- 1 file changed, 117 insertions(+), 17 deletions(-) diff --git a/crates/oxc_transformer/src/jsx/jsx_impl.rs b/crates/oxc_transformer/src/jsx/jsx_impl.rs index 6ba45bd70c49b..3b766a9e127c5 100644 --- a/crates/oxc_transformer/src/jsx/jsx_impl.rs +++ b/crates/oxc_transformer/src/jsx/jsx_impl.rs @@ -88,7 +88,7 @@ //! //! * Babel plugin implementation: -use oxc_allocator::{Box as ArenaBox, TakeIn, Vec as ArenaVec}; +use oxc_allocator::{Box as ArenaBox, String as ArenaString, TakeIn, Vec as ArenaVec}; use oxc_ast::{AstBuilder, NONE, ast::*}; use oxc_ecmascript::PropName; use oxc_span::{Atom, SPAN, Span}; @@ -860,7 +860,16 @@ impl<'a> JsxImpl<'a, '_> { ) -> Expression<'a> { match value { Some(JSXAttributeValue::StringLiteral(s)) => { - let jsx_text = Self::decode_entities(s.value.as_str()); + let mut decoded = None; + Self::decode_entities(s.value.as_str(), &mut decoded, s.value.len(), ctx); + let jsx_text = if let Some(decoded) = decoded { + // Text contains HTML entities which were decoded. + // `decoded` contains the decoded string as an `ArenaString`. Convert it to `Atom`. + Atom::from(decoded) + } else { + // No HTML entities needed to be decoded. Use the original `Atom` without copying. + s.value + }; ctx.ast.expression_string_literal(s.span, jsx_text, None) } Some(JSXAttributeValue::Element(e)) => self.transform_jsx_element(e, ctx), @@ -948,8 +957,8 @@ impl<'a> JsxImpl<'a, '_> { } fn transform_jsx_text(text: &JSXText<'a>, ctx: &TraverseCtx<'a>) -> Option> { - Self::fixup_whitespace_and_decode_entities(text.value.as_str()) - .map(|s| ctx.ast.expression_string_literal(text.span, s, None)) + Self::fixup_whitespace_and_decode_entities(text.value, ctx) + .map(|value| ctx.ast.expression_string_literal(text.span, value, None)) } /// JSX trims whitespace at the end and beginning of lines, except that the @@ -967,14 +976,49 @@ impl<'a> JsxImpl<'a, '_> { /// - Remove empty lines and join the rest with " ". /// /// - fn fixup_whitespace_and_decode_entities(text: &str) -> Option { - let mut acc: Option = None; + fn fixup_whitespace_and_decode_entities( + text: Atom<'a>, + ctx: &TraverseCtx<'a>, + ) -> Option> { + // Avoid copying strings in the common case where there's only 1 line of text, + // and it contains no HTML entities that need decoding. + // + // Where we do have to decode HTML entities, or concatenate multiple lines, assemble the + // concatenated text directly in arena, in an `ArenaString` (the accumulator `acc`), + // to avoid allocations. Initialize that `ArenaString` with capacity equal to length of + // the original text. This may be a bit more capacity than is required, once whitespace + // is removed, but it's highly unlikely to be insufficient capacity, so the `ArenaString` + // shouldn't need to reallocate while it's being constructed. + // + // When first line containing some text is found: + // * If it contains HTML entities, decode them and write decoded text to accumulator `acc`. + // * Otherwise, store trimmed text in `only_line` as an `Atom<'a>`. + // + // When another line containing some text is found: + // * If accumulator isn't already initialized, initialize it, starting with `only_line`. + // * Push a space to the accumulator. + // * Decode current line into the accumulator. + // + // At the end: + // * If accumulator is initialized, convert the `ArenaString` to an `Atom` and return it. + // * If `only_line` contains a string, that means only 1 line contained text, and that line + // didn't contain any HTML entities which needed decoding. + // So we can just return the `Atom` that's in `only_line` (without any copying). + + let mut acc: Option = None; + let mut only_line: Option> = None; let mut first_non_whitespace: Option = Some(0); let mut last_non_whitespace: Option = None; for (index, c) in text.char_indices() { if is_line_terminator(c) { if let (Some(first), Some(last)) = (first_non_whitespace, last_non_whitespace) { - acc = Some(Self::add_line_of_jsx_text(acc, &text[first..last])); + Self::add_line_of_jsx_text( + Atom::from(&text.as_str()[first..last]), + &mut acc, + &mut only_line, + text.len(), + ctx, + ); } first_non_whitespace = None; } else if !is_white_space_single_line(c) { @@ -984,23 +1028,69 @@ impl<'a> JsxImpl<'a, '_> { } } } + if let Some(first) = first_non_whitespace { - Some(Self::add_line_of_jsx_text(acc, &text[first..])) - } else { - acc + Self::add_line_of_jsx_text( + Atom::from(&text.as_str()[first..]), + &mut acc, + &mut only_line, + text.len(), + ctx, + ); } + + if let Some(acc) = acc { Some(Atom::from(acc)) } else { only_line } } - fn add_line_of_jsx_text(acc: Option, trimmed_line: &str) -> String { - let decoded = Self::decode_entities(trimmed_line); - if let Some(acc) = acc { format!("{acc} {decoded}") } else { decoded } + fn add_line_of_jsx_text( + trimmed_line: Atom<'a>, + acc: &mut Option>, + only_line: &mut Option>, + text_len: usize, + ctx: &TraverseCtx<'a>, + ) { + if let Some(buffer) = acc.as_mut() { + // Already some text in accumulator. Push a space before this line is added to `acc`. + buffer.push(' '); + } else if let Some(only_line) = only_line.take() { + // This is the 2nd line containing text. Previous line did not contain any HTML entities. + // Generate an accumulator containing previous line and a trailing space. + // Current line will be added to the accumulator after it. + let mut buffer = ArenaString::with_capacity_in(text_len, ctx.ast.allocator); + buffer.push_str(only_line.as_str()); + buffer.push(' '); + *acc = Some(buffer); + } + + // Decode any HTML entities in this line + Self::decode_entities(trimmed_line.as_str(), acc, text_len, ctx); + + if acc.is_none() { + // This is the first line containing text, and there are no HTML entities in this line. + // Record this line in `only_line`. + // If this turns out to be the only line, we won't need to construct an `ArenaString`, + // so avoid all copying. + *only_line = Some(trimmed_line); + } } /// Replace entities like " ", "{", and "�" with the characters they encode. /// * See /// Code adapted from - fn decode_entities(s: &str) -> String { - let mut buffer = String::new(); + /// + /// If either: + /// (a) Text contains any HTML entities that need to be decoded, or + /// (b) accumulator `acc` passed in to this method is `Some` + /// then push the decoded string to `acc` (initializing it first if required). + /// + /// Otherwise, leave `acc` as `None`. This indicates that the text contains no HTML entities. + /// Caller can use a slice of the original text, rather than making any copies. + fn decode_entities( + s: &str, + acc: &mut Option>, + text_len: usize, + ctx: &TraverseCtx<'a>, + ) { let mut chars = s.char_indices(); let mut prev = 0; while let Some((i, c)) = chars.next() { @@ -1014,6 +1104,10 @@ impl<'a> JsxImpl<'a, '_> { } } if let Some(end) = end { + let buffer = acc.get_or_insert_with(|| { + ArenaString::with_capacity_in(text_len, ctx.ast.allocator) + }); + buffer.push_str(&s[prev..start]); prev = end + 1; let word = &s[start + 1..end]; @@ -1041,11 +1135,17 @@ impl<'a> JsxImpl<'a, '_> { buffer.push('&'); buffer.push_str(word); buffer.push(';'); + } else { + // Reached end of text without finding a `;` after the `&`. + // No point searching for a further `&`, so exit the loop. + break; } } } - buffer.push_str(&s[prev..]); - buffer + + if let Some(buffer) = acc.as_mut() { + buffer.push_str(&s[prev..]); + } } /// The react jsx/jsxs transform falls back to `createElement` when an explicit `key` argument comes after a spread