Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 117 additions & 17 deletions crates/oxc_transformer/src/jsx/jsx_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
//!
//! * Babel plugin implementation: <https://github.com/babel/babel/tree/v7.26.2/packages/babel-helper-builder-react-jsx>

use oxc_allocator::{Box as ArenaBox, TakeIn, Vec as ArenaVec};
use oxc_allocator::{Box as ArenaBox, String as ArenaString, TakeIn, Vec as ArenaVec};
use oxc_ast::{AstBuilder, NONE, ast::*};
use oxc_ecmascript::PropName;
use oxc_span::{Atom, SPAN, Span};
Expand Down Expand Up @@ -860,7 +860,16 @@ impl<'a> JsxImpl<'a, '_> {
) -> Expression<'a> {
match value {
Some(JSXAttributeValue::StringLiteral(s)) => {
let jsx_text = Self::decode_entities(s.value.as_str());
let mut decoded = None;
Self::decode_entities(s.value.as_str(), &mut decoded, s.value.len(), ctx);
let jsx_text = if let Some(decoded) = decoded {
// Text contains HTML entities which were decoded.
// `decoded` contains the decoded string as an `ArenaString`. Convert it to `Atom`.
Atom::from(decoded)
} else {
// No HTML entities needed to be decoded. Use the original `Atom` without copying.
s.value
};
ctx.ast.expression_string_literal(s.span, jsx_text, None)
}
Some(JSXAttributeValue::Element(e)) => self.transform_jsx_element(e, ctx),
Expand Down Expand Up @@ -948,8 +957,8 @@ impl<'a> JsxImpl<'a, '_> {
}

fn transform_jsx_text(text: &JSXText<'a>, ctx: &TraverseCtx<'a>) -> Option<Expression<'a>> {
Self::fixup_whitespace_and_decode_entities(text.value.as_str())
.map(|s| ctx.ast.expression_string_literal(text.span, s, None))
Self::fixup_whitespace_and_decode_entities(text.value, ctx)
.map(|value| ctx.ast.expression_string_literal(text.span, value, None))
}

/// JSX trims whitespace at the end and beginning of lines, except that the
Expand All @@ -967,14 +976,49 @@ impl<'a> JsxImpl<'a, '_> {
/// - Remove empty lines and join the rest with " ".
///
/// <https://github.com/microsoft/TypeScript/blob/f0374ce2a9c465e27a15b7fa4a347e2bd9079450/src/compiler/transformers/jsx.ts#L557-L608>
fn fixup_whitespace_and_decode_entities(text: &str) -> Option<String> {
let mut acc: Option<String> = None;
fn fixup_whitespace_and_decode_entities(
text: Atom<'a>,
ctx: &TraverseCtx<'a>,
) -> Option<Atom<'a>> {
// Avoid copying strings in the common case where there's only 1 line of text,
// and it contains no HTML entities that need decoding.
//
// Where we do have to decode HTML entities, or concatenate multiple lines, assemble the
// concatenated text directly in arena, in an `ArenaString` (the accumulator `acc`),
// to avoid allocations. Initialize that `ArenaString` with capacity equal to length of
// the original text. This may be a bit more capacity than is required, once whitespace
// is removed, but it's highly unlikely to be insufficient capacity, so the `ArenaString`
// shouldn't need to reallocate while it's being constructed.
//
// When first line containing some text is found:
// * If it contains HTML entities, decode them and write decoded text to accumulator `acc`.
// * Otherwise, store trimmed text in `only_line` as an `Atom<'a>`.
//
// When another line containing some text is found:
// * If accumulator isn't already initialized, initialize it, starting with `only_line`.
// * Push a space to the accumulator.
// * Decode current line into the accumulator.
//
// At the end:
// * If accumulator is initialized, convert the `ArenaString` to an `Atom` and return it.
// * If `only_line` contains a string, that means only 1 line contained text, and that line
// didn't contain any HTML entities which needed decoding.
// So we can just return the `Atom` that's in `only_line` (without any copying).

let mut acc: Option<ArenaString> = None;
let mut only_line: Option<Atom<'a>> = None;
let mut first_non_whitespace: Option<usize> = Some(0);
let mut last_non_whitespace: Option<usize> = None;
for (index, c) in text.char_indices() {
if is_line_terminator(c) {
if let (Some(first), Some(last)) = (first_non_whitespace, last_non_whitespace) {
acc = Some(Self::add_line_of_jsx_text(acc, &text[first..last]));
Self::add_line_of_jsx_text(
Atom::from(&text.as_str()[first..last]),
&mut acc,
&mut only_line,
text.len(),
ctx,
);
}
first_non_whitespace = None;
} else if !is_white_space_single_line(c) {
Expand All @@ -984,23 +1028,69 @@ impl<'a> JsxImpl<'a, '_> {
}
}
}

if let Some(first) = first_non_whitespace {
Some(Self::add_line_of_jsx_text(acc, &text[first..]))
} else {
acc
Self::add_line_of_jsx_text(
Atom::from(&text.as_str()[first..]),
&mut acc,
&mut only_line,
text.len(),
ctx,
);
}

if let Some(acc) = acc { Some(Atom::from(acc)) } else { only_line }
}

fn add_line_of_jsx_text(acc: Option<String>, trimmed_line: &str) -> String {
let decoded = Self::decode_entities(trimmed_line);
if let Some(acc) = acc { format!("{acc} {decoded}") } else { decoded }
fn add_line_of_jsx_text(
trimmed_line: Atom<'a>,
acc: &mut Option<ArenaString<'a>>,
only_line: &mut Option<Atom<'a>>,
text_len: usize,
ctx: &TraverseCtx<'a>,
) {
if let Some(buffer) = acc.as_mut() {
// Already some text in accumulator. Push a space before this line is added to `acc`.
buffer.push(' ');
} else if let Some(only_line) = only_line.take() {
// This is the 2nd line containing text. Previous line did not contain any HTML entities.
// Generate an accumulator containing previous line and a trailing space.
// Current line will be added to the accumulator after it.
let mut buffer = ArenaString::with_capacity_in(text_len, ctx.ast.allocator);
buffer.push_str(only_line.as_str());
buffer.push(' ');
*acc = Some(buffer);
}

// Decode any HTML entities in this line
Self::decode_entities(trimmed_line.as_str(), acc, text_len, ctx);

if acc.is_none() {
// This is the first line containing text, and there are no HTML entities in this line.
// Record this line in `only_line`.
// If this turns out to be the only line, we won't need to construct an `ArenaString`,
// so avoid all copying.
*only_line = Some(trimmed_line);
}
}

/// Replace entities like "&nbsp;", "&#123;", and "&#xDEADBEEF;" with the characters they encode.
/// * See <https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references>
/// Code adapted from <https://github.com/microsoft/TypeScript/blob/514f7e639a2a8466c075c766ee9857a30ed4e196/src/compiler/transformers/jsx.ts#L617C1-L635>
fn decode_entities(s: &str) -> String {
let mut buffer = String::new();
///
/// If either:
/// (a) Text contains any HTML entities that need to be decoded, or
/// (b) accumulator `acc` passed in to this method is `Some`
/// then push the decoded string to `acc` (initializing it first if required).
///
/// Otherwise, leave `acc` as `None`. This indicates that the text contains no HTML entities.
/// Caller can use a slice of the original text, rather than making any copies.
fn decode_entities(
s: &str,
acc: &mut Option<ArenaString<'a>>,
text_len: usize,
ctx: &TraverseCtx<'a>,
) {
let mut chars = s.char_indices();
let mut prev = 0;
while let Some((i, c)) = chars.next() {
Expand All @@ -1014,6 +1104,10 @@ impl<'a> JsxImpl<'a, '_> {
}
}
if let Some(end) = end {
let buffer = acc.get_or_insert_with(|| {
ArenaString::with_capacity_in(text_len, ctx.ast.allocator)
});

buffer.push_str(&s[prev..start]);
prev = end + 1;
let word = &s[start + 1..end];
Expand Down Expand Up @@ -1041,11 +1135,17 @@ impl<'a> JsxImpl<'a, '_> {
buffer.push('&');
buffer.push_str(word);
buffer.push(';');
} else {
// Reached end of text without finding a `;` after the `&`.
// No point searching for a further `&`, so exit the loop.
break;
}
}
}
buffer.push_str(&s[prev..]);
buffer

if let Some(buffer) = acc.as_mut() {
buffer.push_str(&s[prev..]);
}
}

/// The react jsx/jsxs transform falls back to `createElement` when an explicit `key` argument comes after a spread
Expand Down
Loading