diff --git a/crates/oxc_transformer/src/jsx/comments.rs b/crates/oxc_transformer/src/jsx/comments.rs index e23e00e78d772..92469790e8edf 100644 --- a/crates/oxc_transformer/src/jsx/comments.rs +++ b/crates/oxc_transformer/src/jsx/comments.rs @@ -1,29 +1,32 @@ use std::borrow::Cow; -use oxc_ast::{Comment, CommentKind}; -use oxc_syntax::identifier::is_irregular_whitespace; +use oxc_ast::Comment; use crate::{JsxOptions, JsxRuntime, TransformCtx, TypeScriptOptions}; /// Scan through all comments and find the following pragmas: /// +/// * @jsx Preact.h /// * @jsxRuntime classic / automatic /// * @jsxImportSource custom-jsx-library /// * @jsxFrag Preact.Fragment -/// * @jsx Preact.h /// /// The comment does not need to be a JSDoc comment, /// otherwise `JSDoc` could be used instead. /// -/// This behavior is aligned with Babel. +/// This behavior is aligned with ESBuild. +/// Babel is less liberal - it doesn't accept multiple pragmas in a single line +/// e.g. `/** @jsx h @jsxRuntime classic */` +/// pub fn update_options_with_comments( comments: &[Comment], typescript: &mut TypeScriptOptions, jsx: &mut JsxOptions, ctx: &TransformCtx, ) { + let source_text = ctx.source_text; for comment in comments { - update_options_with_comment(typescript, jsx, comment, ctx.source_text); + update_options_with_comment(typescript, jsx, comment, source_text); } } @@ -33,211 +36,126 @@ fn update_options_with_comment( comment: &Comment, source_text: &str, ) { - let Some((keyword, remainder)) = find_jsx_pragma(comment, source_text) else { return }; - - match keyword { - // @jsx - "" => { - // Don't set React option unless React transform is enabled - // otherwise can cause error in `ReactJsx::new` - if jsx.jsx_plugin || jsx.development { - jsx.pragma = Some(remainder.to_string()); + let mut comment_str = comment.content_span().source_text(source_text); + + while let Some((keyword, value, remainder)) = find_jsx_pragma(comment_str) { + match keyword { + // @jsx + PragmaType::Jsx => { + // Don't set React option unless React transform is enabled + // otherwise can cause error in `ReactJsx::new` + if jsx.jsx_plugin || jsx.development { + jsx.pragma = Some(value.to_string()); + } + typescript.jsx_pragma = Cow::Owned(value.to_string()); } - typescript.jsx_pragma = Cow::from(remainder.to_string()); - } - // @jsxRuntime - "Runtime" => { - jsx.runtime = match remainder { - "classic" => JsxRuntime::Classic, - "automatic" => JsxRuntime::Automatic, - _ => return, - }; - } - // @jsxImportSource - "ImportSource" => { - jsx.import_source = Some(remainder.to_string()); - } - // @jsxFrag - "Frag" => { - // Don't set React option unless React transform is enabled - // otherwise can cause error in `ReactJsx::new` - if jsx.jsx_plugin || jsx.development { - jsx.pragma_frag = Some(remainder.to_string()); + // @jsxRuntime + PragmaType::JsxRuntime => match value { + "classic" => jsx.runtime = JsxRuntime::Classic, + "automatic" => jsx.runtime = JsxRuntime::Automatic, + _ => {} + }, + // @jsxImportSource + PragmaType::JsxImportSource => { + jsx.import_source = Some(value.to_string()); + } + // @jsxFrag + PragmaType::JsxFrag => { + // Don't set React option unless React transform is enabled + // otherwise can cause error in `ReactJsx::new` + if jsx.jsx_plugin || jsx.development { + jsx.pragma_frag = Some(value.to_string()); + } + typescript.jsx_pragma_frag = Cow::Owned(value.to_string()); } - typescript.jsx_pragma_frag = Cow::from(remainder.to_string()); } - _ => {} - } -} - -/// Search comment for a JSX pragma. -/// -/// Searches for `@jsx` in the comment. -/// -/// If found, returns: -/// * The pragma keyword (not including `jsx` prefix). -/// * The remainder of the comment (with whitespace trimmed off). -/// -/// If none found, returns `None`. -fn find_jsx_pragma<'a>( - comment: &Comment, - source_text: &'a str, -) -> Option<(/* keyword */ &'a str, /* remainder */ &'a str)> { - // Strip whitespace and `*`s from start of comment, and find leading `@`. - // Slice from start of comment to end of file, not end of comment. - // This allows `find_at_sign` functions to search in chunks of 8 bytes without hitting end of string. - let comment_span = comment.content_span(); - let comment_str = &source_text[comment_span.start as usize..]; - let comment_str = match comment.kind { - CommentKind::Line => find_at_sign_in_line_comment(comment_str)?, - CommentKind::Block => find_at_sign_in_block_comment(comment_str)?, - }; - - // Check next 3 chars after `@` is `jsx` - let first_3_bytes = comment_str.as_bytes().get(..3)?; - if first_3_bytes != b"jsx" { - return None; - } - let comment_str = &comment_str[3..]; - // `@jsx` found. `comment_str` contains all source text after `@jsx` - // Find end of `@` keyword. `keyword` does not include 'jsx' prefix. - let (keyword, remainder) = split_at_whitespace(comment_str)?; - - // Slice off after end of comment - let remainder_start = source_text.len() - remainder.len(); - if remainder_start >= comment_span.end as usize { - // Space was after end of comment - return None; + // Search again for another pragma + comment_str = remainder; } - let len = comment_span.end as usize - remainder_start; - let remainder = &remainder[..len]; - // Trim excess whitespace/line breaks from end - let remainder = trim_end(remainder); - - Some((keyword, remainder)) } -/// Find `@` character in a single-line comment. -/// -/// Returns the remainder of the string after the `@`. -/// Returns `None` if any other character except space, or tab, or irregular whitespace is found first. -/// That includes line breaks, since this is a single-line comment. -fn find_at_sign_in_line_comment(str: &str) -> Option<&str> { - // Note: Neither `accept` nor `skip` matches line breaks, so will not search beyond end of the comment - let accept = |byte| byte == b'@'; - let skip = |byte| matches!(byte, b' ' | b'\t'); - let find_unicode = |str: &str| { - let len = str.len(); - let str = str.trim_start().strip_prefix('@')?; - Some(len - str.len() - 1) - }; - let index = find(str, accept, skip, find_unicode)?; - Some(&str[index + 1..]) +/// Type of JSX pragma directive. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +enum PragmaType { + Jsx, + JsxRuntime, + JsxImportSource, + JsxFrag, } -/// Find `@` character in a block comment. +/// Search comment for a JSX pragma. +/// +/// If found, returns: /// -/// Returns the remainder of the string after the `@`. -/// Returns `None` if any other character except whitespace, line breaks, or `*` is found first. +/// * `PragmaType` representing the type of the pragma. +/// * Value following `@jsx` / `@jsxRuntime` / etc. +/// * The remainder of the comment, to search again for another pragma. /// -/// Line breaks and `*` are allowed in order to handle e.g.: -/// ```js -/// /* -/// * @jsx Preact.h -/// */ -/// ``` -fn find_at_sign_in_block_comment(str: &str) -> Option<&str> { - // Note: Neither `accept` nor `skip` matches `/`, so will not search beyond end of the comment - let accept = |byte| byte == b'@'; - let skip = |byte| byte == b'*' || is_ascii_whitespace(byte); - let find_unicode = |str: &str| { - let len = str.len(); - let mut str = str.trim_start(); - // Strip leading jsdoc comment `*` and then whitespaces - while let Some(cur_str) = str.strip_prefix('*') { - str = cur_str.trim_start(); +/// If no pragma found, returns `None`. +fn find_jsx_pragma(mut comment_str: &str) -> Option<(PragmaType, &str, &str)> { + let pragma_type; + loop { + // Search for `@jsx`. + let mut at_sign_index = None; + for (index, next4) in comment_str.as_bytes().windows(4).enumerate() { + if next4 == b"@jsx" { + at_sign_index = Some(index); + break; + } } - let str = str.strip_prefix('@')?; - Some(len - str.len() - 1) - }; - let index = find(str, accept, skip, find_unicode)?; - Some(&str[index + 1..]) -} - -/// Split string into 2 parts on spaces, tabs, or irregular whitespaces. -/// Removes any amount of whitespace between the 2 parts. -/// Returns `None` if no whitespace found, or if no further characters after the whitespace. -fn split_at_whitespace(str: &str) -> Option<(&str, &str)> { - // Find first space, tab, or irregular whitespace - let mut space_bytes = 1; - let accept = |byte| matches!(byte, b' ' | b'\t'); - let skip = |_| true; - let find_unicode = |str: &str| { - str.find(|c| { - if c == ' ' || c == '\t' { - true - } else if is_irregular_whitespace(c) { - space_bytes = c.len_utf8(); - true - } else { - false + // Exit if not found + let at_sign_index = at_sign_index?; + + // Trim `@jsx` from start of `comment_str`. + // SAFETY: 4 bytes starting at `at_sign_index` are `@jsx`, so `at_sign_index + 4` is within string + // or end of string, and must be on a UTF-8 character boundary + comment_str = unsafe { comment_str.get_unchecked(at_sign_index + 4..) }; + + // Get rest of keyword e.g. `Runtime` in `@jsxRuntime` + let space_index = comment_str.as_bytes().iter().position(|&b| matches!(b, b' ' | b'\t'))?; + // SAFETY: Byte at `space_index` is ASCII, so `space_index` is in bounds and on a UTF-8 char boundary + let keyword_str = unsafe { comment_str.get_unchecked(..space_index) }; + // SAFETY: Byte at `space_index` is ASCII, so `space_index + 1` is in bounds and on a UTF-8 char boundary + comment_str = unsafe { comment_str.get_unchecked(space_index + 1..) }; + + pragma_type = match keyword_str { + "" => PragmaType::Jsx, + "Runtime" => PragmaType::JsxRuntime, + "ImportSource" => PragmaType::JsxImportSource, + "Frag" => PragmaType::JsxFrag, + _ => { + // Unrecognised pragma - search for another + continue; } - }) - }; - let space_index = find(str, accept, skip, find_unicode)?; - - let before = &str[..space_index]; - let after_space_index = space_index + space_bytes; + }; + break; + } - // Consume any further spaces. - // Don't use `find` to search in chunks here, as usually there's only a single space and this loop - // will exit on first turn. - let more_spaces_after; - let mut iter = str.as_bytes()[after_space_index..].iter().enumerate(); + // Consume any further spaces / tabs after keyword loop { - if let Some((index, &byte)) = iter.next() { - more_spaces_after = match byte { - b' ' | b'\t' => continue, - _ if byte.is_ascii() => index, - _ => cold_branch(|| { - let is_space = |c| c == ' ' || c == '\t' || is_irregular_whitespace(c); - str[after_space_index..].find(|c| !is_space(c)).unwrap_or(0) - }), - }; + let next_byte = *comment_str.as_bytes().first()?; + if !matches!(next_byte, b' ' | b'\t') { break; } - return None; + // SAFETY: First byte of string is ASCII, so trimming it off must leave a valid UTF-8 string + comment_str = unsafe { comment_str.get_unchecked(1..) }; } - let after = &str[after_space_index + more_spaces_after..]; - - Some((before, after)) -} -/// Trim whitespace and line breaks from end of string. -/// -/// Equivalent to `str::trim_end`, but optimized for ASCII strings. -/// Comparison: -fn trim_end(str: &str) -> &str { - let mut iter = str.as_bytes().iter().enumerate().rev(); - let index = loop { - if let Some((index, &byte)) = iter.next() { - match byte { - _ if is_ascii_whitespace(byte) => continue, - _ if !byte.is_ascii() => return cold_branch(|| str.trim_end()), - _ => break index, - } - } - return ""; - }; - - // SAFETY: `index` came from a safe iterator, so must be before end of `str`. - // Therefore `index + 1` must be in bounds (or at end of string). - // We have only seen ASCII bytes, so `index + 1` must be on a UTF-8 char boundary. - #[expect(clippy::range_plus_one)] - unsafe { - str.get_unchecked(..index + 1) + // Get value + let space_index = comment_str.as_bytes().iter().position(|&b| is_ascii_whitespace(b)); + let value; + if let Some(space_index) = space_index { + // SAFETY: Byte at `space_index` is ASCII, so `space_index` is in bounds and on a UTF-8 char boundary + value = unsafe { comment_str.get_unchecked(..space_index) }; + // SAFETY: Byte at `space_index` is ASCII, so `space_index + 1` is in bounds and on a UTF-8 char boundary + comment_str = unsafe { comment_str.get_unchecked(space_index + 1..) }; + } else { + value = comment_str; + comment_str = ""; } + + if value.is_empty() { None } else { Some((pragma_type, value, comment_str)) } } /// Test if a byte is ASCII whitespace, using the same group of ASCII chars that `std::str::trim_start` uses. @@ -251,393 +169,93 @@ fn is_ascii_whitespace(byte: u8) -> bool { matches!(byte, b' ' | b'\t' | b'\r' | b'\n' | VT | FF) } -/// Find a match in a string. -/// -/// Optimized for searching through strings which only contain ASCII. -/// Non-ASCII chars are considered unlikely and are handled in a cold fallback path. -/// -/// Search occurs in batches of 8 bytes, with a slower fallback for the last 7 bytes. -/// Provide the longest string possible to be able to avoid the slower fallback. -/// -/// Iterates through string byte-by-byte, calling `accept` and `skip` for each byte. -/// * If a non-ASCII byte is found, hands control to `find_unicode` and returns whatever it returns. -/// * If `accept` returns `true`, this function returns the index of that byte. -/// * If `skip` returns `true`, continues search. -/// * If both `accept` and `skip` return `false`, this function returns `None`. -/// * If reaches the end of the string without exiting, returns `None`. -fn find<'s, Accept, Skip, FindUnicode>( - str: &'s str, - accept: Accept, - skip: Skip, - find_unicode: FindUnicode, -) -> Option -where - Accept: Fn(u8) -> bool, - Skip: Fn(u8) -> bool, - FindUnicode: FnOnce(&'s str) -> Option, -{ - // Process string in chunks of 8 bytes. - // Check chunks for any non-ASCII bytes in one go, and deopt to unicode handler if so. - let mut chunks = str.as_bytes().chunks_exact(8); - for (chunk_index, chunk) in chunks.by_ref().enumerate() { - let chunk: [u8; 8] = chunk.try_into().unwrap(); - if !chunk_is_ascii(chunk) { - return cold_branch(|| find_unicode(str)); - } - - // Compiler will unroll this loop if `accept` and `skip` are small enough - for (byte_index, byte) in chunk.into_iter().enumerate() { - match byte { - _ if accept(byte) => return Some(chunk_index * 8 + byte_index), - _ if skip(byte) => {} - _ => return None, - } - } - } - - // We only get here if we're close to end of the string - let chunk_start = str.len() & !7; - for (byte_index, &byte) in chunks.remainder().iter().enumerate() { - match byte { - _ if !byte.is_ascii() => return cold_branch(|| find_unicode(str)), - _ if accept(byte) => return Some(chunk_start + byte_index), - _ if skip(byte) => {} - _ => return None, - } - } - - None -} - -#[inline] -fn chunk_is_ascii(chunk: [u8; 8]) -> bool { - const HIGH_BITS: u64 = 0x8080_8080_8080_8080; - let chunk_u64 = u64::from_ne_bytes(chunk); - chunk_u64 & HIGH_BITS == 0 -} - -/// Call a closure while hinting to compiler that this branch is rarely taken. -/// "Cold trampoline function", suggested in: -/// -#[cold] -#[inline(never)] -pub fn cold_branch T, T>(f: F) -> T { - f() -} - #[cfg(test)] mod tests { - use oxc_ast::{CommentAnnotation, CommentPosition}; - use oxc_span::Span; - use super::*; - static PRE_AND_POSTFIX: &[(&str, &str)] = &[ - ("", ""), - ("\n\n\n", "\n"), - ("", "\n@jsx AfterCommentWeShouldNotFind\n\n"), - ("\n\n\n@jsx BeforeCommentWeShouldNotFind\n\n", ""), - ("\n\n\n@jsx BeforeCommentWeShouldNotFind\n\n", "\n@jsx AfterCommentWeShouldNotFind\n\n"), - ]; - - fn run_tests<'c>(cases: impl Iterator)>) { - for (comment_str, expected) in cases { - for (before, after) in PRE_AND_POSTFIX { - let (comment, source_text) = create_comment(comment_str, before, after); - assert_eq!(find_jsx_pragma(&comment, &source_text), expected); - } - } - } - - fn create_comment(comment_str: &str, before: &str, after: &str) -> (Comment, String) { - let kind = if comment_str.starts_with("//") { - CommentKind::Line - } else { - assert!(comment_str.starts_with("/*") && comment_str.ends_with("*/")); - CommentKind::Block - }; - - let source_text = format!("{before}{comment_str}{after}"); - #[expect(clippy::cast_possible_truncation)] - let span = Span::new(before.len() as u32, (before.len() + comment_str.len()) as u32); - let comment = Comment { - span, - kind, - position: CommentPosition::Leading, - attached_to: 0, - preceded_by_newline: true, - followed_by_newline: true, - annotation: CommentAnnotation::None, - }; - (comment, source_text) - } - #[test] - fn find_jsx_pragma_line_comment_not_found() { - let cases = [ - // No `@` - "//", - "// ", - "// blah blah blah", - "// blah blah blah", - "// ", - // `@` but not valid - "//@", - "// @", - "// @ ", - "// @j", - "// @j ", - "// @j sx", - "// @j sx ", - "// @js", - "// @js ", - "// @js x", - "// @js blah", - "// @ jsx blah", - "// @ jsx blah", - "// @xjsx blah", - "// @xjsx blah", - "// @xjsx blah", - "// @jsx", - "// @jsx ", - "// @jsx ", - "// @jsxX", - "// @jsxRuntime", - "// @jsxRuntime ", - "// @jsxImportSource", - "// @jsxImportSource ", - "// @jsxFrag", - "// @jsxFrag ", - // Unicode space - "//\u{85}", - "// \u{85} ", - ]; - - run_tests(cases.into_iter().map(|comment_str| (comment_str, None))); - } - - #[test] - fn find_jsx_pragma_line_comment_found() { - let cases = [ - // comment, keyword, remainder - // `@jsx` pragma - ("//@jsx foo", "", "foo"), - ("// @jsx foo", "", "foo"), - ("// @jsx foo", "", "foo"), - ("//\t@jsx foo", "", "foo"), - ("// \t\t \t\t @jsx foo", "", "foo"), - ("// @jsx\tfoo", "", "foo"), - ("// @jsx\t \t \t\t foo", "", "foo"), - ("// @jsx foo ", "", "foo"), - ("// @jsx foo\t", "", "foo"), - ("// @jsx foo \t\t \t\t ", "", "foo"), - // Other pragmas - ("// @jsxRuntime foo", "Runtime", "foo"), - ("// @jsxRuntime \t\t\t foo", "Runtime", "foo"), - ("// @jsxRuntime \t\t\t foo \t\t\t ", "Runtime", "foo"), - ("// @jsxImportSource foo", "ImportSource", "foo"), - ("// @jsxFrag foo", "Frag", "foo"), - // Unicode space - ("//\u{85}@jsx foo", "", "foo"), - ("//\u{85}\t\u{85}@jsx foo", "", "foo"), - ("// @jsx\u{85}foo", "", "foo"), - ("// @jsx\u{85} foo", "", "foo"), - ("// @jsx \u{85}foo", "", "foo"), - ("// @jsx\u{85} \u{85}foo", "", "foo"), - ("// @jsx\u{85}\u{85}\u{85}foo", "", "foo"), - ("// @jsx foo\u{85}", "", "foo"), - ("// @jsx foo\u{85} ", "", "foo"), - ("// @jsx foo \u{85}", "", "foo"), - ("// @jsx foo\u{85} \u{85}", "", "foo"), - ("// @jsx foo\u{85}\u{85}\u{85}", "", "foo"), - ]; - - run_tests( - cases - .into_iter() - .map(|(comment_str, keyword, remainder)| (comment_str, Some((keyword, remainder)))), - ); - } - - #[test] - fn find_jsx_pragma_block_comment_not_found() { - let cases = [ - // No `@` - "/**/", - "/* */", - "/* blah blah blah*/", - "/* blah blah blah*/", - "/* */", - // `@` but not valid - "/*@*/", - "/* @*/", - "/*@ */", - "/* @ */", - "/* @j*/", - "/* @j */", - "/* @j sx */", - "/* @js*/", - "/* @js */", - "/* @js x*/", - "/* @js x */", - "/* @js blah */", - "/* @ jsx blah */", - "/* @ jsx blah */", - "/* @xjsx blah */", - "/* @xjsx blah */", - "/* @xjsx blah */", - "/*@jsx*/", - "/* @jsx*/", - "/* @jsx */", - "/* @jsx */", - "/* @jsxX */", - "/* @jsxRuntime*/", - "/* @jsxRuntime */", - "/* @jsxImportSource*/", - "/* @jsxImportSource */", - "/* @jsxFrag*/", - "/* @jsxFrag */", - // Multi-line - "/*\n*/", - "/* - */", - "/* - * - */", - "/* - * @jsx - */", - "/* - * @jsxX - */", - "/* - * @js - */", - // Unicode space - "/*\u{85}*/", - "/* \u{85} */", - ]; - - run_tests(cases.into_iter().map(|comment_str| (comment_str, None))); - } - - #[test] - fn find_jsx_pragma_block_comment_found() { - let cases = [ - // comment, keyword, remainder - // `@jsx` pragma single line - ("/*@jsx foo*/", "", "foo"), - ("/* @jsx foo*/", "", "foo"), - ("/* @jsx foo*/", "", "foo"), - ("/*\t@jsx foo*/", "", "foo"), - ("/* \t\t \t\t @jsx foo*/", "", "foo"), - ("/* @jsx\tfoo*/", "", "foo"), - ("/* @jsx\t \t \t\t foo*/", "", "foo"), - ("/* @jsx foo */", "", "foo"), - ("/* @jsx foo\t*/", "", "foo"), - ("/* @jsx foo \t\t \t\t */", "", "foo"), - // `@jsx` pragma multi line + fn test_find_jsx_pragma() { + let cases: &[(&str, &[(PragmaType, &str)])] = &[ + // No valid pragmas + ("", &[]), + ("blah blah blah", &[]), + ("@jsxDonkey abc", &[]), + // Single pragma + ("@jsx h", &[(PragmaType::Jsx, "h")]), + ("@jsx React.createDumpling", &[(PragmaType::Jsx, "React.createDumpling")]), + ("@jsxRuntime classic", &[(PragmaType::JsxRuntime, "classic")]), + ("@jsxImportSource preact", &[(PragmaType::JsxImportSource, "preact")]), + ("@jsxFrag Fraggy", &[(PragmaType::JsxFrag, "Fraggy")]), + // Multiple pragmas + ( + "@jsx h @jsxRuntime classic", + &[(PragmaType::Jsx, "h"), (PragmaType::JsxRuntime, "classic")], + ), ( - "/* - * @jsx foo - */", - "", - "foo", + "* @jsx h\n * @jsxRuntime classic\n *", + &[(PragmaType::Jsx, "h"), (PragmaType::JsxRuntime, "classic")], ), ( - "/* - * @jsx foo*/", - "", - "foo", + "@jsx h @jsxRuntime classic @jsxImportSource importer-a-go-go @jsxFrag F", + &[ + (PragmaType::Jsx, "h"), + (PragmaType::JsxRuntime, "classic"), + (PragmaType::JsxImportSource, "importer-a-go-go"), + (PragmaType::JsxFrag, "F"), + ], ), ( - "/* @jsx foo - */", - "", - "foo", + "* @jsx h\n * @jsxRuntime classic\n * @jsxImportSource importer-a-go-go\n * @jsxFrag F\n *", + &[ + (PragmaType::Jsx, "h"), + (PragmaType::JsxRuntime, "classic"), + (PragmaType::JsxImportSource, "importer-a-go-go"), + (PragmaType::JsxFrag, "F"), + ], ), + // Text in between pragmas ( - "/* - * - * - * @jsx foo - */", - "", - "foo", + "@jsx h blah blah @jsxRuntime classic", + &[(PragmaType::Jsx, "h"), (PragmaType::JsxRuntime, "classic")], ), - // Other pragmas - ("/* @jsxRuntime foo*/", "Runtime", "foo"), - ("/* @jsxRuntime foo */", "Runtime", "foo"), - ("/* @jsxRuntime \t\t\t foo*/", "Runtime", "foo"), - ("/* @jsxRuntime \t\t\t foo \t\t\t */", "Runtime", "foo"), - ("/* @jsxImportSource foo */", "ImportSource", "foo"), - ("/* @jsxFrag foo */", "Frag", "foo"), - // Unicode space - ("/*\u{85}@jsx foo*/", "", "foo"), - ("/*\u{85}\t\u{85}@jsx foo*/", "", "foo"), - ("/* @jsx\u{85}foo*/", "", "foo"), - ("/* @jsx\u{85} foo*/", "", "foo"), - ("/* @jsx \u{85}foo*/", "", "foo"), - ("/* @jsx\u{85} \u{85}foo*/", "", "foo"), - ("/* @jsx\u{85}\u{85}\u{85}foo*/", "", "foo"), - ("/* @jsx foo\u{85}*/", "", "foo"), - ("/* @jsx foo\u{85} */", "", "foo"), - ("/* @jsx foo \u{85}*/", "", "foo"), - ("/* @jsx foo\u{85} \u{85}*/", "", "foo"), - ("/* @jsx foo\u{85}\u{85}\u{85}*/", "", "foo"), + ( + "blah blah\n * @jsx h \n * blah blah\n * @jsxRuntime classic \n * blah blah", + &[(PragmaType::Jsx, "h"), (PragmaType::JsxRuntime, "classic")], + ), + // Pragma without value + ("@jsx", &[]), + ("@jsxRuntime", &[]), + // Other invalid pragmas surrounding valid one + ("@moon @jsx h @moon", &[(PragmaType::Jsx, "h")]), + ("@jsxX @jsx h @jsxX", &[(PragmaType::Jsx, "h")]), + ("@jsxMoon @jsx h @jsxMoon", &[(PragmaType::Jsx, "h")]), + ("@jsx @jsx h", &[(PragmaType::Jsx, "@jsx")]), ]; - run_tests( - cases - .into_iter() - .map(|(comment_str, keyword, remainder)| (comment_str, Some((keyword, remainder)))), - ); - } + let prefixes = ["", " ", "\n\n", "*\n* "]; + let postfixes = ["", " ", "\n\n", "\n*"]; - #[test] - fn test_trim_end() { - let cases = [ - // Empty - ("", ""), - (" ", ""), - ("\t", ""), - ("\r", ""), - ("\n", ""), - ("\u{0B}", ""), - ("\u{0C}", ""), - (" \t \n \r\n \u{0B} \u{0C} ", ""), - // Single char - ("a", "a"), - ("a ", "a"), - ("a\t", "a"), - ("a\r", "a"), - ("a\n", "a"), - ("a\u{0B}", "a"), - ("a\u{0C}", "a"), - ("a \t \n \r\n \u{0B} \u{0C} ", "a"), - // Multiple chars - ("abc", "abc"), - ("abc ", "abc"), - ("abc\t", "abc"), - ("abc\r", "abc"), - ("abc\n", "abc"), - ("abc\u{0B}", "abc"), - ("abc\u{0C}", "abc"), - ("abc \t \n \r\n \u{0B} \u{0C} ", "abc"), - // Unicode whitespace - ("\u{85}", ""), - ("\u{85}\u{85}\u{85}", ""), - ("a\u{85}", "a"), - ("a\u{85}\u{85}\u{85}", "a"), - ("abc\u{85}", "abc"), - ("abc\u{85}\u{85}\u{85}", "abc"), - // Spaces on start - (" abc", " abc"), - (" abc ", " abc"), - ]; + for (comment_str, expected) in cases { + for prefix in prefixes { + for postfix in postfixes { + let comment_str = format!("{prefix}{comment_str}{postfix}"); + let mut comment_str = comment_str.as_str(); + let mut pragmas = vec![]; + while let Some((pragma_type, value, remaining)) = find_jsx_pragma(comment_str) { + pragmas.push((pragma_type, value)); + comment_str = remaining; + } + assert_eq!(&pragmas, expected); + } + } - for (str, expected) in cases { - assert_eq!(trim_end(str), expected); + let mut comment_str = *comment_str; + let mut pragmas = vec![]; + while let Some((pragma_type, value, remaining)) = find_jsx_pragma(comment_str) { + pragmas.push((pragma_type, value)); + comment_str = remaining; + } + assert_eq!(&pragmas, expected); } } } diff --git a/tasks/coverage/snapshots/transformer_typescript.snap b/tasks/coverage/snapshots/transformer_typescript.snap index ec719d14518aa..15a7856f209c8 100644 --- a/tasks/coverage/snapshots/transformer_typescript.snap +++ b/tasks/coverage/snapshots/transformer_typescript.snap @@ -2,7 +2,7 @@ commit: 15392346 transformer_typescript Summary: AST Parsed : 6528/6528 (100.00%) -Positive Passed: 6523/6528 (99.92%) +Positive Passed: 6524/6528 (99.94%) Mismatch: tasks/coverage/typescript/tests/cases/compiler/esDecoratorsClassFieldsCrash.ts Mismatch: tasks/coverage/typescript/tests/cases/conformance/classes/propertyMemberDeclarations/autoAccessor2.ts @@ -11,5 +11,3 @@ Mismatch: tasks/coverage/typescript/tests/cases/conformance/esDecorators/classDe Mismatch: tasks/coverage/typescript/tests/cases/conformance/esDecorators/classExpression/esDecorators-classExpression-commentPreservation.ts -Mismatch: tasks/coverage/typescript/tests/cases/conformance/jsx/inline/inlineJsxAndJsxFragPragmaOverridesCompilerOptions.tsx - diff --git a/tasks/transform_conformance/snapshots/babel.snap.md b/tasks/transform_conformance/snapshots/babel.snap.md index 5abe927b7eb17..7159d5113d019 100644 --- a/tasks/transform_conformance/snapshots/babel.snap.md +++ b/tasks/transform_conformance/snapshots/babel.snap.md @@ -1,6 +1,6 @@ commit: 578ac4df -Passed: 713/1191 +Passed: 712/1190 # All Passed: * babel-plugin-transform-logical-assignment-operators @@ -2713,7 +2713,7 @@ x Output mismatch x Output mismatch -# babel-plugin-transform-react-jsx (133/145) +# babel-plugin-transform-react-jsx (132/144) * autoImport/after-polyfills-compiled-to-cjs/input.mjs x Output mismatch diff --git a/tasks/transform_conformance/src/constants.rs b/tasks/transform_conformance/src/constants.rs index 37d16ab33593d..942f3b406bf6f 100644 --- a/tasks/transform_conformance/src/constants.rs +++ b/tasks/transform_conformance/src/constants.rs @@ -80,6 +80,9 @@ pub const SKIP_TESTS: &[&str] = &[ "babel-plugin-transform-typescript/test/fixtures/node-extensions/type-param-arrow-in-cts", "babel-plugin-transform-typescript/test/fixtures/node-extensions/type-param-arrow-in-mts", "babel-plugin-transform-typescript/test/fixtures/node-extensions/with-in-mts", + // We follow ESBuild on JSX pragma parsing + // https://github.com/oxc-project/oxc/issues/10955 + "babel-plugin-transform-react-jsx/test/fixtures/react/should-not-allow-jsx-pragma-to-be-anywhere-in-comment", // Report error for deprecate option or oxc doesn’t follow error message "babel-plugin-transform-typescript/test/fixtures/opts/allowDeclareFields", "babel-plugin-transform-react-jsx/test/fixtures/react-automatic/should-throw-when-filter-is-specified",