diff --git a/crates/oxc_formatter/src/print/import_declaration.rs b/crates/oxc_formatter/src/print/import_declaration.rs index 6645d3d2dce2a..0c734df565928 100644 --- a/crates/oxc_formatter/src/print/import_declaration.rs +++ b/crates/oxc_formatter/src/print/import_declaration.rs @@ -1,6 +1,7 @@ use oxc_allocator::Vec; use oxc_ast::ast::*; use oxc_span::GetSpan; +use oxc_syntax::identifier::is_identifier_name_patched; use crate::{ Format, FormatTrailingCommas, JsLabels, TrailingSeparator, @@ -10,9 +11,7 @@ use crate::{ Formatter, prelude::*, separated::FormatSeparatedIter, trivia::FormatLeadingComments, }, print::semicolon::OptionalSemicolon, - utils::string::{ - FormatLiteralStringToken, StringLiteralParentKind, is_identifier_name_patched, - }, + utils::string::{FormatLiteralStringToken, StringLiteralParentKind}, write, }; diff --git a/crates/oxc_formatter/src/utils/object.rs b/crates/oxc_formatter/src/utils/object.rs index e7be4686ac8ff..8d9aff4c3095c 100644 --- a/crates/oxc_formatter/src/utils/object.rs +++ b/crates/oxc_formatter/src/utils/object.rs @@ -1,12 +1,13 @@ use oxc_ast::ast::*; use oxc_span::GetSpan; +use oxc_syntax::identifier::is_identifier_name_patched; use crate::{ Buffer, Format, ast_nodes::{AstNode, AstNodes}, formatter::Formatter, utils::{ - string::{FormatLiteralStringToken, StringLiteralParentKind, is_identifier_name_patched}, + string::{FormatLiteralStringToken, StringLiteralParentKind}, tailwindcss::{tailwind_context_for_string_literal, write_tailwind_string_literal}, }, write, diff --git a/crates/oxc_formatter/src/utils/string.rs b/crates/oxc_formatter/src/utils/string.rs index 65534eb98ece7..7cca29abe4ef3 100644 --- a/crates/oxc_formatter/src/utils/string.rs +++ b/crates/oxc_formatter/src/utils/string.rs @@ -1,7 +1,7 @@ use std::{borrow::Cow, ops::Deref}; use oxc_span::SourceType; -use oxc_syntax::identifier::{is_identifier_part, is_identifier_start}; +use oxc_syntax::identifier::is_identifier_name_patched; use unicode_width::UnicodeWidthStr; use crate::{ @@ -572,16 +572,6 @@ fn normalize_jsx_string( (Cow::Owned(result), chosen_quote) } -/// `is_identifier_name` patched with KATAKANA MIDDLE DOT and HALFWIDTH KATAKANA MIDDLE DOT -/// Otherwise `({ 'x・': 0 })` gets converted to `({ x・: 0 })`, which breaks in Unicode 4.1 to -/// 15. -/// -pub fn is_identifier_name_patched(content: &str) -> bool { - let mut chars = content.chars(); - chars.next().is_some_and(is_identifier_start) - && chars.all(|c| is_identifier_part(c) && c != '・' && c != '・') -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/oxc_minifier/src/peephole/convert_to_dotted_properties.rs b/crates/oxc_minifier/src/peephole/convert_to_dotted_properties.rs index 25809440e1742..f54b8545f0a5a 100644 --- a/crates/oxc_minifier/src/peephole/convert_to_dotted_properties.rs +++ b/crates/oxc_minifier/src/peephole/convert_to_dotted_properties.rs @@ -1,5 +1,6 @@ use oxc_allocator::TakeIn; use oxc_ast::ast::*; +use oxc_syntax::identifier::is_identifier_name_patched; use crate::TraverseCtx; @@ -16,7 +17,7 @@ impl<'a> PeepholeOptimizations { pub fn convert_to_dotted_properties(expr: &mut MemberExpression<'a>, ctx: &TraverseCtx<'a>) { let MemberExpression::ComputedMemberExpression(e) = expr else { return }; let Expression::StringLiteral(s) = &e.expression else { return }; - if TraverseCtx::is_identifier_name_patched(&s.value) { + if is_identifier_name_patched(&s.value) { let property = ctx.ast.identifier_name(s.span, s.value); *expr = MemberExpression::StaticMemberExpression(ctx.ast.alloc_static_member_expression( diff --git a/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs b/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs index 1e83677b50f78..12f02146665d3 100644 --- a/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs +++ b/crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs @@ -12,6 +12,7 @@ use oxc_span::GetSpan; use oxc_span::SPAN; use oxc_syntax::precedence::GetPrecedence; use oxc_syntax::{ + identifier::is_identifier_name_patched, number::NumberBase, operator::{BinaryOperator, UnaryOperator}, }; @@ -1276,7 +1277,7 @@ impl<'a> PeepholeOptimizations { } PropertyKey::StringLiteral(s) => { let value = s.value.as_str(); - if TraverseCtx::is_identifier_name_patched(value) { + if is_identifier_name_patched(value) { *computed = false; *key = PropertyKey::StaticIdentifier( ctx.ast.alloc_identifier_name(s.span, s.value), diff --git a/crates/oxc_minifier/src/traverse_context/ecma_context.rs b/crates/oxc_minifier/src/traverse_context/ecma_context.rs index d0f47fe90bca2..4c9c991660c7c 100644 --- a/crates/oxc_minifier/src/traverse_context/ecma_context.rs +++ b/crates/oxc_minifier/src/traverse_context/ecma_context.rs @@ -11,11 +11,7 @@ use oxc_ecmascript::{ }; use oxc_semantic::{IsGlobalReference, SymbolId}; use oxc_span::format_atom; -use oxc_syntax::{ - identifier::{is_identifier_part, is_identifier_start}, - reference::ReferenceId, - scope::ScopeFlags, -}; +use oxc_syntax::{reference::ReferenceId, scope::ScopeFlags}; use crate::{ generated::ancestor::Ancestor, options::CompressOptions, state::MinifierState, @@ -300,16 +296,6 @@ impl<'a> TraverseCtx<'a, MinifierState<'a>> { Some(f64::from(int_value)) } - /// `is_identifier_name` patched with KATAKANA MIDDLE DOT and HALFWIDTH KATAKANA MIDDLE DOT - /// Otherwise `({ 'x・': 0 })` gets converted to `({ x・: 0 })`, which breaks in Unicode 4.1 to - /// 15. - /// - pub fn is_identifier_name_patched(s: &str) -> bool { - let mut chars = s.chars(); - chars.next().is_some_and(is_identifier_start) - && chars.all(|c| is_identifier_part(c) && c != '・' && c != '・') - } - /// Whether the closest function scope is created by an async generator pub fn is_closest_function_scope_an_async_generator(&self) -> bool { self.ancestors() diff --git a/crates/oxc_syntax/src/identifier.rs b/crates/oxc_syntax/src/identifier.rs index 7fbfcca3f3783..c10f87156d4de 100644 --- a/crates/oxc_syntax/src/identifier.rs +++ b/crates/oxc_syntax/src/identifier.rs @@ -140,9 +140,27 @@ pub fn is_identifier_part_unicode(c: char) -> bool { is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ } +/// U+30FB KATAKANA MIDDLE DOT +const KATAKANA_MIDDLE_DOT: char = '・'; +/// U+FF65 HALFWIDTH KATAKANA MIDDLE DOT +const HALFWIDTH_KATAKANA_MIDDLE_DOT: char = '・'; + /// Determine if a string is a valid JS identifier. -#[expect(clippy::missing_panics_doc)] pub fn is_identifier_name(name: &str) -> bool { + is_identifier_name_impl::(name) +} + +/// `is_identifier_name` patched with KATAKANA MIDDLE DOT and HALFWIDTH KATAKANA MIDDLE DOT. +/// +/// Otherwise `({ 'x・': 0 })` gets converted to `({ x・: 0 })`, which breaks in Unicode 4.1 to +/// 15. +/// +/// +pub fn is_identifier_name_patched(name: &str) -> bool { + is_identifier_name_impl::(name) +} + +fn is_identifier_name_impl(name: &str) -> bool { // This function contains a fast path for ASCII (common case), iterating over bytes and using // the cheap `is_identifier_start_ascii` and `is_identifier_part_ascii` to test bytes. // Only if a Unicode char is found, fall back to iterating over `char`s, and using the more @@ -245,7 +263,13 @@ pub fn is_identifier_name(name: &str) -> bool { }; // A Unicode char was found - search rest of string as Unicode - chars.all(is_identifier_part) + if PATCHED { + chars.all(|c| { + is_identifier_part(c) && c != KATAKANA_MIDDLE_DOT && c != HALFWIDTH_KATAKANA_MIDDLE_DOT + }) + } else { + chars.all(is_identifier_part) + } } #[test] @@ -334,3 +358,25 @@ fn is_identifier_name_false() { assert!(!is_identifier_name(str)); } } + +#[test] +fn is_identifier_name_patched_rejects_katakana_dots() { + // Katakana middle dots are valid identifier parts per Unicode 15+, + // but we reject them in the patched version for compat with Unicode 4.1-15. + // U+30FB KATAKANA MIDDLE DOT + assert!(is_identifier_name("x\u{30FB}")); + assert!(!is_identifier_name_patched("x\u{30FB}")); + // U+FF65 HALFWIDTH KATAKANA MIDDLE DOT + assert!(is_identifier_name("x\u{FF65}")); + assert!(!is_identifier_name_patched("x\u{FF65}")); + // As start character (neither is a valid start, so both should reject) + assert!(!is_identifier_name("\u{30FB}")); + assert!(!is_identifier_name_patched("\u{30FB}")); + // Normal identifiers still work + assert!(is_identifier_name_patched("foo")); + assert!(is_identifier_name_patched("_bar")); + assert!(is_identifier_name_patched("$baz")); + assert!(is_identifier_name_patched("µ")); + // Empty string rejected + assert!(!is_identifier_name_patched("")); +}