Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions crates/oxc_formatter/src/print/import_declaration.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use oxc_allocator::Vec;
use oxc_ast::ast::*;
use oxc_span::GetSpan;
use oxc_syntax::identifier::is_identifier_name_patched;

use crate::{
Format, FormatTrailingCommas, JsLabels, TrailingSeparator,
Expand All @@ -10,9 +11,7 @@ use crate::{
Formatter, prelude::*, separated::FormatSeparatedIter, trivia::FormatLeadingComments,
},
print::semicolon::OptionalSemicolon,
utils::string::{
FormatLiteralStringToken, StringLiteralParentKind, is_identifier_name_patched,
},
utils::string::{FormatLiteralStringToken, StringLiteralParentKind},
write,
};

Expand Down
3 changes: 2 additions & 1 deletion crates/oxc_formatter/src/utils/object.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use oxc_ast::ast::*;
use oxc_span::GetSpan;
use oxc_syntax::identifier::is_identifier_name_patched;

use crate::{
Buffer, Format,
ast_nodes::{AstNode, AstNodes},
formatter::Formatter,
utils::{
string::{FormatLiteralStringToken, StringLiteralParentKind, is_identifier_name_patched},
string::{FormatLiteralStringToken, StringLiteralParentKind},
tailwindcss::{tailwind_context_for_string_literal, write_tailwind_string_literal},
},
write,
Expand Down
12 changes: 1 addition & 11 deletions crates/oxc_formatter/src/utils/string.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{borrow::Cow, ops::Deref};

use oxc_span::SourceType;
use oxc_syntax::identifier::{is_identifier_part, is_identifier_start};
use oxc_syntax::identifier::is_identifier_name_patched;
use unicode_width::UnicodeWidthStr;

use crate::{
Expand Down Expand Up @@ -572,16 +572,6 @@ fn normalize_jsx_string(
(Cow::Owned(result), chosen_quote)
}

/// `is_identifier_name` patched with KATAKANA MIDDLE DOT and HALFWIDTH KATAKANA MIDDLE DOT.
///
/// Returns `true` only when `content` is non-empty, its first char satisfies
/// `is_identifier_start`, and every following char satisfies `is_identifier_part` without being
/// U+30FB or U+FF65. An empty string yields `false`.
///
/// Otherwise `({ 'x・': 0 })` gets converted to `({ x・: 0 })`, which breaks in Unicode 4.1 to
/// 15.
/// <https://github.com/oxc-project/unicode-id-start/pull/3>
pub fn is_identifier_name_patched(content: &str) -> bool {
    // Spelled as escapes: the two dots are nearly indistinguishable when typed literally and a
    // compatibility normalization of the source text would silently fold U+FF65 into U+30FB.
    const KATAKANA_MIDDLE_DOT: char = '\u{30FB}';
    const HALFWIDTH_KATAKANA_MIDDLE_DOT: char = '\u{FF65}';

    let mut chars = content.chars();
    chars.next().is_some_and(is_identifier_start)
        && chars.all(|c| {
            is_identifier_part(c)
                && c != KATAKANA_MIDDLE_DOT
                && c != HALFWIDTH_KATAKANA_MIDDLE_DOT
        })
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use oxc_allocator::TakeIn;
use oxc_ast::ast::*;
use oxc_syntax::identifier::is_identifier_name_patched;

use crate::TraverseCtx;

Expand All @@ -16,7 +17,7 @@ impl<'a> PeepholeOptimizations {
pub fn convert_to_dotted_properties(expr: &mut MemberExpression<'a>, ctx: &TraverseCtx<'a>) {
let MemberExpression::ComputedMemberExpression(e) = expr else { return };
let Expression::StringLiteral(s) = &e.expression else { return };
if TraverseCtx::is_identifier_name_patched(&s.value) {
if is_identifier_name_patched(&s.value) {
let property = ctx.ast.identifier_name(s.span, s.value);
*expr =
MemberExpression::StaticMemberExpression(ctx.ast.alloc_static_member_expression(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use oxc_span::GetSpan;
use oxc_span::SPAN;
use oxc_syntax::precedence::GetPrecedence;
use oxc_syntax::{
identifier::is_identifier_name_patched,
number::NumberBase,
operator::{BinaryOperator, UnaryOperator},
};
Expand Down Expand Up @@ -1276,7 +1277,7 @@ impl<'a> PeepholeOptimizations {
}
PropertyKey::StringLiteral(s) => {
let value = s.value.as_str();
if TraverseCtx::is_identifier_name_patched(value) {
if is_identifier_name_patched(value) {
*computed = false;
*key = PropertyKey::StaticIdentifier(
ctx.ast.alloc_identifier_name(s.span, s.value),
Expand Down
16 changes: 1 addition & 15 deletions crates/oxc_minifier/src/traverse_context/ecma_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@ use oxc_ecmascript::{
};
use oxc_semantic::{IsGlobalReference, SymbolId};
use oxc_span::format_atom;
use oxc_syntax::{
identifier::{is_identifier_part, is_identifier_start},
reference::ReferenceId,
scope::ScopeFlags,
};
use oxc_syntax::{reference::ReferenceId, scope::ScopeFlags};

use crate::{
generated::ancestor::Ancestor, options::CompressOptions, state::MinifierState,
Expand Down Expand Up @@ -300,16 +296,6 @@ impl<'a> TraverseCtx<'a, MinifierState<'a>> {
Some(f64::from(int_value))
}

/// `is_identifier_name` patched with KATAKANA MIDDLE DOT and HALFWIDTH KATAKANA MIDDLE DOT.
///
/// Returns `true` only when `s` is non-empty, its first char satisfies `is_identifier_start`,
/// and every following char satisfies `is_identifier_part` without being U+30FB or U+FF65.
/// An empty string yields `false`.
///
/// Otherwise `({ 'x・': 0 })` gets converted to `({ x・: 0 })`, which breaks in Unicode 4.1 to
/// 15.
/// <https://github.com/oxc-project/unicode-id-start/pull/3>
pub fn is_identifier_name_patched(s: &str) -> bool {
    // Spelled as escapes: the two dots are nearly indistinguishable when typed literally and a
    // compatibility normalization of the source text would silently fold U+FF65 into U+30FB.
    const KATAKANA_MIDDLE_DOT: char = '\u{30FB}';
    const HALFWIDTH_KATAKANA_MIDDLE_DOT: char = '\u{FF65}';

    let mut chars = s.chars();
    chars.next().is_some_and(is_identifier_start)
        && chars.all(|c| {
            is_identifier_part(c)
                && c != KATAKANA_MIDDLE_DOT
                && c != HALFWIDTH_KATAKANA_MIDDLE_DOT
        })
}

/// Whether the closest function scope is created by an async generator
pub fn is_closest_function_scope_an_async_generator(&self) -> bool {
self.ancestors()
Expand Down
50 changes: 48 additions & 2 deletions crates/oxc_syntax/src/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,27 @@ pub fn is_identifier_part_unicode(c: char) -> bool {
is_id_continue_unicode(c) || c == ZWNJ || c == ZWJ
}

/// U+30FB KATAKANA MIDDLE DOT
///
/// Written as an escape so it cannot be confused with the halfwidth variant below.
const KATAKANA_MIDDLE_DOT: char = '\u{30FB}';
/// U+FF65 HALFWIDTH KATAKANA MIDDLE DOT
///
/// Written as an escape: a compatibility normalization of the source text would otherwise fold
/// this character into U+30FB and make the two constants identical.
const HALFWIDTH_KATAKANA_MIDDLE_DOT: char = '\u{FF65}';

/// Determine if a string is a valid JS identifier.
///
/// Thin wrapper over `is_identifier_name_impl` with `PATCHED = false`, i.e. without the extra
/// rejection of the Katakana middle dots that `is_identifier_name_patched` applies.
// No `#[expect(clippy::missing_panics_doc)]` here: this body is a single delegating call, so the
// expectation would be unfulfilled.
pub fn is_identifier_name(name: &str) -> bool {
    is_identifier_name_impl::<false>(name)
}

/// Stricter variant of `is_identifier_name` that refuses KATAKANA MIDDLE DOT and HALFWIDTH
/// KATAKANA MIDDLE DOT.
///
/// Without this, `({ 'x・': 0 })` would be rewritten to `({ x・: 0 })`, which breaks in
/// Unicode 4.1 to 15.
///
/// <https://github.com/oxc-project/unicode-id-start/pull/3>
pub fn is_identifier_name_patched(name: &str) -> bool {
    // Select the `PATCHED` instantiation of the shared implementation.
    const REJECT_MIDDLE_DOTS: bool = true;
    is_identifier_name_impl::<REJECT_MIDDLE_DOTS>(name)
}

fn is_identifier_name_impl<const PATCHED: bool>(name: &str) -> bool {
// This function contains a fast path for ASCII (common case), iterating over bytes and using
// the cheap `is_identifier_start_ascii` and `is_identifier_part_ascii` to test bytes.
// Only if a Unicode char is found, fall back to iterating over `char`s, and using the more
Expand Down Expand Up @@ -245,7 +263,13 @@ pub fn is_identifier_name(name: &str) -> bool {
};

// A Unicode char was found - search rest of string as Unicode
chars.all(is_identifier_part)
if PATCHED {
chars.all(|c| {
is_identifier_part(c) && c != KATAKANA_MIDDLE_DOT && c != HALFWIDTH_KATAKANA_MIDDLE_DOT
})
} else {
chars.all(is_identifier_part)
}
}

#[test]
Expand Down Expand Up @@ -334,3 +358,25 @@ fn is_identifier_name_false() {
assert!(!is_identifier_name(str));
}
}

#[test]
fn is_identifier_name_patched_rejects_katakana_dots() {
    // U+30FB KATAKANA MIDDLE DOT and U+FF65 HALFWIDTH KATAKANA MIDDLE DOT in a non-leading
    // position: the plain check accepts them, the patched check must refuse them (compat with
    // Unicode 4.1-15).
    for dotted in ["x\u{30FB}", "x\u{FF65}"] {
        assert!(is_identifier_name(dotted));
        assert!(!is_identifier_name_patched(dotted));
    }

    // In leading position the dot is not a valid start, so both variants refuse it.
    let leading_dot = "\u{30FB}";
    assert!(!is_identifier_name(leading_dot));
    assert!(!is_identifier_name_patched(leading_dot));

    // Ordinary identifiers are unaffected by the patch.
    for plain in ["foo", "_bar", "$baz", "µ"] {
        assert!(is_identifier_name_patched(plain));
    }

    // The empty string is never an identifier.
    assert!(!is_identifier_name_patched(""));
}
Loading