diff --git a/crates/oxc_formatter/src/formatter/format_element/mod.rs b/crates/oxc_formatter/src/formatter/format_element/mod.rs index a6479d42e2c79..3cf545b089e38 100644 --- a/crates/oxc_formatter/src/formatter/format_element/mod.rs +++ b/crates/oxc_formatter/src/formatter/format_element/mod.rs @@ -7,7 +7,7 @@ use std::hash::{Hash, Hasher}; use std::ptr; use std::{borrow::Cow, ops::Deref}; -use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; +use unicode_width::UnicodeWidthStr; use oxc_allocator::Vec as ArenaVec; @@ -452,7 +452,10 @@ impl TextWidth { /// Calculates width from text, handling tabs, newlines, and Unicode. /// - /// Returns early on newline detection for efficiency. + /// NOTE: Uses `UnicodeWidthStr::width()` for accurate emoji sequence handling. + /// Counting by `char` can lead to incorrect widths for complex Unicode sequences. + /// e.g. "🗑️" (U+1F5D1 U+FE0F) is a single emoji with width 2, but counting chars gives width 1. + #[expect(clippy::cast_possible_truncation)] pub fn from_text(text: &str, indent_width: IndentWidth) -> TextWidth { // Fast path for empty text if text.is_empty() { @@ -460,17 +463,22 @@ impl TextWidth { } let mut width = 0u32; - - #[expect(clippy::cast_lossless)] - for c in text.chars() { - let char_width = match c { - '\t' => indent_width.value(), - '\n' => return Self::multiline(width), - #[expect(clippy::cast_possible_truncation)] - c => c.width().unwrap_or(0) as u8, - }; - width += char_width as u32; + let mut segment_start = 0; + for (i, c) in text.char_indices() { + match c { + '\t' => { + width += text[segment_start..i].width() as u32; + width += u32::from(indent_width.value()); + segment_start = i + 1; // Skip the tab character + } + '\n' => { + width += text[segment_start..i].width() as u32; + return Self::multiline(width); + } + _ => {} + } } + width += text[segment_start..].width() as u32; Self::single(width) } @@ -554,6 +562,31 @@ mod tests { debug_assert!(!width.is_multiline()); } + #[test] + fn from_text_handles_emoji_sequences() { + use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; + + // Emoji with variation selector: 🗑️ = U+1F5D1 + U+FE0F + let emoji = "🗑️"; + + // Counting by char gives wrong width + let wrong: usize = emoji.chars().filter_map(UnicodeWidthChar::width).sum(); + debug_assert_eq!(wrong, 1); + // Need to count by str for correct width + debug_assert_eq!(emoji.width(), 2); + // Verify `TextWidth` also gets it right + let width = TextWidth::from_text(emoji, indent_width(2)); + debug_assert_eq!(width.value(), 2); + + // Emoji with text + let width = TextWidth::from_text("🗑️ DELETE", indent_width(2)); + debug_assert_eq!(width.value(), 9); // 2 (emoji) + 1 (space) + 6 (DELETE) + + // Another emoji with variation selector: ⚠️ = U+26A0 + U+FE0F + let width = TextWidth::from_text("⚠️", indent_width(2)); + debug_assert_eq!(width.value(), 2); + } + #[test] fn from_text_empty_returns_zero() { let width = TextWidth::from_text("", indent_width(2)); diff --git a/crates/oxc_formatter/tests/fixtures/js/unicode/emoji-sequences.js b/crates/oxc_formatter/tests/fixtures/js/unicode/emoji-sequences.js new file mode 100644 index 0000000000000..424125c36c5e6 --- /dev/null +++ b/crates/oxc_formatter/tests/fixtures/js/unicode/emoji-sequences.js @@ -0,0 +1,16 @@ +//345678901234567890123456789012345678901234567890123456789012345678901234567890 +// 80| +export async function deleteTestWorkspaceAPI() { + // THIS SHOULD BREAK WITH PRINT WIDTH 80 + console.log(`🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`); +} +export async function deleteTestWorkspaceAPI2() { + // THIS SHOULD NOT BREAK WITH PRINT WIDTH 80 + console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`); +} + +// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1. +// const _ = "..."; = 13 chars +// + 34 emojis = width 68, +// Total 81 chars, should break with `printWidth: 80`. +const _ = "🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️"; diff --git a/crates/oxc_formatter/tests/fixtures/js/unicode/emoji-sequences.js.snap b/crates/oxc_formatter/tests/fixtures/js/unicode/emoji-sequences.js.snap new file mode 100644 index 0000000000000..b5cbae0d1388a --- /dev/null +++ b/crates/oxc_formatter/tests/fixtures/js/unicode/emoji-sequences.js.snap @@ -0,0 +1,66 @@ +--- +source: crates/oxc_formatter/tests/fixtures/mod.rs +--- +==================== Input ==================== +//345678901234567890123456789012345678901234567890123456789012345678901234567890 +// 80| +export async function deleteTestWorkspaceAPI() { + // THIS SHOULD BREAK WITH PRINT WIDTH 80 + console.log(`🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`); +} +export async function deleteTestWorkspaceAPI2() { + // THIS SHOULD NOT BREAK WITH PRINT WIDTH 80 + console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`); +} + +// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1. +// const _ = "..."; = 13 chars +// + 34 emojis = width 68, +// Total 81 chars, should break with `printWidth: 80`. +const _ = "🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️"; + +==================== Output ==================== +------------------ +{ printWidth: 80 } +------------------ +//345678901234567890123456789012345678901234567890123456789012345678901234567890 +// 80| +export async function deleteTestWorkspaceAPI() { + // THIS SHOULD BREAK WITH PRINT WIDTH 80 + console.log( + `🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`, + ); +} +export async function deleteTestWorkspaceAPI2() { + // THIS SHOULD NOT BREAK WITH PRINT WIDTH 80 + console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`); +} + +// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1. +// const _ = "..."; = 13 chars +// + 34 emojis = width 68, +// Total 81 chars, should break with `printWidth: 80`. +const _ = + "🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️"; + +------------------- +{ printWidth: 100 } +------------------- +//345678901234567890123456789012345678901234567890123456789012345678901234567890 +// 80| +export async function deleteTestWorkspaceAPI() { + // THIS SHOULD BREAK WITH PRINT WIDTH 80 + console.log(`🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`); +} +export async function deleteTestWorkspaceAPI2() { + // THIS SHOULD NOT BREAK WITH PRINT WIDTH 80 + console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`); +} + +// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1. +// const _ = "..."; = 13 chars +// + 34 emojis = width 68, +// Total 81 chars, should break with `printWidth: 80`. +const _ = "🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️"; + +===================== End =====================