Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 45 additions & 12 deletions crates/oxc_formatter/src/formatter/format_element/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::hash::{Hash, Hasher};
use std::ptr;
use std::{borrow::Cow, ops::Deref};

use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
use unicode_width::UnicodeWidthStr;

use oxc_allocator::Vec as ArenaVec;

Expand Down Expand Up @@ -452,25 +452,33 @@ impl TextWidth {

/// Calculates width from text, handling tabs, newlines, and Unicode.
///
/// Returns early on newline detection for efficiency.
/// NOTE: Uses `UnicodeWidthStr::width()` for accurate emoji sequence handling.
/// Counting by `char` can lead to incorrect widths for complex Unicode sequences.
/// e.g. "🗑️" (U+1F5D1 U+FE0F) is a single emoji with width 2, but counting chars gives width 1.
#[expect(clippy::cast_possible_truncation)]
pub fn from_text(text: &str, indent_width: IndentWidth) -> TextWidth {
// Fast path for empty text
if text.is_empty() {
return Self::single(0);
}

let mut width = 0u32;

#[expect(clippy::cast_lossless)]
for c in text.chars() {
let char_width = match c {
'\t' => indent_width.value(),
'\n' => return Self::multiline(width),
#[expect(clippy::cast_possible_truncation)]
c => c.width().unwrap_or(0) as u8,
};
width += char_width as u32;
let mut segment_start = 0;
for (i, c) in text.char_indices() {
match c {
'\t' => {
width += text[segment_start..i].width() as u32;
width += u32::from(indent_width.value());
segment_start = i + 1; // Skip the tab character
}
'\n' => {
width += text[segment_start..i].width() as u32;
return Self::multiline(width);
}
_ => {}
}
}
width += text[segment_start..].width() as u32;

Self::single(width)
}
Expand Down Expand Up @@ -554,6 +562,31 @@ mod tests {
debug_assert!(!width.is_multiline());
}

/// Regression test: an emoji followed by a variation selector (U+FE0F) must be
/// measured as a whole string rather than summed `char` by `char`.
///
/// Uses `assert_eq!` rather than `debug_assert_eq!` so the checks still run
/// when the tests are built without debug assertions (e.g. `--release`).
#[test]
fn from_text_handles_emoji_sequences() {
    use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

    // Emoji with variation selector: 🗑️ = U+1F5D1 + U+FE0F
    let emoji = "🗑️";

    // Counting by char gives wrong width
    let wrong: usize = emoji.chars().filter_map(UnicodeWidthChar::width).sum();
    assert_eq!(wrong, 1);
    // Need to count by str for correct width
    assert_eq!(emoji.width(), 2);
    // Verify `TextWidth` also gets it right
    let width = TextWidth::from_text(emoji, indent_width(2));
    assert_eq!(width.value(), 2);

    // Emoji with text
    let width = TextWidth::from_text("🗑️ DELETE", indent_width(2));
    assert_eq!(width.value(), 9); // 2 (emoji) + 1 (space) + 6 (DELETE)

    // Another emoji with variation selector: ⚠️ = U+26A0 + U+FE0F
    let width = TextWidth::from_text("⚠️", indent_width(2));
    assert_eq!(width.value(), 2);
}

#[test]
fn from_text_empty_returns_zero() {
let width = TextWidth::from_text("", indent_width(2));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//345678901234567890123456789012345678901234567890123456789012345678901234567890
// 80|
export async function deleteTestWorkspaceAPI() {
// THIS SHOULD BREAK WITH PRINT WIDTH 80
console.log(`🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`);
}
export async function deleteTestWorkspaceAPI2() {
// THIS SHOULD NOT BREAK WITH PRINT WIDTH 80
console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`);
}

// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1.
// const _ = "..."; = 13 chars
// + 34 emojis = width 68,
// Total 81 chars, should break with `printWidth: 80`.
const _ = "🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️";
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
---
source: crates/oxc_formatter/tests/fixtures/mod.rs
---
==================== Input ====================
//345678901234567890123456789012345678901234567890123456789012345678901234567890
// 80|
export async function deleteTestWorkspaceAPI() {
// THIS SHOULD BREAK WITH PRINT WIDTH 80
console.log(`🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`);
}
export async function deleteTestWorkspaceAPI2() {
// THIS SHOULD NOT BREAK WITH PRINT WIDTH 80
console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`);
}

// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1.
// const _ = "..."; = 13 chars
// + 34 emojis = width 68,
// Total 81 chars, should break with `printWidth: 80`.
const _ = "🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️";

==================== Output ====================
------------------
{ printWidth: 80 }
------------------
//345678901234567890123456789012345678901234567890123456789012345678901234567890
// 80|
export async function deleteTestWorkspaceAPI() {
// THIS SHOULD BREAK WITH PRINT WIDTH 80
console.log(
`🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`,
);
}
export async function deleteTestWorkspaceAPI2() {
// THIS SHOULD NOT BREAK WITH PRINT WIDTH 80
console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`);
}

// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1.
// const _ = "..."; = 13 chars
// + 34 emojis = width 68,
// Total 81 chars, should break with `printWidth: 80`.
const _ =
"🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️";

-------------------
{ printWidth: 100 }
-------------------
//345678901234567890123456789012345678901234567890123456789012345678901234567890
// 80|
export async function deleteTestWorkspaceAPI() {
// THIS SHOULD BREAK WITH PRINT WIDTH 80
console.log(`🗑️ DELETED => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`);
}
export async function deleteTestWorkspaceAPI2() {
// THIS SHOULD NOT BREAK WITH PRINT WIDTH 80
console.log(`🗑️ DELETE => TEST WORKSPACE on ${baseURL} (Team ID: ${teamId})`);
}

// Emoji with variation selector (U+1F5D1 + U+FE0F) has width 2, not 1.
// const _ = "..."; = 13 chars
// + 34 emojis = width 68,
// Total 81 chars, should break with `printWidth: 80`.
const _ = "🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️🗑️";

===================== End =====================
Loading