diff --git a/crates/oxc_formatter/src/formatter/builders.rs b/crates/oxc_formatter/src/formatter/builders.rs
index 2bb4b2e437aec..b91e85672a2bd 100644
--- a/crates/oxc_formatter/src/formatter/builders.rs
+++ b/crates/oxc_formatter/src/formatter/builders.rs
@@ -12,7 +12,7 @@ use oxc_syntax::identifier::{is_line_terminator, is_white_space_single_line};
 use super::{
     Argument, Arguments, Buffer, Comments, GroupId, TextSize, VecBuffer,
     format_element::{
-        self,
+        self, TextWidth,
         tag::{Condition, Tag},
     },
     prelude::{
@@ -299,17 +299,23 @@ impl std::fmt::Debug for Token {
 pub fn text(text: &str) -> Text<'_> {
     // FIXME
     // debug_assert_no_newlines(text);
-    Text { text }
+    Text { text, width: None }
 }
 
 #[derive(Eq, PartialEq)]
 pub struct Text<'a> {
     text: &'a str,
+    width: Option<TextWidth>,
 }
 
 impl<'a> Format<'a> for Text<'a> {
     fn fmt(&self, f: &mut Formatter<'_, 'a>) -> FormatResult<()> {
-        f.write_element(FormatElement::Text { text: self.text })
+        f.write_element(FormatElement::Text {
+            text: self.text,
+            width: self
+                .width
+                .unwrap_or_else(|| TextWidth::from_text(self.text, f.options().indent_width)),
+        })
     }
 }
 
@@ -350,6 +356,7 @@ impl<'a> Format<'a> for SyntaxTokenCowSlice<'a> {
             Cow::Owned(text) => f.write_element(FormatElement::Text {
                 // TODO: Should use arena String to replace Cow::Owned.
                 text: f.context().allocator().alloc_str(text),
+                width: TextWidth::from_text(text, f.options().indent_width),
             }),
         }
     }
diff --git a/crates/oxc_formatter/src/formatter/format_element/document.rs b/crates/oxc_formatter/src/formatter/format_element/document.rs
index 44b585b9a7c97..2414398d83f32 100644
--- a/crates/oxc_formatter/src/formatter/format_element/document.rs
+++ b/crates/oxc_formatter/src/formatter/format_element/document.rs
@@ -210,10 +210,14 @@ impl<'a> Format<'a> for &[FormatElement<'a>] {
                             let new_element = match element {
                                 // except for static text because source_position is unknown
                                 FormatElement::Token { .. } => element.clone(),
-                                FormatElement::Text { text } => {
+                                FormatElement::Text { text, .. } => {
                                     let text = text.cow_replace('"', "\\\"");
                                     FormatElement::Text {
                                         text: f.context().allocator().alloc_str(&text),
+                                        width: TextWidth::from_text(
+                                            &text,
+                                            f.options().indent_width,
+                                        ),
                                     }
                                 }
                                 _ => unreachable!(),
diff --git a/crates/oxc_formatter/src/formatter/format_element/mod.rs b/crates/oxc_formatter/src/formatter/format_element/mod.rs
index ad423d4e024ce..fee46fe37de3b 100644
--- a/crates/oxc_formatter/src/formatter/format_element/mod.rs
+++ b/crates/oxc_formatter/src/formatter/format_element/mod.rs
@@ -4,8 +4,13 @@ pub mod tag;
 // #[cfg(target_pointer_width = "64")]
 // use biome_rowan::static_assert;
 use std::hash::{Hash, Hasher};
+use std::num::NonZeroU32;
 use std::{borrow::Cow, ops::Deref, rc::Rc};
 
+use unicode_width::UnicodeWidthChar;
+
+use crate::{IndentWidth, TabWidth};
+
 use super::{
     TagKind, TextSize,
     format_element::tag::{LabelId, Tag},
@@ -49,6 +54,7 @@ pub enum FormatElement<'a> {
     /// An arbitrary text that can contain tabs, newlines, and unicode characters.
     Text {
         text: &'a str,
+        width: TextWidth,
     },
 
     /// Prevents that line suffixes move past this boundary. Forces the printer to print any pending
@@ -238,7 +244,7 @@ impl FormatElements for FormatElement<'_> {
             FormatElement::ExpandParent => true,
             FormatElement::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(),
             FormatElement::Line(line_mode) => line_mode.will_break(),
-            FormatElement::Text { text } => text.contains('\n'),
+            FormatElement::Text { width, .. } => width.is_multiline(),
             FormatElement::Interned(interned) => interned.will_break(),
             // Traverse into the most flat version because the content is guaranteed to expand when even
             // the most flat version contains some content that forces a break.
@@ -363,3 +369,77 @@ pub trait FormatElements {
     /// * the last element is an end tag of `kind`
     fn end_tag(&self, kind: TagKind) -> Option<&Tag>;
 }
+
+/// New-type wrapper for a single-line text unicode width.
+/// Mainly to prevent access to the inner value.
+///
+/// ## Representation
+///
+/// Represents the width by adding 1 to the actual width so that the width can be represented by a [`NonZeroU32`],
+/// allowing [`TextWidth`] or [`Option<Width>`] to fit in 4 bytes rather than 8.
+///
+/// This means that a width of 2^32-1 can not be precisely represented and instead has the same value as 2^32-2.
+/// This imprecision shouldn't matter in practice because such a text is longer than any configured line width
+/// and thus, the text should break.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct Width(NonZeroU32);
+
+impl Width {
+    /// Creates a new width, storing `width + 1` and saturating at the `u32` maximum.
+    pub(crate) const fn new(width: u32) -> Self {
+        Width(NonZeroU32::MIN.saturating_add(width))
+    }
+
+    /// Returns the actual width, i.e. the stored value minus one.
+    pub const fn value(self) -> u32 {
+        self.0.get() - 1
+    }
+}
+
+/// The pre-computed unicode width of a text if it is a single-line text or a marker
+/// that it is a multiline text if it contains a line feed.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum TextWidth {
+    /// The text contains no line feed; holds its total width.
+    Width(Width),
+    /// The text contains at least one line feed (`\n`).
+    Multiline,
+}
+
+impl TextWidth {
+    /// Computes the width of `text`.
+    ///
+    /// Returns [`TextWidth::Multiline`] as soon as a `\n` is found. Otherwise sums the unicode
+    /// width of every character, counting a tab as `indent_width` columns and characters
+    /// without a defined width as zero.
+    pub fn from_text(text: &str, indent_width: IndentWidth) -> TextWidth {
+        let mut width = 0u32;
+
+        #[expect(clippy::cast_lossless)]
+        for c in text.chars() {
+            let char_width = match c {
+                '\t' => indent_width.value(),
+                '\n' => return TextWidth::Multiline,
+                #[expect(clippy::cast_possible_truncation)]
+                c => c.width().unwrap_or(0) as u8,
+            };
+            // Saturate instead of overflowing; `Width::new` saturates as well.
+            width = width.saturating_add(char_width as u32);
+        }
+
+        Self::Width(Width::new(width))
+    }
+
+    /// Returns the width of a single-line text, or `None` for a multiline text.
+    pub const fn width(self) -> Option<Width> {
+        match self {
+            TextWidth::Width(width) => Some(width),
+            TextWidth::Multiline => None,
+        }
+    }
+
+    /// Returns `true` if the text was found to contain a line feed.
+    pub(crate) const fn is_multiline(self) -> bool {
+        matches!(self, TextWidth::Multiline)
+    }
+}
diff --git a/crates/oxc_formatter/src/formatter/printer/mod.rs b/crates/oxc_formatter/src/formatter/printer/mod.rs
index 1d6bdfd1cb399..5d9fa5661dee6 100644
--- a/crates/oxc_formatter/src/formatter/printer/mod.rs
+++ b/crates/oxc_formatter/src/formatter/printer/mod.rs
@@ -16,6 +16,7 @@ use super::{
     format_element::{BestFittingElement, LineMode, PrintMode, document::Document, tag::Condition},
     prelude::{
         Tag::EndFill,
+        TextWidth,
         tag::{DedentMode, Tag, TagKind},
     },
     printer::{
@@ -94,8 +95,8 @@ impl<'a> Printer<'a> {
             }
 
             FormatElement::Token { text } => self.print_text(Text::Token(text)),
-            FormatElement::Text { text } => {
-                self.print_text(Text::Text(text));
+            FormatElement::Text { text, width } => {
+                self.print_text(Text::Text { text, width: *width });
             }
             FormatElement::Line(line_mode) => {
                 if args.mode().is_flat() {
@@ -616,9 +617,14 @@ impl<'a> Printer<'a> {
                 self.state.buffer.push_str(text);
                 self.state.line_width += text.len();
             }
-            Text::Text(text) => {
-                for char in text.chars() {
-                    self.print_char(char);
+            Text::Text { text, width } => {
+                if let Some(width) = width.width() {
+                    self.state.buffer.push_str(text);
+                    self.state.line_width += width.value() as usize;
+                } else {
+                    for char in text.chars() {
+                        self.print_char(char);
+                    }
                 }
             }
         }
@@ -1002,8 +1008,8 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> {
             FormatElement::Token { text } => {
                 return Ok(self.fits_text(Text::Token(text)));
             }
-            FormatElement::Text { text, .. } => {
-                return Ok(self.fits_text(Text::Text(text)));
+            FormatElement::Text { text, width } => {
+                return Ok(self.fits_text(Text::Text { text, width: *width }));
             }
 
             FormatElement::LineSuffixBoundary => {
@@ -1150,22 +1156,28 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> {
             Text::Token(text) => {
                 self.state.line_width += text.len();
             }
-            Text::Text(text) => {
-                for c in text.chars() {
-                    let char_width = match c {
-                        '\t' => self.options().indent_width.value() as usize,
-                        '\n' => {
-                            return if self.must_be_flat
-                                || self.state.line_width > usize::from(self.options().print_width)
-                            {
-                                Fits::No
-                            } else {
-                                Fits::Yes
-                            };
-                        }
-                        c => c.width().unwrap_or(0),
+            Text::Text { text, width } => {
+                if let Some(width) = width.width() {
+                    self.state.line_width += width.value() as usize;
+                } else {
+                    // Multiline text: only the part before the first line break has to fit,
+                    // so measure it (as the previous implementation did) before comparing.
+                    for c in text.chars() {
+                        let char_width = match c {
+                            '\t' => self.options().indent_width.value() as usize,
+                            '\n' => break,
+                            c => c.width().unwrap_or(0),
+                        };
+                        self.state.line_width += char_width;
+                    }
+
+                    return if self.must_be_flat
+                        || self.state.line_width > usize::from(self.options().print_width)
+                    {
+                        Fits::No
+                    } else {
+                        Fits::Yes
                     };
-                    self.state.line_width += char_width;
                 }
             }
         }
@@ -1262,13 +1274,13 @@ enum Text<'a> {
     /// ASCII only text that contains no line breaks or tab characters.
     Token(&'a str),
     /// Arbitrary text. May contain `\n` line breaks, tab characters, or unicode characters.
-    Text(&'a str),
+    Text { text: &'a str, width: TextWidth },
 }
 
 impl Text<'_> {
     fn len(&self) -> usize {
         match self {
-            Text::Token(text) | Text::Text(text) => text.len(),
+            Text::Token(text) | Text::Text { text, .. } => text.len(),
         }
     }
 }
diff --git a/crates/oxc_formatter/src/ir_transform/sort_imports/import_unit.rs b/crates/oxc_formatter/src/ir_transform/sort_imports/import_unit.rs
index feaef332e54a8..9059c0496f3bb 100644
--- a/crates/oxc_formatter/src/ir_transform/sort_imports/import_unit.rs
+++ b/crates/oxc_formatter/src/ir_transform/sort_imports/import_unit.rs
@@ -132,7 +132,7 @@ impl SortableImport {
 
         // Strip quotes and params
         let source = match &elements[*source_idx] {
-            FormatElement::Text { text } => *text,
+            FormatElement::Text { text, .. } => *text,
             _ => unreachable!(
                 "`source_idx` must point to either `LocatedTokenText` or `Text` in the `elements`."
             ),
diff --git a/crates/oxc_formatter/src/ir_transform/sort_imports/source_line.rs b/crates/oxc_formatter/src/ir_transform/sort_imports/source_line.rs
index dfcd35fd5a22c..d3a057d684f7c 100644
--- a/crates/oxc_formatter/src/ir_transform/sort_imports/source_line.rs
+++ b/crates/oxc_formatter/src/ir_transform/sort_imports/source_line.rs
@@ -65,7 +65,7 @@ impl SourceLine {
         // /* comment */ /* comment */
         // ```
         let is_comment_only = range.clone().all(|idx| match &elements[idx] {
-            FormatElement::Text { text } => text.starts_with("//") || text.starts_with("/*"),
+            FormatElement::Text { text, .. } => text.starts_with("//") || text.starts_with("/*"),
             FormatElement::Line(LineMode::Soft | LineMode::SoftOrSpace) | FormatElement::Space => {
                 true
             }
diff --git a/crates/oxc_formatter/src/options.rs b/crates/oxc_formatter/src/options.rs
index cad3834fcd6c5..84fd598a95c6b 100644
--- a/crates/oxc_formatter/src/options.rs
+++ b/crates/oxc_formatter/src/options.rs
@@ -496,6 +496,13 @@ impl From for Quote {
 #[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
 pub struct TabWidth(u8);
 
+impl TabWidth {
+    /// Returns the numeric value for this [TabWidth]
+    pub fn value(self) -> u8 {
+        self.0
+    }
+}
+
 impl From<u8> for TabWidth {
     fn from(value: u8) -> Self {
         TabWidth(value)