From 7f93f9ae2b9385e0c407f115291f215149272a11 Mon Sep 17 00:00:00 2001 From: Denis Bezrukov <6227442+denbezrukov@users.noreply.github.com> Date: Mon, 3 Nov 2025 22:35:01 +0200 Subject: [PATCH 1/2] refactor(formatter): memoize text width --- .changeset/long-cars-beam.md | 6 ++ crates/biome_formatter/src/builders.rs | 24 ++++- crates/biome_formatter/src/format_element.rs | 72 ++++++++++++- .../src/format_element/document.rs | 12 ++- crates/biome_formatter/src/lib.rs | 7 +- crates/biome_formatter/src/printer/mod.rs | 101 +++++++++++++----- .../src/utils/children.rs | 4 +- crates/biome_js_formatter/src/utils/jsx.rs | 3 +- 8 files changed, 187 insertions(+), 42 deletions(-) create mode 100644 .changeset/long-cars-beam.md diff --git a/.changeset/long-cars-beam.md b/.changeset/long-cars-beam.md new file mode 100644 index 000000000000..f998e6288d07 --- /dev/null +++ b/.changeset/long-cars-beam.md @@ -0,0 +1,6 @@ +--- +"@biomejs/biome": patch +--- + +Introduces a new TextWidth field on all text FormatElements that stores either the precomputed width of the text or +indicates that it is multiline. 
diff --git a/crates/biome_formatter/src/builders.rs b/crates/biome_formatter/src/builders.rs index 9dea24d0ad9b..076a8b898a6c 100644 --- a/crates/biome_formatter/src/builders.rs +++ b/crates/biome_formatter/src/builders.rs @@ -1,7 +1,10 @@ use crate::format_element::tag::{Condition, Tag}; use crate::prelude::tag::{DedentMode, GroupMode, LabelId}; use crate::prelude::*; -use crate::{Argument, Arguments, GroupId, TextRange, TextSize, format_element, write}; +use crate::{ + Argument, Arguments, FormatContext, FormatOptions, GroupId, TextRange, TextSize, + format_element, write, +}; use crate::{Buffer, VecBuffer}; use Tag::*; use biome_rowan::{Language, SyntaxNode, SyntaxToken, TextLen, TokenText}; @@ -288,11 +291,15 @@ pub struct Text<'a> { position: TextSize, } -impl Format for Text<'_> { +impl Format for Text<'_> +where + Context: FormatContext, +{ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { f.write_element(FormatElement::Text { text: self.text.to_string().into_boxed_str(), source_position: self.position, + text_width: TextWidth::from_text(self.text, f.options().indent_width()), }) } } @@ -321,7 +328,10 @@ pub struct SyntaxTokenCowSlice<'a, L: Language> { start: TextSize, } -impl Format for SyntaxTokenCowSlice<'_, L> { +impl Format for SyntaxTokenCowSlice<'_, L> +where + Context: FormatContext, +{ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { match &self.text { Cow::Borrowed(text) => { @@ -338,11 +348,13 @@ impl Format for SyntaxTokenCowSlice<'_, L> { f.write_element(FormatElement::LocatedTokenText { slice, source_position: self.start, + text_width: TextWidth::from_text(text, f.options().indent_width()), }) } Cow::Owned(text) => f.write_element(FormatElement::Text { text: text.clone().into_boxed_str(), source_position: self.start, + text_width: TextWidth::from_text(text, f.options().indent_width()), }), } } @@ -375,11 +387,15 @@ pub struct LocatedTokenText { source_position: TextSize, } -impl Format for LocatedTokenText { +impl Format for 
LocatedTokenText +where + Context: FormatContext, +{ fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { f.write_element(FormatElement::LocatedTokenText { slice: self.text.clone(), source_position: self.source_position, + text_width: TextWidth::from_text(&self.text, f.options().indent_width()), }) } } diff --git a/crates/biome_formatter/src/format_element.rs b/crates/biome_formatter/src/format_element.rs index 34a4b07b40a1..e16ca5c12842 100644 --- a/crates/biome_formatter/src/format_element.rs +++ b/crates/biome_formatter/src/format_element.rs @@ -4,13 +4,15 @@ pub mod tag; use crate::format_element::tag::{LabelId, Tag}; use std::borrow::Cow; -use crate::{TagKind, TextSize}; +use crate::{IndentWidth, TagKind, TextSize}; use biome_rowan::TokenText; #[cfg(target_pointer_width = "64")] use biome_rowan::static_assert; use std::hash::{Hash, Hasher}; +use std::num::NonZeroU32; use std::ops::Deref; use std::rc::Rc; +use unicode_width::UnicodeWidthChar; /// Language agnostic IR for formatting source code. /// @@ -37,6 +39,7 @@ pub enum FormatElement { text: Box, /// The start position of the text in the unformatted source code source_position: TextSize, + text_width: TextWidth, }, /// A token for a text that is taken as is from the source code (input text and formatted representation are identical). @@ -46,6 +49,7 @@ pub enum FormatElement { source_position: TextSize, /// The token text slice: TokenText, + text_width: TextWidth, }, /// Prevents that line suffixes move past this boundary. Forces the printer to print any pending @@ -238,8 +242,8 @@ impl FormatElements for FormatElement { Self::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(), Self::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty), - Self::Text { text, .. } => text.contains('\n'), - Self::LocatedTokenText { slice, .. } => slice.contains('\n'), + Self::Text { text_width, .. } => text_width.is_multiline(), + Self::LocatedTokenText { text_width, .. 
} => text_width.is_multiline(), Self::Interned(interned) => interned.will_break(), // Traverse into the most flat version because the content is guaranteed to expand when even // the most flat version contains some content that forces a break. @@ -370,6 +374,66 @@ pub trait FormatElements { fn end_tag(&self, kind: TagKind) -> Option<&Tag>; } +/// New-type wrapper for a single-line text unicode width. +/// Mainly to prevent access to the inner value. +/// +/// ## Representation +/// +/// Represents the width by adding 1 to the actual width so that the width can be represented by a [`NonZeroU32`], +/// allowing [`TextWidth`] or [`Option<Width>`] to fit in 4 bytes rather than 8. +/// +/// This means that a width of 2^32-1 (`u32::MAX`) can not be represented precisely and is instead stored as 2^32-2. +/// This imprecision doesn't matter in practice because such a text is far wider than any configurable line width +/// and thus, the text has to break either way. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct Width(NonZeroU32); + +impl Width { + pub(crate) const fn new(width: u32) -> Self { + Width(NonZeroU32::MIN.saturating_add(width)) + } + + pub const fn value(self) -> u32 { + self.0.get() - 1 + } +} + +/// The pre-computed unicode width of a text if it is a single-line text or a marker +/// that it is a multiline text if it contains a line feed. 
+#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub enum TextWidth { + Width(Width), + Multiline, +} + +impl TextWidth { + pub fn from_text(text: &str, indent_width: IndentWidth) -> TextWidth { + let mut width = 0u32; + + for c in text.chars() { + let char_width = match c { + '\t' => indent_width.value() as u32, + '\n' => return TextWidth::Multiline, + c => c.width().unwrap_or(0) as u32, + }; + width += char_width; + } + + Self::Width(Width::new(width)) + } + + pub const fn width(self) -> Option { + match self { + TextWidth::Width(width) => Some(width), + TextWidth::Multiline => None, + } + } + + pub(crate) const fn is_multiline(self) -> bool { + matches!(self, TextWidth::Multiline) + } +} + #[cfg(test)] mod tests { @@ -407,4 +471,4 @@ static_assert!(std::mem::size_of::() == 16usize); #[cfg(not(debug_assertions))] #[cfg(target_pointer_width = "64")] -static_assert!(std::mem::size_of::() == 24usize); +static_assert!(std::mem::size_of::() == 32usize); diff --git a/crates/biome_formatter/src/format_element/document.rs b/crates/biome_formatter/src/format_element/document.rs index 82b6f7b1af0e..bf9748b3f571 100644 --- a/crates/biome_formatter/src/format_element/document.rs +++ b/crates/biome_formatter/src/format_element/document.rs @@ -116,8 +116,8 @@ impl Document { // propagate their expansion. false } - FormatElement::Text { text, .. } => text.contains('\n'), - FormatElement::LocatedTokenText { slice, .. } => slice.contains('\n'), + FormatElement::Text { text_width, .. } => text_width.is_multiline(), + FormatElement::LocatedTokenText { text_width, .. } => text_width.is_multiline(), FormatElement::ExpandParent | FormatElement::Line(LineMode::Hard | LineMode::Empty) => true, FormatElement::Token { .. 
} => false, @@ -314,21 +314,25 @@ impl Format for &[FormatElement] { FormatElement::Text { text, source_position, + text_width, } => { let text = text.to_string().replace('"', "\\\""); FormatElement::Text { text: text.into(), source_position: *source_position, + text_width: *text_width, } } FormatElement::LocatedTokenText { slice, source_position, + text_width, } => { let text = slice.to_string().replace('"', "\\\""); FormatElement::Text { text: text.into(), source_position: *source_position, + text_width: *text_width, } } _ => unreachable!(), @@ -795,7 +799,7 @@ mod tests { use crate::SimpleFormatContext; use crate::prelude::*; - use crate::{format, format_args, write}; + use crate::{IndentWidth, format, format_args, write}; #[test] fn display_elements() { @@ -927,12 +931,14 @@ mod tests { let token_text = FormatElement::LocatedTokenText { source_position: TextSize::default(), slice: token.token_text(), + text_width: TextWidth::from_text(&token.token_text(), IndentWidth::default()), }; let mut document = Document::from(vec![ FormatElement::Text { text: "\"foo\"".into(), source_position: TextSize::default(), + text_width: TextWidth::from_text("\"foo\"", IndentWidth::default()), }, token_text, ]); diff --git a/crates/biome_formatter/src/lib.rs b/crates/biome_formatter/src/lib.rs index e9bb5df1315e..3479cf840be2 100644 --- a/crates/biome_formatter/src/lib.rs +++ b/crates/biome_formatter/src/lib.rs @@ -72,7 +72,7 @@ pub use buffer::{ VecBuffer, }; pub use builders::BestFitting; -pub use format_element::{FormatElement, LINE_TERMINATORS, normalize_newlines}; +pub use format_element::{FormatElement, LINE_TERMINATORS, TextWidth, normalize_newlines}; pub use group_id::GroupId; pub use source_map::{TransformSourceMap, TransformSourceMapBuilder}; use std::num::ParseIntError; @@ -2057,7 +2057,10 @@ pub fn format_sub_tree( )) } -impl Format for SyntaxTriviaPiece { +impl Format for SyntaxTriviaPiece +where + Context: FormatContext, +{ fn fmt(&self, f: &mut Formatter) -> 
FormatResult<()> { let range = self.text_range(); diff --git a/crates/biome_formatter/src/printer/mod.rs b/crates/biome_formatter/src/printer/mod.rs index cf1edae4bb56..46284a8ba42c 100644 --- a/crates/biome_formatter/src/printer/mod.rs +++ b/crates/biome_formatter/src/printer/mod.rs @@ -6,7 +6,7 @@ mod stack; pub use printer_options::*; -use crate::format_element::{BestFittingElement, LineMode, PrintMode}; +use crate::format_element::{BestFittingElement, LineMode, PrintMode, TextWidth}; use crate::{ ActualStart, FormatElement, GroupId, IndentStyle, InvalidDocumentError, PrintError, PrintResult, Printed, SourceMarker, TextRange, @@ -100,11 +100,25 @@ impl<'a> Printer<'a> { FormatElement::Text { text, source_position, - } => self.print_text(Text::Text(text), Some(*source_position)), + text_width, + } => self.print_text( + Text::Text { + text, + text_width: *text_width, + }, + Some(*source_position), + ), FormatElement::LocatedTokenText { slice, source_position, - } => self.print_text(Text::Text(slice), Some(*source_position)), + text_width, + } => self.print_text( + Text::Text { + text: slice, + text_width: *text_width, + }, + Some(*source_position), + ), FormatElement::Line(line_mode) => { if args.mode().is_flat() { @@ -372,9 +386,20 @@ impl<'a> Printer<'a> { self.state.has_empty_line = false; } } - Text::Text(text_str) => { - for char in text_str.chars() { - self.print_char(char); + Text::Text { + text: text_str, + text_width, + } => { + if let Some(width) = text_width.width() { + self.state.buffer.push_str(text_str); + self.state.line_width += width.value() as usize; + if !text_str.is_empty() { + self.state.has_empty_line = false; + } + } else { + for char in text_str.chars() { + self.print_char(char); + } } } } @@ -382,7 +407,7 @@ impl<'a> Printer<'a> { if source_position.is_some() { let text_str = match text { Text::Token(s) => s, - Text::Text(s) => s, + Text::Text { text, .. 
} => text, }; self.state.source_position += text_str.text_len(); } @@ -753,7 +778,10 @@ enum Text<'a> { /// ASCII only text that contains no line breaks or tab characters. Token(&'a str), /// Arbitrary text. May contain `\n` line breaks, tab characters, or unicode characters. - Text(&'a str), + Text { + text: &'a str, + text_width: TextWidth, + }, } #[derive(Copy, Clone, Debug)] @@ -1135,9 +1163,21 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> { } FormatElement::Token { text } => return Ok(self.fits_text(Text::Token(text))), - FormatElement::Text { text, .. } => return Ok(self.fits_text(Text::Text(text))), - FormatElement::LocatedTokenText { slice, .. } => { - return Ok(self.fits_text(Text::Text(slice))); + FormatElement::Text { + text, text_width, .. + } => { + return Ok(self.fits_text(Text::Text { + text, + text_width: *text_width, + })); + } + FormatElement::LocatedTokenText { + slice, text_width, .. + } => { + return Ok(self.fits_text(Text::Text { + text: slice, + text_width: *text_width, + })); } FormatElement::LineSuffixBoundary => { @@ -1302,22 +1342,29 @@ impl<'a, 'print> FitsMeasurer<'a, 'print> { Text::Token(token) => { self.state.line_width += token.len(); } - Text::Text(text_str) => { - for c in text_str.chars() { - let char_width = match c { - '\t' => self.options().indent_width.value() as usize, - '\n' => { - return if self.must_be_flat - || self.state.line_width > self.options().print_width.into() - { - Fits::No - } else { - Fits::Yes - }; - } - c => c.width().unwrap_or(0), - }; - self.state.line_width += char_width; + Text::Text { + text: text_str, + text_width, + } => { + if let Some(width) = text_width.width() { + self.state.line_width += width.value() as usize; + } else { + for c in text_str.chars() { + let char_width = match c { + '\t' => self.options().indent_width.value() as usize, + '\n' => { + return if self.must_be_flat + || self.state.line_width > self.options().print_width.into() + { + Fits::No + } else { + Fits::Yes + }; + } + c => 
c.width().unwrap_or(0), + }; + self.state.line_width += char_width; + } } } } diff --git a/crates/biome_html_formatter/src/utils/children.rs b/crates/biome_html_formatter/src/utils/children.rs index 41c052cd8831..ce1d4e87bf6d 100644 --- a/crates/biome_html_formatter/src/utils/children.rs +++ b/crates/biome_html_formatter/src/utils/children.rs @@ -4,7 +4,8 @@ use std::{ }; use biome_formatter::{ - Buffer, Format, FormatElement, FormatResult, format_args, prelude::*, write, + Buffer, Format, FormatElement, FormatOptions, FormatResult, TextWidth, format_args, prelude::*, + write, }; use biome_html_syntax::{AnyHtmlContent, AnyHtmlElement}; use biome_rowan::{AstNode, SyntaxResult, TextLen, TextRange, TextSize, TokenText}; @@ -66,6 +67,7 @@ impl Format for HtmlWord { f.write_element(FormatElement::LocatedTokenText { source_position: self.source_position, slice: self.text.clone(), + text_width: TextWidth::from_text(&self.text, f.options().indent_width()), }) } } diff --git a/crates/biome_js_formatter/src/utils/jsx.rs b/crates/biome_js_formatter/src/utils/jsx.rs index cae5860a3d3b..bfeec9ed0f50 100644 --- a/crates/biome_js_formatter/src/utils/jsx.rs +++ b/crates/biome_js_formatter/src/utils/jsx.rs @@ -1,6 +1,6 @@ use crate::JsCommentStyle; use crate::prelude::*; -use biome_formatter::{QuoteStyle, comments::CommentStyle, format_args, write}; +use biome_formatter::{FormatOptions, QuoteStyle, TextWidth, comments::CommentStyle, format_args, write}; use biome_js_syntax::{ AnyJsExpression, AnyJsLiteralExpression, AnyJsxChild, AnyJsxTag, JsComputedMemberExpression, JsStaticMemberExpression, JsSyntaxKind, JsxChildList, JsxExpressionChild, JsxTagExpression, @@ -423,6 +423,7 @@ impl Format for JsxWord { f.write_element(FormatElement::LocatedTokenText { source_position: self.source_position, slice: self.text.clone(), + text_width: TextWidth::from_text(&self.text, f.options().indent_width()), }) } } From f018e50f82eafbf785ce69d134cf0d23ed29db85 Mon Sep 17 00:00:00 2001 From: 
"autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 4 Nov 2025 09:14:47 +0000 Subject: [PATCH 2/2] [autofix.ci] apply automated fixes --- crates/biome_js_formatter/src/utils/jsx.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/biome_js_formatter/src/utils/jsx.rs b/crates/biome_js_formatter/src/utils/jsx.rs index bfeec9ed0f50..9f76a9328c20 100644 --- a/crates/biome_js_formatter/src/utils/jsx.rs +++ b/crates/biome_js_formatter/src/utils/jsx.rs @@ -1,6 +1,8 @@ use crate::JsCommentStyle; use crate::prelude::*; -use biome_formatter::{FormatOptions, QuoteStyle, TextWidth, comments::CommentStyle, format_args, write}; +use biome_formatter::{ + FormatOptions, QuoteStyle, TextWidth, comments::CommentStyle, format_args, write, +}; use biome_js_syntax::{ AnyJsExpression, AnyJsLiteralExpression, AnyJsxChild, AnyJsxTag, JsComputedMemberExpression, JsStaticMemberExpression, JsSyntaxKind, JsxChildList, JsxExpressionChild, JsxTagExpression,