Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changeset/long-cars-beam.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@biomejs/biome": patch
---

Introduces a new `TextWidth` field on all text `FormatElement`s that either stores the precomputed width of the text or
indicates that it is multiline.
24 changes: 20 additions & 4 deletions crates/biome_formatter/src/builders.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use crate::format_element::tag::{Condition, Tag};
use crate::prelude::tag::{DedentMode, GroupMode, LabelId};
use crate::prelude::*;
use crate::{Argument, Arguments, GroupId, TextRange, TextSize, format_element, write};
use crate::{
Argument, Arguments, FormatContext, FormatOptions, GroupId, TextRange, TextSize,
format_element, write,
};
use crate::{Buffer, VecBuffer};
use Tag::*;
use biome_rowan::{Language, SyntaxNode, SyntaxToken, TextLen, TokenText};
Expand Down Expand Up @@ -288,11 +291,15 @@ pub struct Text<'a> {
position: TextSize,
}

impl<Context> Format<Context> for Text<'_> {
impl<Context> Format<Context> for Text<'_>
where
Context: FormatContext,
{
/// Writes this text as a [`FormatElement::Text`], with its width pre-computed
/// using the indent width taken from the formatter's options.
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
    let text_width = TextWidth::from_text(self.text, f.options().indent_width());
    let element = FormatElement::Text {
        text: self.text.to_string().into_boxed_str(),
        source_position: self.position,
        text_width,
    };
    f.write_element(element)
}
}
Expand Down Expand Up @@ -321,7 +328,10 @@ pub struct SyntaxTokenCowSlice<'a, L: Language> {
start: TextSize,
}

impl<L: Language, Context> Format<Context> for SyntaxTokenCowSlice<'_, L> {
impl<L: Language, Context> Format<Context> for SyntaxTokenCowSlice<'_, L>
where
Context: FormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
match &self.text {
Cow::Borrowed(text) => {
Expand All @@ -338,11 +348,13 @@ impl<L: Language, Context> Format<Context> for SyntaxTokenCowSlice<'_, L> {
f.write_element(FormatElement::LocatedTokenText {
slice,
source_position: self.start,
text_width: TextWidth::from_text(text, f.options().indent_width()),
})
}
Cow::Owned(text) => f.write_element(FormatElement::Text {
text: text.clone().into_boxed_str(),
source_position: self.start,
text_width: TextWidth::from_text(text, f.options().indent_width()),
}),
}
}
Expand Down Expand Up @@ -375,11 +387,15 @@ pub struct LocatedTokenText {
source_position: TextSize,
}

impl<Context> Format<Context> for LocatedTokenText {
impl<Context> Format<Context> for LocatedTokenText
where
Context: FormatContext,
{
/// Writes this token text as a [`FormatElement::LocatedTokenText`], with its width
/// pre-computed using the indent width taken from the formatter's options.
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
    let text_width = TextWidth::from_text(&self.text, f.options().indent_width());
    let element = FormatElement::LocatedTokenText {
        slice: self.text.clone(),
        source_position: self.source_position,
        text_width,
    };
    f.write_element(element)
}
}
Expand Down
72 changes: 68 additions & 4 deletions crates/biome_formatter/src/format_element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ pub mod tag;
use crate::format_element::tag::{LabelId, Tag};
use std::borrow::Cow;

use crate::{TagKind, TextSize};
use crate::{IndentWidth, TagKind, TextSize};
use biome_rowan::TokenText;
#[cfg(target_pointer_width = "64")]
use biome_rowan::static_assert;
use std::hash::{Hash, Hasher};
use std::num::NonZeroU32;
use std::ops::Deref;
use std::rc::Rc;
use unicode_width::UnicodeWidthChar;

/// Language agnostic IR for formatting source code.
///
Expand All @@ -37,6 +39,7 @@ pub enum FormatElement {
text: Box<str>,
/// The start position of the text in the unformatted source code
source_position: TextSize,
text_width: TextWidth,
},

/// A token for a text that is taken as is from the source code (input text and formatted representation are identical).
Expand All @@ -46,6 +49,7 @@ pub enum FormatElement {
source_position: TextSize,
/// The token text
slice: TokenText,
text_width: TextWidth,
},

/// Prevents line suffixes from moving past this boundary. Forces the printer to print any pending
Expand Down Expand Up @@ -238,8 +242,8 @@ impl FormatElements for FormatElement {
Self::Tag(Tag::StartGroup(group)) => !group.mode().is_flat(),
Self::Line(line_mode) => matches!(line_mode, LineMode::Hard | LineMode::Empty),

Self::Text { text, .. } => text.contains('\n'),
Self::LocatedTokenText { slice, .. } => slice.contains('\n'),
Self::Text { text_width, .. } => text_width.is_multiline(),
Self::LocatedTokenText { text_width, .. } => text_width.is_multiline(),
Self::Interned(interned) => interned.will_break(),
// Traverse into the most flat version because the content is guaranteed to expand when even
// the most flat version contains some content that forces a break.
Expand Down Expand Up @@ -370,6 +374,66 @@ pub trait FormatElements {
fn end_tag(&self, kind: TagKind) -> Option<&Tag>;
}

/// New-type wrapper for a single-line text unicode width.
/// Mainly to prevent access to the inner value.
///
/// ## Representation
///
/// Represents the width by adding 1 to the actual width so that the width can be represented by a [`NonZeroU32`],
/// allowing [`TextWidth`] or [`Option<Width>`] to fit in 4 bytes rather than 8.
///
/// This means that 2^32-1 (`u32::MAX`) cannot be precisely represented and instead has the same value as 2^32-2.
/// This imprecision shouldn't matter in practice because such texts are longer than any configured line width
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is true only for text that is actually formatted by us. This means that this isn't applicable in cases where we format in verbatim mode. Can you update the comment to include this?

/// and thus, the text should break.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Width(NonZeroU32);

impl Width {
    /// Wraps `width`, storing it internally as `width + 1` so the payload is never zero.
    ///
    /// The addition saturates, so `u32::MAX` is stored the same way as `u32::MAX - 1`.
    pub(crate) const fn new(width: u32) -> Self {
        let stored = NonZeroU32::MIN.saturating_add(width);
        Self(stored)
    }

    /// Returns the wrapped width by undoing the `+ 1` offset applied on construction.
    pub const fn value(self) -> u32 {
        let Self(stored) = self;
        stored.get() - 1
    }
}

/// The pre-computed unicode width of a text if it is a single-line text or a marker
/// that it is a multiline text if it contains a line feed.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum TextWidth {
/// The text contains no line feed; holds its pre-computed width.
Width(Width),
/// The text contains at least one line feed, so no width is stored.
Multiline,
}

impl TextWidth {
/// Computes the [`TextWidth`] of `text`.
///
/// Returns [`TextWidth::Multiline`] as soon as a line feed (`\n`) is encountered.
/// Otherwise sums the width of every character: a tab counts as `indent_width`
/// columns and any other character counts as its unicode width (characters that
/// report no width count as zero).
pub fn from_text(text: &str, indent_width: IndentWidth) -> TextWidth {
    let mut width = 0u32;

    for c in text.chars() {
        let char_width = match c {
            '\t' => indent_width.value() as u32,
            '\n' => return TextWidth::Multiline,
            c => c.width().unwrap_or(0) as u32,
        };
        // Saturate rather than overflow: a plain `+=` panics in debug builds and wraps
        // in release builds, which would under-report the width of extremely long text.
        width = width.saturating_add(char_width);
    }
Comment on lines +413 to +420
Copy link
Contributor

@coderabbitai coderabbitai bot Nov 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Prevent overflow while tallying TextWidth.

width is a plain u32. A sufficiently long token (think multi‑gigabyte minified files or wide CJK text) will make width += char_width overflow: debug builds panic, release builds wrap to zero, and we then under-report the width so groups stay flat when they should break. Please make the accumulation saturating.

Apply this diff to stay safe:

-            width += char_width;
+            width = width.saturating_add(char_width);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
for c in text.chars() {
let char_width = match c {
'\t' => indent_width.value() as u32,
'\n' => return TextWidth::Multiline,
c => c.width().unwrap_or(0) as u32,
};
width += char_width;
}
for c in text.chars() {
let char_width = match c {
'\t' => indent_width.value() as u32,
'\n' => return TextWidth::Multiline,
c => c.width().unwrap_or(0) as u32,
};
width = width.saturating_add(char_width);
}
🤖 Prompt for AI Agents
In crates/biome_formatter/src/format_element.rs around lines 413 to 420, the
loop adds char widths into a plain u32 `width`, which can overflow for very long
or wide tokens; replace the non-saturating addition with a saturating
accumulation (e.g., use width = width.saturating_add(char_width) or equivalent)
so the total never wraps—keep the rest of the match logic unchanged and ensure
types remain u32.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@coderabbitai Biome trees work on u32 constraint. Do you think it's safe to assume we don't need the saturating_add?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ematipico You're right that the u32 tree constraint provides a practical upper bound—most text width will track close to byte size. The main edge case would be a massive file full of tabs with a large indent_width, but that's admittedly theoretical.

Since Width::new already saturates (NonZeroU32::MIN.saturating_add(width)), adding saturating_add here is pure defense in depth. The performance cost is negligible, but if you're confident the constraint holds in practice, plain += is fine. Your call!


🧠 Learnings used
Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:15.851Z
Learning: Applies to crates/biome_formatter/src/lib.rs : Implement FormatLanguage for HtmlFormatLanguage with associated types: SyntaxLanguage=HtmlLanguage, Context=HtmlFormatContext, FormatRule=FormatHtmlSyntaxNode

Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:15.851Z
Learning: Applies to crates/biome_formatter/src/cst.rs : Create FormatHtmlSyntaxNode in cst.rs implementing FormatRule<HtmlSyntaxNode> and AsFormat/IntoFormat for HtmlSyntaxNode using the provided plumbing

Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:15.851Z
Learning: Applies to crates/biome_formatter/src/lib.rs : Define the HtmlFormatter type alias: `type HtmlFormatter<'buf> = Formatter<'buf, HtmlFormatContext>;`

Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:15.851Z
Learning: Applies to crates/biome_formatter/src/context.rs : Create HtmlFormatContext in context.rs with comments and source_map fields and implement FormatContext and CstFormatContext

Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:15.851Z
Learning: Applies to crates/biome_formatter/src/comments.rs : Define HtmlCommentStyle implementing CommentStyle in comments.rs

Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_js_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:46.002Z
Learning: Applies to crates/biome_js_formatter/**/*.rs : Import and use the `FormatNode` trait for AST nodes

Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:15.851Z
Learning: Applies to crates/biome_formatter/tests/language.rs : Create tests/language.rs defining `HtmlTestFormatLanguage` and implement the TestFormatLanguage trait

Learnt from: CR
Repo: biomejs/biome PR: 0
File: crates/biome_js_formatter/CONTRIBUTING.md:0-0
Timestamp: 2025-10-15T09:22:46.002Z
Learning: Applies to crates/biome_js_formatter/**/*.rs : Do not attempt to fix code; if a mandatory token/node is missing, return `None` instead

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@denbezrukov can you check this comment?


Self::Width(Width::new(width))
}

pub const fn width(self) -> Option<Width> {
match self {
TextWidth::Width(width) => Some(width),
TextWidth::Multiline => None,
}
}

pub(crate) const fn is_multiline(self) -> bool {
matches!(self, TextWidth::Multiline)
}
}

#[cfg(test)]
mod tests {

Expand Down Expand Up @@ -407,4 +471,4 @@ static_assert!(std::mem::size_of::<crate::format_element::Tag>() == 16usize);

#[cfg(not(debug_assertions))]
#[cfg(target_pointer_width = "64")]
static_assert!(std::mem::size_of::<crate::FormatElement>() == 24usize);
static_assert!(std::mem::size_of::<crate::FormatElement>() == 32usize);
12 changes: 9 additions & 3 deletions crates/biome_formatter/src/format_element/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ impl Document {
// propagate their expansion.
false
}
FormatElement::Text { text, .. } => text.contains('\n'),
FormatElement::LocatedTokenText { slice, .. } => slice.contains('\n'),
FormatElement::Text { text_width, .. } => text_width.is_multiline(),
FormatElement::LocatedTokenText { text_width, .. } => text_width.is_multiline(),
FormatElement::ExpandParent
| FormatElement::Line(LineMode::Hard | LineMode::Empty) => true,
FormatElement::Token { .. } => false,
Expand Down Expand Up @@ -314,21 +314,25 @@ impl Format<IrFormatContext> for &[FormatElement] {
FormatElement::Text {
text,
source_position,
text_width,
} => {
let text = text.to_string().replace('"', "\\\"");
FormatElement::Text {
text: text.into(),
source_position: *source_position,
text_width: *text_width,
}
}
FormatElement::LocatedTokenText {
slice,
source_position,
text_width,
} => {
let text = slice.to_string().replace('"', "\\\"");
FormatElement::Text {
text: text.into(),
source_position: *source_position,
text_width: *text_width,
}
}
_ => unreachable!(),
Expand Down Expand Up @@ -795,7 +799,7 @@ mod tests {

use crate::SimpleFormatContext;
use crate::prelude::*;
use crate::{format, format_args, write};
use crate::{IndentWidth, format, format_args, write};

#[test]
fn display_elements() {
Expand Down Expand Up @@ -927,12 +931,14 @@ mod tests {
let token_text = FormatElement::LocatedTokenText {
source_position: TextSize::default(),
slice: token.token_text(),
text_width: TextWidth::from_text(&token.token_text(), IndentWidth::default()),
};

let mut document = Document::from(vec![
FormatElement::Text {
text: "\"foo\"".into(),
source_position: TextSize::default(),
text_width: TextWidth::from_text("\"foo\"", IndentWidth::default()),
},
token_text,
]);
Expand Down
7 changes: 5 additions & 2 deletions crates/biome_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ pub use buffer::{
VecBuffer,
};
pub use builders::BestFitting;
pub use format_element::{FormatElement, LINE_TERMINATORS, normalize_newlines};
pub use format_element::{FormatElement, LINE_TERMINATORS, TextWidth, normalize_newlines};
pub use group_id::GroupId;
pub use source_map::{TransformSourceMap, TransformSourceMapBuilder};
use std::num::ParseIntError;
Expand Down Expand Up @@ -2057,7 +2057,10 @@ pub fn format_sub_tree<L: FormatLanguage>(
))
}

impl<L: Language, Context> Format<Context> for SyntaxTriviaPiece<L> {
impl<L: Language, Context> Format<Context> for SyntaxTriviaPiece<L>
where
Context: FormatContext,
{
fn fmt(&self, f: &mut Formatter<Context>) -> FormatResult<()> {
let range = self.text_range();

Expand Down
Loading
Loading