diff --git a/crates/oxc_formatter/src/formatter/builders.rs b/crates/oxc_formatter/src/formatter/builders.rs index b91e85672a2bd..fb0ca9d96f4f7 100644 --- a/crates/oxc_formatter/src/formatter/builders.rs +++ b/crates/oxc_formatter/src/formatter/builders.rs @@ -1,4 +1,4 @@ -use std::{backtrace, borrow::Cow, cell::Cell, num::NonZeroU8}; +use std::{backtrace, cell::Cell, num::NonZeroU8}; use Tag::{ EndAlign, EndConditionalContent, EndDedent, EndEntry, EndFill, EndGroup, EndIndent, @@ -302,6 +302,23 @@ pub fn text(text: &str) -> Text<'_> { Text { text, width: None } } +/// Creates a text from a dynamic string and a known width, for example, +/// identifiers or numbers that do not contain line breaks. +pub fn text_with_width(text: &str, width: TextWidth) -> Text<'_> { + if width.is_multiline() { + debug_assert!( + text.as_bytes().iter().any(|&b| matches!(b, b'\n' | b'\t')), + "Text with a known multiline width must contain at least one whitespace character. Found invalid content: '{text}'" + ); + } else { + debug_assert!( + !text.as_bytes().iter().any(|&b| matches!(b, b'\n' | b'\t')), + "Text with a known width must not contain whitespace characters when the width is single line. Found invalid content: '{text}'" + ); + } + Text { text, width: Some(width) } +} + #[derive(Eq, PartialEq)] pub struct Text<'a> { text: &'a str, @@ -325,49 +342,6 @@ impl std::fmt::Debug for Text<'_> { } } -/// String that is the same as in the input source text if `text` is [`Cow::Borrowed`] or -/// some replaced content if `text` is [`Cow::Owned`]. 
-pub fn syntax_token_cow_slice(text: Cow<'_, str>, span: Span) -> SyntaxTokenCowSlice<'_> { - debug_assert_no_newlines(&text); - SyntaxTokenCowSlice { text, span } -} - -pub struct SyntaxTokenCowSlice<'a> { - text: Cow<'a, str>, - span: Span, -} - -impl<'a> Format<'a> for SyntaxTokenCowSlice<'a> { - fn fmt(&self, f: &mut Formatter<'_, 'a>) -> FormatResult<()> { - match &self.text { - Cow::Borrowed(content) => { - // let range = TextRange::at(self.start, text.text_len()); - // debug_assert_eq!( - // *text, - // &self.token.token()[range - self.token.text_range().start()], - // "The borrowed string doesn't match the specified token substring. Does the borrowed string belong to this token and range?" - // ); - - // let relative_range = range - self.token.text_range().start(); - // let slice = self.token.token_text().slice(relative_range); - - text(f.source_text().text_for(&self.span)).fmt(f) - } - Cow::Owned(text) => f.write_element(FormatElement::Text { - // TODO: Should use arena String to replace Cow::Owned. 
- text: f.context().allocator().alloc_str(text), - width: TextWidth::from_text(text, f.options().indent_width), - }), - } - } -} - -impl std::fmt::Debug for SyntaxTokenCowSlice<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::write!(f, "SyntaxTokenCowSlice({})", self.text) - } -} - #[track_caller] fn debug_assert_no_newlines(text: &str) { debug_assert!( diff --git a/crates/oxc_formatter/src/formatter/format_element/mod.rs b/crates/oxc_formatter/src/formatter/format_element/mod.rs index fee46fe37de3b..b3b62291c966f 100644 --- a/crates/oxc_formatter/src/formatter/format_element/mod.rs +++ b/crates/oxc_formatter/src/formatter/format_element/mod.rs @@ -420,6 +420,11 @@ impl TextWidth { Self::Width(Width::new(width)) } + pub fn from_len(len: usize) -> TextWidth { + #[expect(clippy::cast_possible_truncation)] + TextWidth::Width(Width::new(len as u32)) + } + pub const fn width(self) -> Option { match self { TextWidth::Width(width) => Some(width), diff --git a/crates/oxc_formatter/src/formatter/token/number.rs b/crates/oxc_formatter/src/formatter/token/number.rs index a079bbfa189fc..1e6d7f80ed6d2 100644 --- a/crates/oxc_formatter/src/formatter/token/number.rs +++ b/crates/oxc_formatter/src/formatter/token/number.rs @@ -40,7 +40,9 @@ pub struct CleanedNumberLiteralText<'a> { impl<'a> Format<'a> for CleanedNumberLiteralText<'a> { fn fmt(&self, f: &mut Formatter<'_, 'a>) -> FormatResult<()> { - syntax_token_cow_slice(format_trimmed_number(self.text, self.options), self.span).fmt(f) + let text = format_trimmed_number(self.text, self.options); + let width = TextWidth::from_len(text.len()); + text_with_width(f.context().allocator().alloc_str(&text), width).fmt(f) } } diff --git a/crates/oxc_formatter/src/utils/assignment_like.rs b/crates/oxc_formatter/src/utils/assignment_like.rs index 8e39a9753d671..bd03fa00ca102 100644 --- a/crates/oxc_formatter/src/utils/assignment_like.rs +++ b/crates/oxc_formatter/src/utils/assignment_like.rs @@ -27,7 
+27,7 @@ use crate::{ }, }; -use super::string_utils::{FormatLiteralStringToken, StringLiteralParentKind}; +use super::string::{FormatLiteralStringToken, StringLiteralParentKind}; #[derive(Clone, Copy)] pub enum AssignmentLike<'a, 'b> { diff --git a/crates/oxc_formatter/src/utils/mod.rs b/crates/oxc_formatter/src/utils/mod.rs index c9e6433ae1b30..bdcdac7282870 100644 --- a/crates/oxc_formatter/src/utils/mod.rs +++ b/crates/oxc_formatter/src/utils/mod.rs @@ -8,7 +8,7 @@ pub mod jsx; pub mod member_chain; pub mod object; pub mod statement_body; -pub mod string_utils; +pub mod string; pub mod suppressed; pub mod typecast; pub mod typescript; diff --git a/crates/oxc_formatter/src/utils/object.rs b/crates/oxc_formatter/src/utils/object.rs index 56517c87541a1..177bb959ebce0 100644 --- a/crates/oxc_formatter/src/utils/object.rs +++ b/crates/oxc_formatter/src/utils/object.rs @@ -6,7 +6,7 @@ use crate::{ Buffer, Format, FormatResult, ast_nodes::{AstNode, AstNodes}, formatter::Formatter, - utils::string_utils::{FormatLiteralStringToken, StringLiteralParentKind}, + utils::string::{FormatLiteralStringToken, StringLiteralParentKind}, write, }; diff --git a/crates/oxc_formatter/src/utils/string_utils.rs b/crates/oxc_formatter/src/utils/string.rs similarity index 63% rename from crates/oxc_formatter/src/utils/string_utils.rs rename to crates/oxc_formatter/src/utils/string.rs index 56a76531e0c3f..b39b88728159b 100644 --- a/crates/oxc_formatter/src/utils/string_utils.rs +++ b/crates/oxc_formatter/src/utils/string.rs @@ -1,13 +1,12 @@ use std::borrow::Cow; -use unicode_width::UnicodeWidthStr; - use oxc_span::{SourceType, Span}; use oxc_syntax::identifier::is_identifier_name; +use unicode_width::UnicodeWidthStr; use crate::{ FormatOptions, QuoteProperties, QuoteStyle, - formatter::{Format, FormatResult, Formatter, prelude::*, token::string::normalize_string}, + formatter::{Format, FormatResult, Formatter, prelude::*}, }; #[derive(Eq, PartialEq, Debug, Clone, Copy)] @@ -57,42 +56,32 
@@ impl<'a> FormatLiteralStringToken<'a> { let chosen_quote_properties = options.quote_properties; let mut string_cleaner = - LiteralStringNormaliser::new(*self, chosen_quote_style, chosen_quote_properties); + LiteralStringNormalizer::new(*self, chosen_quote_style, chosen_quote_properties); - let content = string_cleaner.normalise_text(source_type); - let normalized_text_width = content.width(); + let content = string_cleaner.normalize_text(source_type); - CleanedStringLiteralText { - string: self.string, - text: content, - span: self.span, - width: normalized_text_width, - } + CleanedStringLiteralText { string: self.string, text: content } + } + + fn raw_content(&self) -> &'a str { + &self.string[1..self.string.len() - 1] } } pub struct CleanedStringLiteralText<'a> { string: &'a str, text: Cow<'a, str>, - span: Span, - width: usize, } impl CleanedStringLiteralText<'_> { pub fn width(&self) -> usize { - self.width + self.text.width() } } impl<'a> Format<'a> for CleanedStringLiteralText<'a> { fn fmt(&self, f: &mut Formatter<'_, 'a>) -> FormatResult<()> { - syntax_token_cow_slice(self.text.clone(), self.span).fmt(f) - } -} - -impl<'a> Format<'a> for FormatLiteralStringToken<'a> { - fn fmt(&self, f: &mut Formatter<'_, 'a>) -> FormatResult<()> { - self.clean_text(f.context().source_type(), f.options()).fmt(f) + text(f.context().allocator().alloc_str(&self.text)).fmt(f) } } @@ -187,8 +176,8 @@ impl FormatLiteralStringToken<'_> { } /// Struct of convenience used to manipulate the string. It saves some state in order to apply -/// the normalise process. -struct LiteralStringNormaliser<'a> { +/// the normalize process. 
+struct LiteralStringNormalizer<'a> { /// The current token token: FormatLiteralStringToken<'a>, /// The quote that was set inside the configuration @@ -197,7 +186,7 @@ struct LiteralStringNormaliser<'a> { chosen_quote_properties: QuoteProperties, } -impl<'a> LiteralStringNormaliser<'a> { +impl<'a> LiteralStringNormalizer<'a> { pub fn new( token: FormatLiteralStringToken<'a>, chosen_quote_style: QuoteStyle, @@ -206,17 +195,17 @@ impl<'a> LiteralStringNormaliser<'a> { Self { token, chosen_quote_style, chosen_quote_properties } } - fn normalise_text(&mut self, source_type: SourceType) -> Cow<'a, str> { + fn normalize_text(&mut self, source_type: SourceType) -> Cow<'a, str> { let str_info = self.token.compute_string_information(self.chosen_quote_style); match self.token.parent_kind { - StringLiteralParentKind::Expression => self.normalise_string_literal(str_info), - StringLiteralParentKind::Directive => self.normalise_directive(str_info), - StringLiteralParentKind::ImportAttribute => self.normalise_import_attribute(str_info), - StringLiteralParentKind::Member => self.normalise_type_member(str_info, source_type), + StringLiteralParentKind::Expression => self.normalize_string_literal(str_info), + StringLiteralParentKind::Directive => self.normalize_directive(str_info), + StringLiteralParentKind::ImportAttribute => self.normalize_import_attribute(str_info), + StringLiteralParentKind::Member => self.normalize_type_member(str_info, source_type), } } - fn normalise_import_attribute( + fn normalize_import_attribute( &mut self, string_information: StringInformation, ) -> Cow<'a, str> { @@ -226,16 +215,16 @@ impl<'a> LiteralStringNormaliser<'a> { if can_remove_quotes { Cow::Owned(quoteless.to_string()) } else { - self.normalise_string_literal(string_information) + self.normalize_string_literal(string_information) } } - fn normalise_directive(&mut self, string_information: StringInformation) -> Cow<'a, str> { + fn normalize_directive(&mut self, string_information: 
StringInformation) -> Cow<'a, str> { - // In diretcives, unnecessary escapes should be preserved. + // In directives, unnecessary escapes should be preserved. // See https://github.com/prettier/prettier/issues/1555 - // Thus we don't normalise the string. + // Thus we don't normalize the string. // - // Since the string is not normalised, we should not change the quotes, + // Since the string is not normalized, we should not change the quotes, // if the directive contains some quotes. // // Note that we could change the quotes if the preferred quote is escaped. @@ -271,7 +260,7 @@ impl<'a> LiteralStringNormaliser<'a> { false } - fn normalise_type_member( + fn normalize_type_member( &mut self, string_information: StringInformation, source_type: SourceType, @@ -283,15 +272,15 @@ impl<'a> LiteralStringNormaliser<'a> { if can_remove_quotes { Cow::Owned(quoteless.to_string()) } else { - self.normalise_string_literal(string_information) + self.normalize_string_literal(string_information) } } - fn normalise_string_literal(&self, string_information: StringInformation) -> Cow<'a, str> { + fn normalize_string_literal(&self, string_information: StringInformation) -> Cow<'a, str> { let preferred_quote = string_information.preferred_quote; let polished_raw_content = normalize_string( self.raw_content(), - string_information.preferred_quote.into(), + string_information.preferred_quote, string_information.current_quote != string_information.preferred_quote, ); @@ -324,3 +313,129 @@ impl<'a> LiteralStringNormaliser<'a> { } } }
+
+impl<'a> Format<'a> for FormatLiteralStringToken<'a> {
+    fn fmt(&self, f: &mut Formatter<'_, 'a>) -> FormatResult<()> {
+        self.clean_text(f.context().source_type(), f.options()).fmt(f)
+    }
+}
+
+/// Normalizes `raw_content` for the preferred quote style by:
+///
+/// - escaping unescaped `preferred_quote` characters
+/// - unescaping alternate quotes of `preferred_quote` if `quotes_will_change`
+/// - normalizing newlines by replacing `\r\n` with `\n`, including after a backslash.
+///
+/// A `\r` that is not followed by `\n` is kept as-is, and the byte after it is still
+/// inspected, so a preferred quote directly after a lone `\r` is escaped as usual.
+///
+/// The function allocates a new string only if at least one change is performed.
+///
+/// In the following example `"` is escaped and the newline is normalized.
+///
+/// ```ignore
+/// assert_eq!(
+///     normalize_string(" \"He\\llo\\tworld\" \\' \\' \r\n ", QuoteStyle::Double, true),
+///     " \\\"He\\llo\\tworld\\\" ' ' \n ",
+/// );
+/// ```
+pub fn normalize_string(
+    raw_content: &str,
+    preferred_quote: QuoteStyle,
+    quotes_will_change: bool,
+) -> Cow<'_, str> {
+    let alternate_quote = preferred_quote.other().as_byte();
+    let preferred_quote = preferred_quote.as_byte();
+    let mut reduced_string = String::new();
+    let mut copy_start = 0;
+    // `peekable` lets the `\r\n` checks look ahead without consuming a byte that is not `\n`.
+    let mut bytes = raw_content.bytes().enumerate().peekable();
+    while let Some((byte_index, byte)) = bytes.next() {
+        match byte {
+            // If the next character is escaped
+            b'\\' => {
+                if let Some((escaped_index, escaped)) = bytes.next() {
+                    if escaped == b'\r' {
+                        // If we encounter the sequence "\r\n", then skip '\r'
+                        if let Some((next_byte_index, _)) = bytes.next_if(|&(_, b)| b == b'\n') {
+                            reduced_string.push_str(&raw_content[copy_start..escaped_index]);
+                            copy_start = next_byte_index;
+                        }
+                    } else if quotes_will_change && escaped == alternate_quote {
+                        // Unescape alternate quotes if quotes are changing
+                        reduced_string.push_str(&raw_content[copy_start..byte_index]);
+                        copy_start = escaped_index;
+                    }
+                }
+            }
+            // If we encounter the sequence "\r\n", then skip '\r'
+            b'\r' => {
+                if let Some((next_byte_index, _)) = bytes.next_if(|&(_, b)| b == b'\n') {
+                    reduced_string.push_str(&raw_content[copy_start..byte_index]);
+                    copy_start = next_byte_index;
+                }
+            }
+            _ => {
+                // If we encounter a preferred quote and it's not escaped, we have to replace it with
+                // an escaped version.
+                // This is done because of how the enclosed strings can change.
+                // Check `computed_preferred_quote` for more details.
+                if byte == preferred_quote {
+                    reduced_string.push_str(&raw_content[copy_start..byte_index]);
+                    reduced_string.push('\\');
+                    copy_start = byte_index;
+                }
+            }
+        }
+    }
+    if copy_start == 0 && reduced_string.is_empty() {
+        Cow::Borrowed(raw_content)
+    } else {
+        // Copy the remaining characters
+        reduced_string.push_str(&raw_content[copy_start..]);
+        Cow::Owned(reduced_string)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn normalize_newline() {
+        assert_eq!(normalize_string("a\nb", QuoteStyle::Double, true), "a\nb");
+        assert_eq!(normalize_string("a\r\nb", QuoteStyle::Double, true), "a\nb");
+        assert_eq!(normalize_string("a\\\r\nb", QuoteStyle::Double, true), "a\\\nb");
+    }
+
+    #[test]
+    fn normalize_escapes() {
+        assert_eq!(normalize_string("\\", QuoteStyle::Double, true), "\\");
+        assert_eq!(normalize_string("\\t", QuoteStyle::Double, true), "\\t");
+        assert_eq!(normalize_string("\\\u{2028}", QuoteStyle::Double, true), "\\\u{2028}");
+        assert_eq!(normalize_string("\\\u{2029}", QuoteStyle::Double, true), "\\\u{2029}");
+
+        assert_eq!(normalize_string(r"a\a", QuoteStyle::Double, true), r"a\a");
+        assert_eq!(normalize_string(r"👍\👍", QuoteStyle::Single, true), r"👍\👍");
+        assert_eq!(normalize_string("\\\u{2027}", QuoteStyle::Double, true), "\\\u{2027}");
+        assert_eq!(normalize_string("\\\u{2030}", QuoteStyle::Double, true), "\\\u{2030}");
+    }
+
+    #[test]
+    fn normalize_quotes() {
+        assert_eq!(normalize_string("\"", QuoteStyle::Double, true), "\\\"");
+        assert_eq!(normalize_string(r"\'", QuoteStyle::Double, true), r"'");
+
+        assert_eq!(normalize_string(r"\'", QuoteStyle::Double, false), r"\'");
+        assert_eq!(normalize_string("\"", QuoteStyle::Single, false), "\"");
+        assert_eq!(normalize_string("\\'", QuoteStyle::Single, false), "\\'");
+        assert_eq!(normalize_string("\\\"", QuoteStyle::Single, false), "\\\"");
+    }
+
+    #[test]
+    fn normalize_lone_carriage_return() {
+        assert_eq!(normalize_string("\r\"", QuoteStyle::Double, true), "\r\\\"");
+        assert_eq!(normalize_string("\r\r\n", QuoteStyle::Double, true), "\r\n");
+        assert_eq!(normalize_string("\\\r\"", QuoteStyle::Double, true), "\\\r\\\"");
+    }
+}
diff --git a/crates/oxc_formatter/src/write/mod.rs b/crates/oxc_formatter/src/write/mod.rs index 9e603aa12766d..eb32aa2dddb2b 100644
--- a/crates/oxc_formatter/src/write/mod.rs +++ b/crates/oxc_formatter/src/write/mod.rs @@ -73,7 +73,7 @@ use crate::{ member_chain::MemberChain, object::format_property_key, statement_body::FormatStatementBody, - string_utils::{FormatLiteralStringToken, StringLiteralParentKind}, + string::{FormatLiteralStringToken, StringLiteralParentKind}, suppressed::FormatSuppressedNode, }, write, diff --git a/crates/oxc_formatter/src/write/program.rs b/crates/oxc_formatter/src/write/program.rs index 1ee3e222baa62..5920331cd8db7 100644 --- a/crates/oxc_formatter/src/write/program.rs +++ b/crates/oxc_formatter/src/write/program.rs @@ -14,7 +14,7 @@ use crate::{ call_expression::is_test_call_expression, is_long_curried_call, member_chain::simple_argument::SimpleArgument, - string_utils::{FormatLiteralStringToken, StringLiteralParentKind}, + string::{FormatLiteralStringToken, StringLiteralParentKind}, }, write, write::semicolon::OptionalSemicolon,