diff --git a/crates/biome_html_formatter/src/html/lists/element_list.rs b/crates/biome_html_formatter/src/html/lists/element_list.rs index d687a63417d..0abdc7e7a0a 100644 --- a/crates/biome_html_formatter/src/html/lists/element_list.rs +++ b/crates/biome_html_formatter/src/html/lists/element_list.rs @@ -1,10 +1,683 @@ -use crate::prelude::*; -use biome_html_syntax::HtmlElementList; +//! This implementation is very heavily inspired by the JSX formatter implementation for JsxChildList. + +use std::cell::RefCell; + +use crate::{ + prelude::*, + util::children::{ + html_split_children, is_meaningful_html_text, HtmlChild, HtmlChildrenIterator, + HtmlRawSpace, HtmlSpace, + }, +}; +use biome_formatter::{best_fitting, prelude::*}; +use biome_formatter::{format_args, write, VecBuffer}; +use biome_html_syntax::{AnyHtmlElement, HtmlElementList}; +use tag::GroupMode; #[derive(Debug, Clone, Default)] -pub(crate) struct FormatHtmlElementList; +pub(crate) struct FormatHtmlElementList { + layout: HtmlChildListLayout, +} impl FormatRule for FormatHtmlElementList { type Context = HtmlFormatContext; fn fmt(&self, node: &HtmlElementList, f: &mut HtmlFormatter) -> FormatResult<()> { - f.join().entries(node.iter().formatted()).finish() + let result = self.fmt_children(node, f)?; + match result { + FormatChildrenResult::ForceMultiline(format_multiline) => { + write!(f, [format_multiline]) + } + FormatChildrenResult::BestFitting { + flat_children, + expanded_children, + } => { + write!(f, [best_fitting![flat_children, expanded_children]]) + } + } + } +} + +#[derive(Debug)] +pub(crate) enum FormatChildrenResult { + ForceMultiline(FormatMultilineChildren), + BestFitting { + flat_children: FormatFlatChildren, + expanded_children: FormatMultilineChildren, + }, +} + +impl FormatHtmlElementList { + pub(crate) fn fmt_children( + &self, + list: &HtmlElementList, + f: &mut HtmlFormatter, + ) -> FormatResult { + let children_meta = self.children_meta(list); + let layout = 
self.layout(children_meta); + + let multiline_layout = if children_meta.meaningful_text { + MultilineLayout::Fill + } else { + MultilineLayout::NoFill + }; + + let mut flat = FlatBuilder::new(); + let mut multiline = MultilineBuilder::new(multiline_layout); + + let mut force_multiline = layout.is_multiline(); + + let mut children = html_split_children(list.iter())?; + + // Trim trailing new lines + if let Some(HtmlChild::EmptyLine | HtmlChild::Newline) = children.last() { + children.pop(); + } + + let mut last: Option<&HtmlChild> = None; + let mut children_iter = HtmlChildrenIterator::new(children.iter()); + + // Trim leading new lines + if let Some(HtmlChild::Newline | HtmlChild::EmptyLine) = children_iter.peek() { + children_iter.next(); + } + + while let Some(child) = children_iter.next() { + let mut child_breaks = false; + + match &child { + // A single word: Both `a` and `b` are a word in `a b` because they're separated by HTML Whitespace. + HtmlChild::Word(word) => { + let separator = match children_iter.peek() { + Some(HtmlChild::Word(_)) => { + // Separate words by a space or line break in extended mode + Some(WordSeparator::BetweenWords) + } + + // Last word or last word before an element without any whitespace in between + Some(HtmlChild::NonText(next_child)) => Some(WordSeparator::EndOfText { + is_soft_line_break: !matches!( + next_child, + AnyHtmlElement::HtmlSelfClosingElement(_) + ) || word.is_single_character(), + }), + + Some(HtmlChild::Newline | HtmlChild::Whitespace | HtmlChild::EmptyLine) => { + None + } + + None => None, + }; + + child_breaks = separator.map_or(false, |separator| separator.will_break()); + + flat.write(&format_args![word, separator], f); + + if let Some(separator) = separator { + multiline.write_with_separator(word, &separator, f); + } else { + // it's safe to write without a separator because None means that next element is a separator or end of the iterator + multiline.write_content(word, f); + } + } + + // * Whitespace after 
the opening tag and before a meaningful text: `
a` + // * Whitespace before the closing tag: `a
` + // * Whitespace before an opening tag: `a
` + HtmlChild::Whitespace => { + flat.write(&HtmlSpace, f); + + // ```javascript + //
a + // {' '}
+ // ``` + let is_after_line_break = + last.as_ref().map_or(false, |last| last.is_any_line()); + + // `
aaa
` or `
` + let is_trailing_or_only_whitespace = children_iter.peek().is_none(); + + if is_trailing_or_only_whitespace || is_after_line_break { + multiline.write_separator(&HtmlRawSpace, f); + } + // Leading whitespace. Only possible if used together with a expression child + // + // ``` + //
+ // + // {' '} + // + //
+ // ``` + else if last.is_none() { + multiline.write_with_separator(&HtmlRawSpace, &hard_line_break(), f); + } else { + multiline.write_separator(&HtmlSpace, f); + } + } + + // A new line between some JSX text and an element + HtmlChild::Newline => { + let is_soft_break = { + // Here we handle the case when we have a newline between a single-character word and a jsx element + // We need to use the previous and the next element + // [HtmlChild::Word, HtmlChild::Newline, HtmlChild::NonText] + // ``` + //
+ //
First
, + //
Second
+ //
+ // ``` + if let Some(HtmlChild::Word(word)) = last { + let is_next_element_self_closing = matches!( + children_iter.peek(), + Some(HtmlChild::NonText(AnyHtmlElement::HtmlSelfClosingElement( + _ + ))) + ); + !is_next_element_self_closing && word.is_single_character() + } + // Here we handle the case when we have a single-character word between a new line and a jsx element + // Here we need to look ahead two elements + // [HtmlChild::Newline, HtmlChild::Word, HtmlChild::NonText] + // ``` + //
+ //
First
+ // ,
Second
+ //
+ // ``` + else if let Some(HtmlChild::Word(next_word)) = children_iter.peek() { + let next_next_element = children_iter.peek_next(); + let is_next_next_element_new_line = + matches!(next_next_element, Some(HtmlChild::Newline)); + let is_next_next_element_self_closing = matches!( + next_next_element, + Some(HtmlChild::NonText(AnyHtmlElement::HtmlSelfClosingElement( + _ + ))) + ); + let has_new_line_and_self_closing = is_next_next_element_new_line + && matches!( + children_iter.peek_next_next(), + Some(HtmlChild::NonText( + AnyHtmlElement::HtmlSelfClosingElement(_) + )) + ); + + !has_new_line_and_self_closing + && !is_next_next_element_self_closing + && next_word.is_single_character() + } else { + false + } + }; + + if is_soft_break { + multiline.write_separator(&soft_line_break(), f); + } else { + child_breaks = true; + multiline.write_separator(&hard_line_break(), f); + } + } + + // An empty line between some JSX text and an element + HtmlChild::EmptyLine => { + child_breaks = true; + + // Additional empty lines are not preserved when any of + // the children are a meaningful text node. + // + // <> + //
First
+ // + //
Second
+ // + // Third + // + // + // Becomes: + // + // <> + //
First
+ //
Second
+ // Third + // + if children_meta.meaningful_text { + multiline.write_separator(&hard_line_break(), f); + } else { + multiline.write_separator(&empty_line(), f); + } + } + + // Any child that isn't text + HtmlChild::NonText(non_text) => { + let line_mode = match children_iter.peek() { + Some(HtmlChild::Word(word)) => { + // Break if the current or next element is a self closing element + // ```javascript + //
adefg
+                            // ```
+                            // Becomes
+                            // ```javascript
+                            // 
+                            // adefg
+                            // ```
+                            if matches!(non_text, AnyHtmlElement::HtmlSelfClosingElement(_))
+                                && !word.is_single_character()
+                            {
+                                Some(LineMode::Hard)
+                            } else {
+                                Some(LineMode::Soft)
+                            }
+                        }
+
+                        // Add a hard line break if what comes after the element is not a text or is all whitespace
+                        Some(HtmlChild::NonText(_)) => Some(LineMode::Hard),
+
+                        Some(HtmlChild::Newline | HtmlChild::Whitespace | HtmlChild::EmptyLine) => {
+                            None
+                        }
+                        // Don't insert trailing line breaks
+                        None => None,
+                    };
+
+                    child_breaks = line_mode.map_or(false, |mode| mode.is_hard());
+
+                    let format_separator = line_mode.map(|mode| {
+                        format_with(move |f| f.write_element(FormatElement::Line(mode)))
+                    });
+
+                    if force_multiline {
+                        if let Some(format_separator) = format_separator {
+                            multiline.write_with_separator(
+                                &non_text.format(),
+                                &format_separator,
+                                f,
+                            );
+                        } else {
+                            // it's safe to write without a separator because None means that next element is a separator or end of the iterator
+                            multiline.write_content(&non_text.format(), f);
+                        }
+                    } else {
+                        let mut memoized = non_text.format().memoized();
+
+                        force_multiline = memoized.inspect(f)?.will_break();
+                        flat.write(&format_args![memoized, format_separator], f);
+
+                        if let Some(format_separator) = format_separator {
+                            multiline.write_with_separator(&memoized, &format_separator, f);
+                        } else {
+                            // it's safe to write without a separator because None means that next element is a separator or end of the iterator
+                            multiline.write_content(&memoized, f);
+                        }
+                    }
+                }
+            }
+
+            if child_breaks {
+                flat.disable();
+                force_multiline = true;
+            }
+
+            last = Some(child);
+        }
+
+        if force_multiline {
+            Ok(FormatChildrenResult::ForceMultiline(multiline.finish()?))
+        } else {
+            Ok(FormatChildrenResult::BestFitting {
+                flat_children: flat.finish()?,
+                expanded_children: multiline.finish()?,
+            })
+        }
+    }
+
+    /// Resolves the layout that is effectively used for the children.
+    ///
+    /// A configured `BestFitting` layout is only kept when the children contain
+    /// neither a tag nor multiple expressions; every other combination resolves
+    /// to `Multiline`.
+    fn layout(&self, meta: ChildrenMeta) -> HtmlChildListLayout {
+        let needs_multiline = meta.any_tag || meta.multiple_expressions;
+
+        match (self.layout, needs_multiline) {
+            (HtmlChildListLayout::BestFitting, false) => HtmlChildListLayout::BestFitting,
+            (HtmlChildListLayout::BestFitting, true) | (HtmlChildListLayout::Multiline, _) => {
+                HtmlChildListLayout::Multiline
+            }
+        }
+    }
+
+    /// Walks over all children once and records which kinds of content they contain.
+    ///
+    /// * `any_tag` becomes `true` when an element or a self closing element is seen.
+    /// * `meaningful_text` becomes `true` when the value token of a content node can be
+    ///   retrieved and its text passes `is_meaningful_html_text`.
+    ///
+    /// Every other kind of child leaves the meta data untouched.
+    fn children_meta(&self, list: &HtmlElementList) -> ChildrenMeta {
+        let mut summary = ChildrenMeta::default();
+
+        for child in list {
+            match child {
+                AnyHtmlElement::HtmlElement(_) | AnyHtmlElement::HtmlSelfClosingElement(_) => {
+                    summary.any_tag = true;
+                }
+                AnyHtmlElement::HtmlContent(text) => {
+                    // Once meaningful text has been found there is nothing left to learn
+                    // from further content nodes, so the token lookup is skipped.
+                    if !summary.meaningful_text {
+                        let value_token = text.value_token();
+                        summary.meaningful_text = value_token
+                            .map_or(false, |token| is_meaningful_html_text(token.text()));
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        summary
+    }
+}
+
+/// Layout strategy for the children of an element list.
+///
+/// `BestFitting` is the default variant.
+#[derive(Debug, Default, Copy, Clone)]
+pub enum HtmlChildListLayout {
+    /// Prefers to format the children on a single line if possible.
+    #[default]
+    BestFitting,
+
+    /// Forces the children to be formatted over multiple lines.
+    Multiline,
+}
+
+impl HtmlChildListLayout {
+    /// Returns `true` only for the [HtmlChildListLayout::Multiline] variant.
+    const fn is_multiline(&self) -> bool {
+        match self {
+            HtmlChildListLayout::Multiline => true,
+            HtmlChildListLayout::BestFitting => false,
+        }
+    }
+}
+
+/// Summary of the kinds of children found in an element list.
+///
+/// All flags default to `false`.
+#[derive(Copy, Clone, Debug, Default)]
+struct ChildrenMeta {
+    /// `true` if the children contain an element or a self closing element
+    /// (`AnyHtmlElement::HtmlElement` or `AnyHtmlElement::HtmlSelfClosingElement`).
+    any_tag: bool,
+
+    /// `true` if the children contain more than one expression child.
+    ///
+    /// NOTE(review): nothing in the visible code assigns this flag, so it appears to
+    /// always keep its default `false`. Presumably a leftover from the JSX formatter
+    /// this file is modelled on; confirm before relying on it.
+    multiple_expressions: bool,
+
+    /// `true` if any content child has text that `is_meaningful_html_text` accepts.
+    meaningful_text: bool,
+}
+
+#[derive(Copy, Clone, Debug)]
+enum WordSeparator {
+    /// Separator between two words. Creates a soft line break or space.
+    ///
+    /// `a b`
+    BetweenWords,
+
+    /// A separator of a word at the end of a [HtmlText] element. Either because it is the last
+    /// child in its parent OR it is right before the start of another child (element, expression, ...).
+    ///
+    /// ```javascript
+    /// 
a
; // last element of parent + ///
a
// last element before another element + ///
a{expression}
// last element before expression + /// ``` + /// + /// Creates a soft line break EXCEPT if the next element is a self closing element + /// or the previous word was an ascii punctuation, which results in a hard line break: + /// + /// ```javascript + /// a =
ab
; + /// + /// // becomes + /// + /// a = ( + ///
+ /// ab + ///
+ ///
+ /// ); + /// ``` + EndOfText { is_soft_line_break: bool }, +} + +impl WordSeparator { + /// Returns if formatting this separator will result in a child that expands + fn will_break(&self) -> bool { + matches!( + self, + WordSeparator::EndOfText { + is_soft_line_break: false, + } + ) + } +} + +impl Format for WordSeparator { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + match self { + WordSeparator::BetweenWords => soft_line_break_or_space().fmt(f), + WordSeparator::EndOfText { is_soft_line_break } => { + if *is_soft_line_break { + soft_line_break().fmt(f) + } + // ```javascript + //
ab
+ // ``` + // Becomes + // + // ```javascript + //
+ // ab + //
+ //
+ // ``` + else { + hard_line_break().fmt(f) + } + } + } + } +} + +#[derive(Copy, Clone, Debug, Default)] +enum MultilineLayout { + Fill, + #[default] + NoFill, +} + +/// Builder that helps to create the output for the multiline layout. +/// +/// The multiline layout may use [FormatElement::Fill] element that requires that its children +/// are an alternating sequence of `[element, separator, element, separator, ...]`. +/// +/// This requires that each element is wrapped inside of a list if it emits more than one element to uphold +/// the constraints of [FormatElement::Fill]. +/// +/// However, the wrapping is only necessary for [MultilineLayout::Fill] for when the [FormatElement::Fill] element is used. +/// +/// This builder takes care of doing the least amount of work necessary for the chosen layout while also guaranteeing +/// that the written element is valid +#[derive(Debug, Clone)] +struct MultilineBuilder { + layout: MultilineLayout, + result: FormatResult>, +} + +impl MultilineBuilder { + fn new(layout: MultilineLayout) -> Self { + Self { + layout, + result: Ok(Vec::new()), + } + } + + /// Formats an element that does not require a separator + /// It is safe to omit the separator because at the call side we must guarantee that we have reached the end of the iterator + /// or the next element is a space/newline that should be written into the separator "slot". 
+ fn write_content(&mut self, content: &dyn Format, f: &mut HtmlFormatter) { + self.write(content, None, f); + } + + /// Formatting a separator does not require any element in the separator slot + fn write_separator( + &mut self, + separator: &dyn Format, + f: &mut HtmlFormatter, + ) { + self.write(separator, None, f); + } + + fn write_with_separator( + &mut self, + content: &dyn Format, + separator: &dyn Format, + f: &mut HtmlFormatter, + ) { + self.write(content, Some(separator), f); + } + + fn write( + &mut self, + content: &dyn Format, + separator: Option<&dyn Format>, + f: &mut HtmlFormatter, + ) { + let result = std::mem::replace(&mut self.result, Ok(Vec::new())); + + self.result = result.and_then(|elements| { + let elements = { + let mut buffer = VecBuffer::new_with_vec(f.state_mut(), elements); + match self.layout { + MultilineLayout::Fill => { + // Make sure that the separator and content only ever write a single element + buffer.write_element(FormatElement::Tag(Tag::StartEntry))?; + write!(buffer, [content])?; + buffer.write_element(FormatElement::Tag(Tag::EndEntry))?; + + if let Some(separator) = separator { + buffer.write_element(FormatElement::Tag(Tag::StartEntry))?; + write!(buffer, [separator])?; + buffer.write_element(FormatElement::Tag(Tag::EndEntry))?; + } + } + MultilineLayout::NoFill => { + write!(buffer, [content, separator])?; + + if let Some(separator) = separator { + write!(buffer, [separator])?; + } + } + }; + buffer.into_vec() + }; + Ok(elements) + }) + } + + fn finish(self) -> FormatResult { + Ok(FormatMultilineChildren { + layout: self.layout, + elements: RefCell::new(self.result?), + }) + } +} + +#[derive(Debug)] +pub(crate) struct FormatMultilineChildren { + layout: MultilineLayout, + elements: RefCell>, +} + +impl Format for FormatMultilineChildren { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + let format_inner = format_once(|f| { + if let Some(elements) = f.intern_vec(self.elements.take()) { + match self.layout { + 
MultilineLayout::Fill => f.write_elements([ + FormatElement::Tag(Tag::StartFill), + elements, + FormatElement::Tag(Tag::EndFill), + ])?, + MultilineLayout::NoFill => f.write_elements([ + FormatElement::Tag(Tag::StartGroup( + tag::Group::new().with_mode(GroupMode::Expand), + )), + elements, + FormatElement::Tag(Tag::EndGroup), + ])?, + }; + } + + Ok(()) + }); + + // This indent is wrapped with a group to ensure that the print mode is + // set to `Expanded` when the group prints and will guarantee that the + // content _does not_ fit when printed as part of a `Fill`. Example: + //
+ // + // + // {" "} + // ({variable}) + //
+ // The `...` is the element that gets wrapped in the group + // by this line. Importantly, it contains a hard line break, and because + // [FitsMeasurer::fits_element] considers all hard lines as `Fits::Yes`, + // it will cause the element and the following separator to be printed + // in flat mode due to the logic of `Fill`. But because the we know the + // item breaks over multiple lines, we want it to _not_ fit and print + // both the content and the separator in Expanded mode, keeping the + // formatting as shown above. + // + // The `group` here allows us to opt-in to telling the `FitsMeasurer` + // that content that breaks shouldn't be considered flat and should be + // expanded. This is in contrast to something like a concise array fill, + // which _does_ allow breaks to fit and preserves density. + write!(f, [group(&block_indent(&format_inner))]) + } +} + +#[derive(Debug)] +struct FlatBuilder { + result: FormatResult>, + disabled: bool, +} + +impl FlatBuilder { + fn new() -> Self { + Self { + result: Ok(Vec::new()), + disabled: false, + } + } + + fn write(&mut self, content: &dyn Format, f: &mut HtmlFormatter) { + if self.disabled { + return; + } + + let result = std::mem::replace(&mut self.result, Ok(Vec::new())); + + self.result = result.and_then(|elements| { + let mut buffer = VecBuffer::new_with_vec(f.state_mut(), elements); + + write!(buffer, [content])?; + + Ok(buffer.into_vec()) + }) + } + + fn disable(&mut self) { + self.disabled = true; + } + + fn finish(self) -> FormatResult { + assert!(!self.disabled, "The flat builder has been disabled and thus, does no longer store any elements. 
Make sure you don't call disable if you later intend to format the flat content."); + + Ok(FormatFlatChildren { + elements: RefCell::new(self.result?), + }) + } +} + +#[derive(Debug)] +pub(crate) struct FormatFlatChildren { + elements: RefCell>, +} + +impl Format for FormatFlatChildren { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + if let Some(elements) = f.intern_vec(self.elements.take()) { + f.write_element(elements)?; + } + Ok(()) } } diff --git a/crates/biome_html_formatter/src/lib.rs b/crates/biome_html_formatter/src/lib.rs index 3b68dca5521..8e3777b3851 100644 --- a/crates/biome_html_formatter/src/lib.rs +++ b/crates/biome_html_formatter/src/lib.rs @@ -14,6 +14,7 @@ mod cst; mod generated; mod html; pub(crate) mod prelude; +pub(crate) mod util; /// Formats a Html file based on its features. /// diff --git a/crates/biome_html_formatter/src/util/children.rs b/crates/biome_html_formatter/src/util/children.rs new file mode 100644 index 00000000000..1ac4a5dee52 --- /dev/null +++ b/crates/biome_html_formatter/src/util/children.rs @@ -0,0 +1,422 @@ +use std::{ + iter::{FusedIterator, Peekable}, + str::Chars, +}; + +use biome_formatter::{ + format_args, prelude::*, write, Buffer, Format, FormatElement, FormatResult, +}; +use biome_html_syntax::AnyHtmlElement; +use biome_rowan::{SyntaxResult, TextLen, TextRange, TextSize, TokenText}; + +use crate::{context::HtmlFormatContext, HtmlFormatter}; + +pub(crate) static HTML_WHITESPACE_CHARS: [char; 4] = [' ', '\n', '\t', '\r']; + +/// Meaningful HTML text is defined to be text that has either non-whitespace +/// characters, or does not contain a newline. Whitespace is defined as ASCII +/// whitespace. 
+/// +/// ``` +/// use biome_html_formatter::utils::is_meaningful_html_text; +/// +/// assert_eq!(is_meaningful_html_text(" \t\r "), true); +/// assert_eq!(is_meaningful_html_text(" \n\r "), false); +/// assert_eq!(is_meaningful_html_text(" Alien "), true); +/// assert_eq!(is_meaningful_html_text("\n Alien "), true); +/// assert_eq!(is_meaningful_html_text(" Alien \n"), true); +/// assert_eq!(is_meaningful_html_text(""), true); +/// ``` +pub fn is_meaningful_html_text(text: &str) -> bool { + let mut has_newline = false; + for c in text.chars() { + // If there is a non-whitespace character + if !HTML_WHITESPACE_CHARS.contains(&c) { + return true; + } else if c == '\n' { + has_newline = true; + } + } + + !has_newline +} + +/// A word in a Html Text. A word is string sequence that isn't separated by any HTML whitespace. +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) struct HtmlWord { + text: TokenText, + source_position: TextSize, +} + +impl HtmlWord { + fn new(text: TokenText, source_position: TextSize) -> Self { + HtmlWord { + text, + source_position, + } + } + + pub(crate) fn is_single_character(&self) -> bool { + self.text.chars().count() == 1 + } +} + +impl Format for HtmlWord { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + f.write_element(FormatElement::LocatedTokenText { + source_position: self.source_position, + slice: self.text.clone(), + }) + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] +pub(crate) enum HtmlChild { + /// A Single word in a HTML text. For example, the words for `a b\nc` are `[a, b, c]` + Word(HtmlWord), + + /// A ` ` whitespace + /// + /// ```html + ///
+ ///
a
+ ///
a
+ ///
a + /// b
+ /// ``` + /// + /// Whitespace between two words is not represented as whitespace + /// ```javascript + ///
a b
+ /// ``` + /// The space between `a` and `b` is not considered a whitespace. + Whitespace, + + /// A new line at the start or end of a [HtmlText] with meaningful content. (that isn't all whitespace + /// and contains a new line). + /// + /// ```html + ///
+ /// a + ///
+ /// ``` + Newline, + + /// A [HtmlText] that only consists of whitespace and has at least two line breaks; + /// + /// ```html + ///
+ /// + /// + ///
+ /// ``` + /// + /// The text between `
` and `` is an empty line text. + EmptyLine, + + /// Any other content that isn't a text. Should be formatted as is. + NonText(AnyHtmlElement), +} + +impl HtmlChild { + pub(crate) const fn is_any_line(&self) -> bool { + matches!(self, HtmlChild::EmptyLine | HtmlChild::Newline) + } +} + +/// Creates either a space using an expression child and a string literal, +/// or a regular space, depending on whether the group breaks or not. +/// +/// ```html +///
Winter Light
; +/// +///
+/// Winter Light +/// Through A Glass Darkly +/// The Silence +/// Seventh Seal +/// Wild Strawberries +///
+/// ``` +#[derive(Default)] +pub(crate) struct HtmlSpace; + +impl Format for HtmlSpace { + fn fmt(&self, formatter: &mut HtmlFormatter) -> FormatResult<()> { + write![ + formatter, + [ + if_group_breaks(&format_args![HtmlRawSpace, soft_line_break()]), + if_group_fits_on_line(&space()) + ] + ] + } +} + +pub(crate) struct HtmlRawSpace; + +impl Format for HtmlRawSpace { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + write!(f, [text(" ")]) + } +} + +pub(crate) fn html_split_children(children: I) -> SyntaxResult> +where + I: IntoIterator, +{ + let mut builder = HtmlSplitChildrenBuilder::new(); + + for child in children { + match child { + AnyHtmlElement::HtmlContent(text) => { + // Split the text into words + // Keep track if there's any leading/trailing empty line, new line or whitespace + + let value_token = text.value_token()?; + let mut chunks = HtmlSplitChunksIterator::new(value_token.text()).peekable(); + + // Text starting with a whitespace + if let Some((_, HtmlTextChunk::Whitespace(_whitespace))) = chunks.peek() { + match chunks.next() { + Some((_, HtmlTextChunk::Whitespace(whitespace))) => { + if whitespace.contains('\n') { + if chunks.peek().is_none() { + // A text only consisting of whitespace that also contains a new line isn't considered meaningful text. + // It can be entirely removed from the content without changing the semantics. + let newlines = + whitespace.chars().filter(|c| *c == '\n').count(); + + // Keep up to one blank line between tags/expressions and text. + // ```javascript + //
+ // + // + //
+ // ``` + if newlines > 1 { + builder.entry(HtmlChild::EmptyLine); + } + + continue; + } + + builder.entry(HtmlChild::Newline) + } else { + builder.entry(HtmlChild::Whitespace) + } + } + _ => unreachable!(), + } + } + + while let Some(chunk) = chunks.next() { + match chunk { + (_, HtmlTextChunk::Whitespace(whitespace)) => { + // Only handle trailing whitespace. Words must always be joined by new lines + if chunks.peek().is_none() { + if whitespace.contains('\n') { + builder.entry(HtmlChild::Newline); + } else { + builder.entry(HtmlChild::Whitespace) + } + } + } + + (relative_start, HtmlTextChunk::Word(word)) => { + let text = value_token + .token_text() + .slice(TextRange::at(relative_start, word.text_len())); + let source_position = value_token.text_range().start() + relative_start; + + builder.entry(HtmlChild::Word(HtmlWord::new(text, source_position))); + } + } + } + } + child => { + builder.entry(HtmlChild::NonText(child)); + } + } + } + + Ok(builder.finish()) +} + +/// The builder is used to: +/// 1. Remove [HtmlChild::EmptyLine], [HtmlChild::Newline], [HtmlChild::Whitespace] if a next element is [HtmlChild::Whitespace] +/// 2. 
Don't push a new element [HtmlChild::EmptyLine], [HtmlChild::Newline], [HtmlChild::Whitespace] if previous one is [HtmlChild::EmptyLine], [HtmlChild::Newline], [HtmlChild::Whitespace] +/// +/// [Prettier applies]: https://github.com/prettier/prettier/blob/b0d9387b95cdd4e9d50f5999d3be53b0b5d03a97/src/language-js/print/jsx.js#L144-L180 +#[derive(Debug)] +struct HtmlSplitChildrenBuilder { + buffer: Vec, +} + +impl HtmlSplitChildrenBuilder { + fn new() -> Self { + HtmlSplitChildrenBuilder { buffer: vec![] } + } + + fn entry(&mut self, child: HtmlChild) { + match self.buffer.last_mut() { + Some(last @ (HtmlChild::EmptyLine | HtmlChild::Newline | HtmlChild::Whitespace)) => { + if matches!(child, HtmlChild::Whitespace) { + *last = child; + } else if matches!(child, HtmlChild::NonText(_) | HtmlChild::Word(_)) { + self.buffer.push(child); + } + } + _ => self.buffer.push(child), + } + } + + fn finish(self) -> Vec { + self.buffer + } +} + +#[derive(Eq, PartialEq, Copy, Clone, Debug)] +enum HtmlTextChunk<'a> { + Whitespace(&'a str), + Word(&'a str), +} + +/// Splits a text into whitespace only and non-whitespace chunks. 
+/// +/// See `jsx_split_chunks_iterator` test for examples +struct HtmlSplitChunksIterator<'a> { + position: TextSize, + text: &'a str, + chars: Peekable>, +} + +impl<'a> HtmlSplitChunksIterator<'a> { + fn new(text: &'a str) -> Self { + Self { + position: TextSize::default(), + text, + chars: text.chars().peekable(), + } + } +} + +impl<'a> Iterator for HtmlSplitChunksIterator<'a> { + type Item = (TextSize, HtmlTextChunk<'a>); + + fn next(&mut self) -> Option { + let char = self.chars.next()?; + + let start = self.position; + self.position += char.text_len(); + + let is_whitespace = matches!(char, ' ' | '\n' | '\t' | '\r'); + + while let Some(next) = self.chars.peek() { + let next_is_whitespace = matches!(next, ' ' | '\n' | '\t' | '\r'); + + if is_whitespace != next_is_whitespace { + break; + } + + self.position += next.text_len(); + self.chars.next(); + } + + let range = TextRange::new(start, self.position); + let slice = &self.text[range]; + + let chunk = if is_whitespace { + HtmlTextChunk::Whitespace(slice) + } else { + HtmlTextChunk::Word(slice) + }; + + Some((start, chunk)) + } +} + +impl FusedIterator for HtmlSplitChunksIterator<'_> {} + +/// An iterator adaptor that allows a lookahead of three tokens +/// +/// # Examples +/// ``` +/// use biome_html_formatter::utils::HtmlChildrenIterator; +/// +/// let buffer = vec![1, 2, 3, 4]; +/// +/// let mut iter = HtmlChildrenIterator::new(buffer.iter()); +/// +/// assert_eq!(iter.peek(), Some(&&1)); +/// assert_eq!(iter.peek_next(), Some(&&2)); +/// assert_eq!(iter.peek_next_next(), Some(&&3)); +/// assert_eq!(iter.next(), Some(&1)); +/// assert_eq!(iter.next(), Some(&2)); +/// assert_eq!(iter.next(), Some(&3)); +/// ``` +#[derive(Clone, Debug)] +pub struct HtmlChildrenIterator { + iter: I, + + peeked: Option>, + peeked_next: Option>, + peeked_next_next: Option>, +} + +impl HtmlChildrenIterator { + pub fn new(iter: I) -> Self { + Self { + iter, + peeked: None, + peeked_next: None, + peeked_next_next: None, + } + } + + 
pub fn peek(&mut self) -> Option<&I::Item> { + let iter = &mut self.iter; + self.peeked.get_or_insert_with(|| iter.next()).as_ref() + } + + pub fn peek_next(&mut self) -> Option<&I::Item> { + let iter = &mut self.iter; + let peeked = &mut self.peeked; + + self.peeked_next + .get_or_insert_with(|| { + peeked.get_or_insert_with(|| iter.next()); + iter.next() + }) + .as_ref() + } + + pub fn peek_next_next(&mut self) -> Option<&I::Item> { + let iter = &mut self.iter; + let peeked = &mut self.peeked; + let peeked_next = &mut self.peeked_next; + + self.peeked_next_next + .get_or_insert_with(|| { + peeked.get_or_insert_with(|| iter.next()); + peeked_next.get_or_insert_with(|| iter.next()); + iter.next() + }) + .as_ref() + } +} + +impl Iterator for HtmlChildrenIterator { + type Item = I::Item; + + fn next(&mut self) -> Option { + match self.peeked.take() { + Some(peeked) => { + self.peeked = self.peeked_next.take(); + self.peeked_next = self.peeked_next_next.take(); + peeked + } + None => self.iter.next(), + } + } +} diff --git a/crates/biome_html_formatter/src/util/mod.rs b/crates/biome_html_formatter/src/util/mod.rs new file mode 100644 index 00000000000..6cb815d9823 --- /dev/null +++ b/crates/biome_html_formatter/src/util/mod.rs @@ -0,0 +1 @@ +pub(crate) mod children; diff --git a/crates/biome_html_parser/src/lexer/tests.rs b/crates/biome_html_parser/src/lexer/tests.rs index 8774b2378d1..e437b5064fb 100644 --- a/crates/biome_html_parser/src/lexer/tests.rs +++ b/crates/biome_html_parser/src/lexer/tests.rs @@ -149,6 +149,21 @@ fn element() { } } +#[test] +fn element_with_text() { + assert_lex! { + "
hello world
", + L_ANGLE: 1, + HTML_LITERAL: 3, + R_ANGLE: 1, + HTML_LITERAL: 11, + L_ANGLE: 1, + SLASH: 1, + HTML_LITERAL: 3, + R_ANGLE: 1, + } +} + #[test] fn doctype_with_quirk() { assert_lex! {