diff --git a/crates/biome_markdown_factory/src/generated/node_factory.rs b/crates/biome_markdown_factory/src/generated/node_factory.rs index a9e24e1b6c8d..df5cc3ea0525 100644 --- a/crates/biome_markdown_factory/src/generated/node_factory.rs +++ b/crates/biome_markdown_factory/src/generated/node_factory.rs @@ -621,9 +621,15 @@ pub fn md_textual(value_token: SyntaxToken) -> MdTextual { [Some(SyntaxElement::Token(value_token))], )) } -pub fn md_thematic_break_block(value_token: SyntaxToken) -> MdThematicBreakBlock { +pub fn md_thematic_break_block(parts: MdThematicBreakPartList) -> MdThematicBreakBlock { MdThematicBreakBlock::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_THEMATIC_BREAK_BLOCK, + [Some(SyntaxElement::Node(parts.into_syntax()))], + )) +} +pub fn md_thematic_break_char(value_token: SyntaxToken) -> MdThematicBreakChar { + MdThematicBreakChar::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_THEMATIC_BREAK_CHAR, [Some(SyntaxElement::Token(value_token))], )) } @@ -711,6 +717,18 @@ where .map(|item| Some(item.into_syntax().into())), )) } +pub fn md_thematic_break_part_list(items: I) -> MdThematicBreakPartList +where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, +{ + MdThematicBreakPartList::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_THEMATIC_BREAK_PART_LIST, + items + .into_iter() + .map(|item| Some(item.into_syntax().into())), + )) +} pub fn md_bogus(slots: I) -> MdBogus where I: IntoIterator>, diff --git a/crates/biome_markdown_factory/src/generated/syntax_factory.rs b/crates/biome_markdown_factory/src/generated/syntax_factory.rs index 1dba1842e0ea..b71f352884e5 100644 --- a/crates/biome_markdown_factory/src/generated/syntax_factory.rs +++ b/crates/biome_markdown_factory/src/generated/syntax_factory.rs @@ -1091,7 +1091,7 @@ impl SyntaxFactory for MarkdownSyntaxFactory { let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = 
&current_element - && element.kind() == MD_THEMATIC_BREAK_LITERAL + && MdThematicBreakPartList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -1105,6 +1105,25 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.into_node(MD_THEMATIC_BREAK_BLOCK, children) } + MD_THEMATIC_BREAK_CHAR => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = &current_element + && matches!(element.kind(), T ! [*] | T ! [-] | T!["_"]) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_THEMATIC_BREAK_CHAR.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_THEMATIC_BREAK_CHAR, children) + } MD_BLOCK_LIST => Self::make_node_list_syntax(kind, children, AnyMdBlock::can_cast), MD_BULLET_LIST => { Self::make_node_list_syntax(kind, children, AnyMdBulletListMember::can_cast) @@ -1120,6 +1139,9 @@ impl SyntaxFactory for MarkdownSyntaxFactory { MD_QUOTE_INDENT_LIST => { Self::make_node_list_syntax(kind, children, MdQuoteIndent::can_cast) } + MD_THEMATIC_BREAK_PART_LIST => { + Self::make_node_list_syntax(kind, children, AnyMdThematicBreakPart::can_cast) + } _ => unreachable!("Is {:?} a token?", kind), } } diff --git a/crates/biome_markdown_formatter/src/generated.rs b/crates/biome_markdown_formatter/src/generated.rs index b37589ab8673..967af2dd54a1 100644 --- a/crates/biome_markdown_formatter/src/generated.rs +++ b/crates/biome_markdown_formatter/src/generated.rs @@ -1442,6 +1442,94 @@ impl IntoFormat for biome_markdown_syntax::MdThematicBreakBlock ) } } +impl FormatRule + for crate::markdown::auxiliary::thematic_break_char::FormatMdThematicBreakChar +{ + type Context = MdFormatContext; + #[inline(always)] + fn fmt( + &self, + node: &biome_markdown_syntax::MdThematicBreakChar, + f: &mut MarkdownFormatter, + )
-> FormatResult<()> { + FormatNodeRule::::fmt(self, node, f) + } +} +impl AsFormat for biome_markdown_syntax::MdThematicBreakChar { + type Format<'a> = FormatRefWithRule< + 'a, + biome_markdown_syntax::MdThematicBreakChar, + crate::markdown::auxiliary::thematic_break_char::FormatMdThematicBreakChar, + >; + fn format(&self) -> Self::Format<'_> { + FormatRefWithRule::new( + self, + crate::markdown::auxiliary::thematic_break_char::FormatMdThematicBreakChar::default(), + ) + } +} +impl IntoFormat for biome_markdown_syntax::MdThematicBreakChar { + type Format = FormatOwnedWithRule< + biome_markdown_syntax::MdThematicBreakChar, + crate::markdown::auxiliary::thematic_break_char::FormatMdThematicBreakChar, + >; + fn into_format(self) -> Self::Format { + FormatOwnedWithRule::new( + self, + crate::markdown::auxiliary::thematic_break_char::FormatMdThematicBreakChar::default(), + ) + } +} +impl AsFormat for biome_markdown_syntax::MdThematicBreakPartList { + type Format<'a> = FormatRefWithRule< + 'a, + biome_markdown_syntax::MdThematicBreakPartList, + crate::markdown::lists::thematic_break_part_list::FormatMdThematicBreakPartList, + >; + fn format(&self) -> Self::Format<'_> { + FormatRefWithRule::new( + self, + crate::markdown::lists::thematic_break_part_list::FormatMdThematicBreakPartList::default(), + ) + } +} +impl IntoFormat for biome_markdown_syntax::MdThematicBreakPartList { + type Format = FormatOwnedWithRule< + biome_markdown_syntax::MdThematicBreakPartList, + crate::markdown::lists::thematic_break_part_list::FormatMdThematicBreakPartList, + >; + fn into_format(self) -> Self::Format { + FormatOwnedWithRule::new( + self, + crate::markdown::lists::thematic_break_part_list::FormatMdThematicBreakPartList::default(), + ) + } +} +impl AsFormat for biome_markdown_syntax::AnyMdThematicBreakPart { + type Format<'a> = FormatRefWithRule< + 'a, + biome_markdown_syntax::AnyMdThematicBreakPart, + crate::markdown::any::thematic_break_part::FormatAnyMdThematicBreakPart, + >; + fn 
format(&self) -> Self::Format<'_> { + FormatRefWithRule::new( + self, + crate::markdown::any::thematic_break_part::FormatAnyMdThematicBreakPart::default(), + ) + } +} +impl IntoFormat for biome_markdown_syntax::AnyMdThematicBreakPart { + type Format = FormatOwnedWithRule< + biome_markdown_syntax::AnyMdThematicBreakPart, + crate::markdown::any::thematic_break_part::FormatAnyMdThematicBreakPart, + >; + fn into_format(self) -> Self::Format { + FormatOwnedWithRule::new( + self, + crate::markdown::any::thematic_break_part::FormatAnyMdThematicBreakPart::default(), + ) + } +} impl AsFormat for biome_markdown_syntax::MdBlockList { type Format<'a> = FormatRefWithRule< 'a, diff --git a/crates/biome_markdown_formatter/src/markdown/any/mod.rs b/crates/biome_markdown_formatter/src/markdown/any/mod.rs index 9cd99d0060d0..82ce1c1b4237 100644 --- a/crates/biome_markdown_formatter/src/markdown/any/mod.rs +++ b/crates/biome_markdown_formatter/src/markdown/any/mod.rs @@ -6,3 +6,4 @@ pub(crate) mod code_block; pub(crate) mod container_block; pub(crate) mod inline; pub(crate) mod leaf_block; +pub(crate) mod thematic_break_part; diff --git a/crates/biome_markdown_formatter/src/markdown/any/thematic_break_part.rs b/crates/biome_markdown_formatter/src/markdown/any/thematic_break_part.rs new file mode 100644 index 000000000000..700a626bacb7 --- /dev/null +++ b/crates/biome_markdown_formatter/src/markdown/any/thematic_break_part.rs @@ -0,0 +1,13 @@ +use crate::prelude::*; +use biome_markdown_syntax::AnyMdThematicBreakPart; +#[derive(Debug, Clone, Default)] +pub(crate) struct FormatAnyMdThematicBreakPart; +impl FormatRule for FormatAnyMdThematicBreakPart { + type Context = MdFormatContext; + fn fmt(&self, node: &AnyMdThematicBreakPart, f: &mut MarkdownFormatter) -> FormatResult<()> { + match node { + AnyMdThematicBreakPart::MdIndentToken(node) => node.format().fmt(f), + AnyMdThematicBreakPart::MdThematicBreakChar(node) => node.format().fmt(f), + } + } +} diff --git 
a/crates/biome_markdown_formatter/src/markdown/auxiliary/mod.rs b/crates/biome_markdown_formatter/src/markdown/auxiliary/mod.rs index b144e1dfa72e..ed2f199bfca4 100644 --- a/crates/biome_markdown_formatter/src/markdown/auxiliary/mod.rs +++ b/crates/biome_markdown_formatter/src/markdown/auxiliary/mod.rs @@ -38,3 +38,4 @@ pub(crate) mod setext_header; pub(crate) mod soft_break; pub(crate) mod textual; pub(crate) mod thematic_break_block; +pub(crate) mod thematic_break_char; diff --git a/crates/biome_markdown_formatter/src/markdown/auxiliary/thematic_break_char.rs b/crates/biome_markdown_formatter/src/markdown/auxiliary/thematic_break_char.rs new file mode 100644 index 000000000000..bf6dbbb956bb --- /dev/null +++ b/crates/biome_markdown_formatter/src/markdown/auxiliary/thematic_break_char.rs @@ -0,0 +1,14 @@ +use crate::prelude::*; +use biome_markdown_syntax::MdThematicBreakChar; +use biome_rowan::AstNode; +#[derive(Debug, Clone, Default)] +pub(crate) struct FormatMdThematicBreakChar; +impl FormatNodeRule for FormatMdThematicBreakChar { + fn fmt_fields( + &self, + node: &MdThematicBreakChar, + f: &mut MarkdownFormatter, + ) -> FormatResult<()> { + format_verbatim_node(node.syntax()).fmt(f) + } +} diff --git a/crates/biome_markdown_formatter/src/markdown/lists/mod.rs b/crates/biome_markdown_formatter/src/markdown/lists/mod.rs index d3e753f35b0d..d84675734953 100644 --- a/crates/biome_markdown_formatter/src/markdown/lists/mod.rs +++ b/crates/biome_markdown_formatter/src/markdown/lists/mod.rs @@ -7,3 +7,4 @@ pub(crate) mod hash_list; pub(crate) mod indent_token_list; pub(crate) mod inline_item_list; pub(crate) mod quote_indent_list; +pub(crate) mod thematic_break_part_list; diff --git a/crates/biome_markdown_formatter/src/markdown/lists/thematic_break_part_list.rs b/crates/biome_markdown_formatter/src/markdown/lists/thematic_break_part_list.rs new file mode 100644 index 000000000000..343674877f42 --- /dev/null +++ 
b/crates/biome_markdown_formatter/src/markdown/lists/thematic_break_part_list.rs @@ -0,0 +1,10 @@ +use crate::prelude::*; +use biome_markdown_syntax::MdThematicBreakPartList; +#[derive(Debug, Clone, Default)] +pub(crate) struct FormatMdThematicBreakPartList; +impl FormatRule for FormatMdThematicBreakPartList { + type Context = MdFormatContext; + fn fmt(&self, node: &MdThematicBreakPartList, f: &mut MarkdownFormatter) -> FormatResult<()> { + f.join().entries(node.iter().formatted()).finish() + } +} diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs index 994e91418a68..c34f03ca72de 100644 --- a/crates/biome_markdown_parser/src/lexer/mod.rs +++ b/crates/biome_markdown_parser/src/lexer/mod.rs @@ -15,44 +15,29 @@ use biome_unicode_table::lookup_byte; use crate::syntax::{MAX_BLOCK_PREFIX_INDENT, TAB_STOP_SPACES}; -/// Lexer context for different markdown parsing modes. -/// -/// Different contexts affect how the lexer tokenizes input: -/// - `Regular`: Normal markdown parsing with inline element detection -/// - `FencedCodeBlock`: Inside fenced code block, no markdown parsing -/// - `HtmlBlock`: Inside HTML block, minimal markdown parsing -/// - `LinkDefinition`: Inside link reference definition, whitespace separates tokens -/// - `CodeSpan`: Inside inline code span, backslashes are literal (no escapes) -/// - `EmphasisInline`: Emit single STAR/UNDERSCORE tokens for partial delimiter consumption +/// Lexer context for different markdown parsing modes #[derive(Debug, Copy, Clone, Eq, PartialEq, Default)] pub enum MarkdownLexContext { - /// Normal markdown parsing with full inline element detection. + /// Normal markdown parsing with inline element detection. #[default] Regular, - /// Inside a fenced code block - content is treated as raw text. - /// No markdown parsing occurs within fenced code blocks. - /// Reserved for context-aware lexing in fenced code blocks. + /// Inside fenced code block - no markdown parsing. 
#[expect(dead_code)] FencedCodeBlock, - /// Inside an HTML block - content is treated as raw HTML. - /// Minimal markdown parsing, primarily looking for block end conditions. - /// Reserved for context-aware lexing in HTML blocks. + /// Inside HTML block - minimal markdown parsing. #[expect(dead_code)] HtmlBlock, - /// Inside a link reference definition (after `]:`). - /// In this context, whitespace is significant and separates destination from title. - /// Text tokens stop at whitespace to allow proper parsing. + /// Inside link definition (after `]:`). Whitespace separates destination from title. LinkDefinition, - /// Inside an inline code span. - /// Per CommonMark §6.1, backslash escapes are not processed inside code spans. - /// Backslash is treated as a literal character, not an escape. + /// Inside inline code span. Backslashes are literal per CommonMark §6.1. CodeSpan, - /// Inside emphasis delimiter processing. - /// In this context, `*` and `_` are always emitted as single-character tokens - /// (STAR, UNDERSCORE) rather than double tokens (DOUBLE_STAR, DOUBLE_UNDERSCORE). - /// This allows partial consumption of delimiter runs when the match algorithm - /// determines only 1 char should be used from a 2-char run. + /// Emphasis delimiter processing. Emit single STAR/UNDERSCORE tokens for partial consumption. EmphasisInline, + /// Inside thematic break parts decomposition. + /// In this context, break markers (`*`, `-`, `_`) emit as individual + /// STAR/MINUS/UNDERSCORE tokens and whitespace emits as MD_INDENT_CHAR, + /// instead of aggregating into MD_THEMATIC_BREAK_LITERAL. + ThematicBreakParts, } impl LexContext for MarkdownLexContext { @@ -62,19 +47,22 @@ impl LexContext for MarkdownLexContext { } } -/// Context in which the [MarkdownLexContext]'s current should be re-lexed. -/// Used for re-lexing scenarios where context changes how tokens are parsed. +/// Re-lexing context for when token interpretation changes. 
#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum MarkdownReLexContext { /// Re-lex using regular markdown rules. #[expect(dead_code)] Regular, - /// Re-lex for link definition context where whitespace is significant. + /// Re-lex for link definition (whitespace is significant). LinkDefinition, - /// Re-lex for emphasis inline context where `*` and `_` emit single tokens. - /// Used when the emphasis matching algorithm needs to partially consume - /// a DOUBLE_STAR or DOUBLE_UNDERSCORE token. + /// Re-lex for emphasis (emit single tokens for partial consumption). EmphasisInline, + /// Re-lex for thematic break parts decomposition. + /// Decomposes MD_THEMATIC_BREAK_LITERAL into individual marker/space tokens. + /// Currently unused: we use `force_relex_in_context(MarkdownLexContext::ThematicBreakParts)` + /// directly, but kept for symmetry with the lex context enum. + #[expect(dead_code)] + ThematicBreakParts, } /// An extremely fast, lookup table based, lossless Markdown lexer @@ -93,8 +81,6 @@ pub(crate) struct MarkdownLexer<'src> { unicode_bom_length: usize, /// Byte offset of the current token from the start of the source. 
- /// - /// The range of the current token can be computed by `self.position - self.current_start` current_start: TextSize, /// The kind of the current token @@ -123,18 +109,10 @@ impl<'src> Lexer<'src> for MarkdownLexer<'src> { self.current_kind } - fn position(&self) -> usize { - self.position - } - fn current_start(&self) -> TextSize { self.current_start } - fn push_diagnostic(&mut self, diagnostic: ParseDiagnostic) { - self.diagnostics.push(diagnostic); - } - fn next_token(&mut self, context: Self::LexContext) -> Self::Kind { self.current_start = self.text_position(); self.current_flags = TokenFlags::empty(); @@ -203,6 +181,14 @@ impl<'src> Lexer<'src> for MarkdownLexer<'src> { self.current_flags } + fn position(&self) -> usize { + self.position + } + + fn push_diagnostic(&mut self, diagnostic: ParseDiagnostic) { + self.diagnostics.push(diagnostic); + } + #[inline] fn advance_char_unchecked(&mut self) { let c = self.current_char_unchecked(); @@ -244,16 +230,22 @@ impl<'src> MarkdownLexer<'src> { ) -> MarkdownSyntaxKind { let dispatched = lookup_byte(current); match dispatched { - // Whitespace handling depends on context: - // - At start of line (after_newline): whitespace is significant for indentation - // detection (e.g., 4+ spaces = code block), so emit as separate tokens - // - In middle of line: whitespace is just text content, include in textual token - // - Exception: 2+ spaces before newline is a hard line break - // - In LinkDefinition context: whitespace is always significant (separates destination from title) - // - In CodeSpan context: whitespace is literal content, no hard-line-break detection + // Whitespace handling is context-sensitive and order-dependent: + // 1. Check newline first (highest priority - block separator) + // 2. ThematicBreakParts: emit single MD_INDENT_CHAR tokens + // 3. CodeSpan: whitespace is literal content (no hard-line-break detection) + // 4. LinkDefinition: whitespace separates destination from title + // 5.
Line start: single whitespace tokens for indentation detection + // 6. After block quote marker: optional space handling + // 7. Hard line break: 2+ spaces before newline + // 8. Default: whitespace is part of text content WHS => { if current == b'\n' || current == b'\r' { self.consume_newline() + } else if matches!(context, MarkdownLexContext::ThematicBreakParts) { + // In ThematicBreakParts context, emit one MD_INDENT_CHAR per space/tab. + self.advance(1); + MD_INDENT_CHAR } else if matches!(context, MarkdownLexContext::CodeSpan) { // In code span context, whitespace is literal content. // No hard-line-break detection - the renderer normalizes line endings to spaces. @@ -316,13 +308,7 @@ impl<'src> MarkdownLexer<'src> { } } - /// Consume a backslash escape sequence. - /// - /// Per CommonMark spec: - /// - Backslash before ASCII punctuation makes it literal - /// - Backslash before newline is a hard line break - /// - /// Escapable: `!"#$%&'()*+,-./:;<=>?@[\]^_\`{|}~` + /// Consume a backslash escape sequence (literal punctuation or hard line break). fn consume_escape(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); @@ -392,14 +378,7 @@ impl<'src> MarkdownLexer<'src> { MD_TEXTUAL_LITERAL } - /// Try to consume an entity or numeric character reference per CommonMark §6.2. - /// - /// Valid patterns: - /// - Named entity: `&name;` where name is 2-31 alphanumeric chars starting with letter - /// - Decimal numeric: `&#digits;` where digits is 1-7 decimal digits - /// - Hexadecimal: `&#xhex;` or `&#Xhex;` where hex is 1-6 hex digits - /// - /// If not valid, falls back to consuming as textual. + /// Try to consume entity or numeric character reference per CommonMark §6.2. 
fn consume_entity_or_textual(&mut self, context: MarkdownLexContext) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); debug_assert!(matches!(self.current_byte(), Some(b'&'))); @@ -414,13 +393,7 @@ impl<'src> MarkdownLexer<'src> { self.consume_textual(context) } - /// Check if text at current position matches a valid entity reference pattern. - /// Returns the length of the entity if valid, None otherwise. - /// - /// Patterns per CommonMark §6.2: - /// - Named: `&name;` where name is 2-31 alphanumeric chars starting with letter - /// - Decimal: `&#digits;` where digits is 1-7 decimal digits - /// - Hex: `&#xhex;` or `&#Xhex;` where hex is 1-6 hex digits + /// Check if text matches entity reference pattern. Returns length if valid. fn match_entity_reference(&self) -> Option { // Must start with & if self.byte_at(0) != Some(b'&') { @@ -440,8 +413,7 @@ impl<'src> MarkdownLexer<'src> { } } - /// Match a named entity reference: `&name;` - /// Name must be 2-31 alphanumeric chars starting with a letter. + /// Match named entity: `&name;` (2-31 alphanumeric, starts with letter). 
fn match_named_entity(&self) -> Option { self.match_entity_with(1, 2, 31, |byte, index| { if index == 0 { @@ -452,7 +424,7 @@ impl<'src> MarkdownLexer<'src> { }) } - /// Match a numeric entity reference: `&#digits;` or `&#xhex;` / `&#Xhex;` + /// Match numeric entity: `&#digits;` or `&#xhex;`/`&#Xhex;` fn match_numeric_entity(&self) -> Option { // Position 0 is '&', position 1 is '#' let next = self.byte_at(2)?; @@ -468,12 +440,12 @@ impl<'src> MarkdownLexer<'src> { } } - /// Match a decimal numeric entity: `&#digits;` (1-7 decimal digits) + /// Match decimal entity: `&#digits;` (1-7 digits) fn match_decimal_entity(&self) -> Option { self.match_entity_with(2, 1, 7, |byte, _| byte.is_ascii_digit()) } - /// Match a hexadecimal numeric entity: `&#xhex;` or `&#Xhex;` (1-6 hex digits) + /// Match hex entity: `&#xhex;` or `&#Xhex;` (1-6 hex digits) fn match_hex_entity(&self) -> Option { self.match_entity_with(3, 1, 6, |byte, _| byte.is_ascii_hexdigit()) } @@ -612,12 +584,7 @@ impl<'src> MarkdownLexer<'src> { WHITESPACE } - /// Consumes whitespace in LinkDefinition context as textual literal. - /// This prevents it from being treated as trivia by the parser, which is critical - /// for correctly parsing link reference definitions where whitespace is a significant separator. - /// - /// ## Safety - /// Must be called at a valid UT8 char boundary + /// Consumes whitespace in LinkDefinition context as textual literal (not trivia). fn consume_link_definition_whitespace(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); @@ -705,13 +672,13 @@ impl<'src> MarkdownLexer<'src> { saw_marker } - /// Consumes thematic break literal, setext underline, or returns emphasis marker tokens. - /// Called when we see *, -, or _. + /// Consumes thematic break, setext underline, or emphasis markers (*, -, _). 
/// /// For `-` at line start: - /// - 1-2 dashes followed by newline: setext underline (H2) - /// - 3+ dashes followed by newline: thematic break (not setext; the parser may - /// convert dash-only thematic breaks to setext when preceded by a paragraph) + /// - 1-2 dashes + newline: setext underline (H2) + /// - 3+ dashes + newline: thematic break (parser may convert to setext if after paragraph) + /// + /// This distinction is critical for correct heading vs horizontal rule parsing. fn consume_thematic_break_or_emphasis( &mut self, dispatched: Dispatch, @@ -736,6 +703,18 @@ impl<'src> MarkdownLexer<'src> { // Save position to restore if not a thematic break let start_position = self.position; + // In ThematicBreakParts context, emit single-char tokens for break markers. + if matches!(context, MarkdownLexContext::ThematicBreakParts) { + self.advance(1); + return match start_char { + b'*' => STAR, + b'_' => UNDERSCORE, + // start_char is always b'*', b'_', or b'-' (set by dispatched match above). + // Defensive: treat any other byte as MINUS rather than panicking. + _ => MINUS, + }; + } + // For `-` at line start with 1-2 dashes, emit setext underline. // 3+ dashes could be thematic break, so let that logic handle it. // The parser may convert dash-only thematic breaks to setext when preceded by paragraph. @@ -1025,12 +1004,11 @@ impl<'src> MarkdownLexer<'src> { self.position >= self.source.len() } - /// Consume consecutive textual characters until we hit a special markdown character. - /// This groups multiple characters into a single MD_TEXTUAL_LITERAL token for efficiency. - /// Spaces and tabs are included in the text token (treated as regular text content), - /// but newlines end the token since they have semantic meaning as block separators. - /// Also stops before trailing spaces that could form a hard line break (2+ spaces before newline). - /// In LinkDefinition context, stops at any whitespace to allow proper destination/title parsing. 
+ /// Consume textual characters until hitting special markdown syntax. + /// + /// Special handling for `force_ordered_list_marker`: + /// Consumes leading whitespace separately if followed by an ordered list marker. + /// This prevents whitespace from being merged with the marker, maintaining correct token boundaries. #[inline] fn consume_textual(&mut self, context: MarkdownLexContext) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); @@ -1049,7 +1027,7 @@ impl<'src> MarkdownLexer<'src> { } } - // Consume at least one character + // Consume at least one character - ensures progress to avoid infinite loops let char = self.current_char_unchecked(); self.advance(char.len_utf8()); @@ -1244,6 +1222,7 @@ impl<'src> ReLexer<'src> for MarkdownLexer<'src> { MarkdownReLexContext::Regular => MarkdownLexContext::Regular, MarkdownReLexContext::LinkDefinition => MarkdownLexContext::LinkDefinition, MarkdownReLexContext::EmphasisInline => MarkdownLexContext::EmphasisInline, + MarkdownReLexContext::ThematicBreakParts => MarkdownLexContext::ThematicBreakParts, }; let re_lexed_kind = match self.current_byte() { diff --git a/crates/biome_markdown_parser/src/parser.rs b/crates/biome_markdown_parser/src/parser.rs index 111db4cc3226..040ba7efa812 100644 --- a/crates/biome_markdown_parser/src/parser.rs +++ b/crates/biome_markdown_parser/src/parser.rs @@ -238,6 +238,27 @@ impl<'source> MarkdownParser<'source> { self.source.bump_link_definition(); } + /// Force re-lex the current token in ThematicBreakParts context. + /// Decomposes MD_THEMATIC_BREAK_LITERAL into individual marker/space tokens. + /// Must NOT be called inside lookahead. + pub(crate) fn force_relex_thematic_break_parts(&mut self) { + self.source + .force_relex_in_context(MarkdownLexContext::ThematicBreakParts); + } + + /// Bump the current token and lex the next in ThematicBreakParts context, + /// ensuring sustained parts-mode tokenization across the loop. 
+ /// + /// Unlike `source.bump_thematic_break_parts()` (which only advances the lexer), + /// this method also registers the token with the tree builder via `push_token`, + /// so the token appears in the CST. + pub(crate) fn bump_thematic_break_parts(&mut self) { + let kind = self.cur(); + let end = self.cur_range().end(); + self.context_mut().push_token(kind, end); + self.source.bump_thematic_break_parts(); + } + pub fn checkpoint(&self) -> MarkdownParserCheckpoint { MarkdownParserCheckpoint { context: self.context.checkpoint(), diff --git a/crates/biome_markdown_parser/src/syntax/list.rs b/crates/biome_markdown_parser/src/syntax/list.rs index 5ab668421f51..40da4b9f22c5 100644 --- a/crates/biome_markdown_parser/src/syntax/list.rs +++ b/crates/biome_markdown_parser/src/syntax/list.rs @@ -1672,10 +1672,8 @@ fn parse_first_line_blocks( is_thematic_break_pattern(p) }); - if is_thematic_break { - if parse_thematic_break_block(p).is_present() { - state.record_first_line_block(); - } + if is_thematic_break && parse_thematic_break_block(p).is_present() { + state.record_first_line_block(); return LoopAction::Continue; } diff --git a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs index d10dcf790c2f..978491f508b4 100644 --- a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs +++ b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs @@ -91,12 +91,13 @@ fn is_thematic_break_pattern(p: &mut MarkdownParser) -> bool { return break_count >= THEMATIC_BREAK_MIN_CHARS && has_eol; } - // Get the break character from the first non-whitespace token - let break_char = if p.at(T![*]) { + // Get the break character from the first non-whitespace token. + // DOUBLE_STAR / DOUBLE_UNDERSCORE count as 2 of the underlying char. 
+ let break_char = if p.at(T![*]) || p.at(T![**]) { '*' } else if p.at(T![-]) { '-' - } else if p.at(UNDERSCORE) { + } else if p.at(UNDERSCORE) || p.at(DOUBLE_UNDERSCORE) { '_' } else if p.at(MD_TEXTUAL_LITERAL) { let text = p.cur_text(); @@ -114,20 +115,24 @@ fn is_thematic_break_pattern(p: &mut MarkdownParser) -> bool { return false; }; - // Count matching characters + // Count matching characters. + // DOUBLE_STAR / DOUBLE_UNDERSCORE contribute 2 to the count. let mut count = 0usize; loop { - // Check for the break character - let is_break = match break_char { - '*' => p.at(T![*]) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "*"), - '-' => p.at(T![-]) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "-"), - '_' => p.at(UNDERSCORE) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "_"), - _ => false, + let (is_break, char_count) = match break_char { + '*' if p.at(T![**]) => (true, 2), + '*' if p.at(T![*]) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "*") => (true, 1), + '-' if p.at(T![-]) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "-") => (true, 1), + '_' if p.at(DOUBLE_UNDERSCORE) => (true, 2), + '_' if p.at(UNDERSCORE) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "_") => { + (true, 1) + } + _ => (false, 0), }; if is_break { - count += 1; + count += char_count; p.bump_any(); continue; } @@ -152,100 +157,93 @@ pub(crate) fn parse_thematic_break_block(p: &mut MarkdownParser) -> ParsedSyntax } let m = p.start(); + // skip_line_indent unchanged — deferred to Phase 5 p.skip_line_indent(MAX_BLOCK_PREFIX_INDENT); - // If the lexer produced MD_THEMATIC_BREAK_LITERAL, use it directly. - // Otherwise, parse the thematic break pattern from individual tokens and - // ensure we emit a literal token (required by the grammar). 
- if p.at(MD_THEMATIC_BREAK_LITERAL) { - p.expect(MD_THEMATIC_BREAK_LITERAL); - } else { - parse_thematic_break_tokens(p); - } + parse_thematic_break_parts(p); Present(m.complete(p, MD_THEMATIC_BREAK_BLOCK)) } -/// Parse a thematic break from individual tokens when the lexer didn't produce -/// MD_THEMATIC_BREAK_LITERAL (e.g., after a list marker was consumed). -fn parse_thematic_break_tokens(p: &mut MarkdownParser) { - // Skip leading whitespace - while p.at(MD_TEXTUAL_LITERAL) && p.cur_text().chars().all(|c| c == ' ' || c == '\t') { - p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); - } +// #region parse_thematic_break_parts - // If the entire thematic break is in a single textual token, remap it. - if p.at(MD_TEXTUAL_LITERAL) - && p.cur_text() - .chars() - .all(|c| c == ' ' || c == '\t' || c == '*' || c == '-' || c == '_') - { - let has_eol = p.lookahead(|p| { - p.bump(MD_TEXTUAL_LITERAL); - while p.at(MD_TEXTUAL_LITERAL) && p.cur_text().chars().all(|c| c == ' ' || c == '\t') { - p.bump(MD_TEXTUAL_LITERAL); - } - p.at(NEWLINE) || p.at(T![EOF]) - }); - if !has_eol { - return; - } - p.bump_remap(MD_THEMATIC_BREAK_LITERAL); - return; - } - - // Determine the break character for multi-token cases. - let break_char = if p.at(T![*]) { - Some('*') - } else if p.at(T![-]) { - Some('-') - } else if p.at(UNDERSCORE) { - Some('_') - } else if p.at(MD_TEXTUAL_LITERAL) { - let text = p.cur_text(); - match text.chars().next() { - Some('*') => Some('*'), - Some('-') => Some('-'), - Some('_') => Some('_'), - _ => None, - } +/// Parse thematic break content into a MdThematicBreakPartList. +/// +/// Handles both paths: +/// - Happy path: MD_THEMATIC_BREAK_LITERAL present -> re-lex into parts +/// - Fallback path: individual tokens already available (e.g., after list marker) +fn parse_thematic_break_parts(p: &mut MarkdownParser) { + let list_m = p.start(); + + // If lexer produced a single literal, decompose it via re-lex. 
+ // Track this so all subsequent bumps use parts-mode context. + // Mutable: fallback MD_TEXTUAL_LITERAL tokens also trigger re-lex (see below). + let mut relex_active = if p.at(MD_THEMATIC_BREAK_LITERAL) { + p.force_relex_thematic_break_parts(); + true } else { - None + false }; - // Emit the required literal token by remapping the first break marker token. - if break_char.is_some() - && (p.at(T![*]) || p.at(T![-]) || p.at(UNDERSCORE) || p.at(MD_TEXTUAL_LITERAL)) - { - p.bump_remap(MD_THEMATIC_BREAK_LITERAL); - } - - // Parse all break characters and whitespace until end of line + // Shared emission loop for both paths. + // In relex_active mode: tokens are STAR/MINUS/UNDERSCORE/MD_INDENT_CHAR + // from the ThematicBreakParts context — use bump_thematic_break_parts(). + // In fallback mode: tokens may be individual punctuation (STAR etc.) or + // multi-char MD_TEXTUAL_LITERAL — the latter triggers re-lex on demand. loop { if p.at(NEWLINE) || p.at(T![EOF]) { break; } - // Check for the break character - let is_break = match break_char { - Some('*') => p.at(T![*]) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "*"), - Some('-') => p.at(T![-]) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "-"), - Some('_') => p.at(UNDERSCORE) || (p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == "_"), - _ => false, - }; + // Break character (STAR/MINUS/UNDERSCORE) — from re-lex or regular context + if p.at(T![*]) || p.at(T![-]) || p.at(UNDERSCORE) { + let char_m = p.start(); + if relex_active { + p.bump_thematic_break_parts(); + } else { + p.bump_any(); + } + char_m.complete(p, MD_THEMATIC_BREAK_CHAR); + continue; + } - if is_break { - p.parse_as_skipped_trivia_tokens(|p| p.bump_any()); + // Whitespace (MD_INDENT_CHAR) — from re-lex or regular context + if p.at(MD_INDENT_CHAR) { + let char_m = p.start(); + if relex_active { + p.bump_thematic_break_parts(); + } else { + p.bump(MD_INDENT_CHAR); + } + char_m.complete(p, MD_INDENT_TOKEN); continue; } - // Skip whitespace between break 
characters - if p.at(MD_TEXTUAL_LITERAL) && p.cur_text().chars().all(|c| c == ' ' || c == '\t') { - p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + // Grouped tokens (DOUBLE_STAR, DOUBLE_UNDERSCORE) or multi-char + // MD_TEXTUAL_LITERAL — force re-lex to decompose into single-char tokens. + if p.at(T![**]) || p.at(DOUBLE_UNDERSCORE) { + p.force_relex_thematic_break_parts(); + relex_active = true; continue; } - // Other content - shouldn't happen if at_thematic_break_block returned true + if p.at(MD_TEXTUAL_LITERAL) { + let first_char = p.cur_text().as_bytes().first().copied(); + match first_char { + Some(b'*' | b'-' | b'_' | b' ' | b'\t') => { + p.force_relex_thematic_break_parts(); + relex_active = true; + continue; + } + _ => break, + } + } + + // Unexpected token — shouldn't happen if detection was correct break; } + + list_m.complete(p, MD_THEMATIC_BREAK_PART_LIST); } + +// #endregion diff --git a/crates/biome_markdown_parser/src/token_source.rs b/crates/biome_markdown_parser/src/token_source.rs index 71bc216ceae4..f8b816b6b7a9 100644 --- a/crates/biome_markdown_parser/src/token_source.rs +++ b/crates/biome_markdown_parser/src/token_source.rs @@ -170,6 +170,13 @@ impl<'source> MarkdownTokenSource<'source> { self.bump_with_context(MarkdownLexContext::LinkDefinition); } + /// Bump the current token using the ThematicBreakParts context. + /// In this context, break markers emit as individual STAR/MINUS/UNDERSCORE + /// and whitespace emits as MD_INDENT_CHAR. + pub fn bump_thematic_break_parts(&mut self) { + self.bump_with_context(MarkdownLexContext::ThematicBreakParts); + } + /// Creates a checkpoint to which it can later return using [Self::rewind]. 
pub fn checkpoint(&self) -> MarkdownTokenSourceCheckpoint { MarkdownTokenSourceCheckpoint { diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap index dc50b533627c..433b613aeca8 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap @@ -226,7 +226,17 @@ MdDocument { ], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@301..304 "---" [] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: MINUS@301..302 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@302..303 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@303..304 "-" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@304..305 "\n" [] [], @@ -345,7 +355,17 @@ MdDocument { ], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@468..471 "---" [] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: MINUS@468..469 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@469..470 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@470..471 "-" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@471..472 "\n" [] [], @@ -508,7 +528,13 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@300..301 "\n" [] [] 1: (empty) 11: MD_THEMATIC_BREAK_BLOCK@301..304 - 0: MD_THEMATIC_BREAK_LITERAL@301..304 "---" [] [] + 0: MD_THEMATIC_BREAK_PART_LIST@301..304 + 0: MD_THEMATIC_BREAK_CHAR@301..302 + 0: MINUS@301..302 "-" [] [] + 1: MD_THEMATIC_BREAK_CHAR@302..303 + 0: MINUS@302..303 "-" [] [] + 2: MD_THEMATIC_BREAK_CHAR@303..304 + 0: MINUS@303..304 "-" [] [] 12: MD_NEWLINE@304..305 0: NEWLINE@304..305 "\n" [] [] 13: MD_NEWLINE@305..306 @@ -586,7 +612,13 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@467..468 "\n" [] [] 1: (empty) 22: MD_THEMATIC_BREAK_BLOCK@468..471 - 0: MD_THEMATIC_BREAK_LITERAL@468..471 "---" [] [] + 0: 
MD_THEMATIC_BREAK_PART_LIST@468..471 + 0: MD_THEMATIC_BREAK_CHAR@468..469 + 0: MINUS@468..469 "-" [] [] + 1: MD_THEMATIC_BREAK_CHAR@469..470 + 0: MINUS@469..470 "-" [] [] + 2: MD_THEMATIC_BREAK_CHAR@470..471 + 0: MINUS@470..471 "-" [] [] 23: MD_NEWLINE@471..472 0: NEWLINE@471..472 "\n" [] [] 24: MD_NEWLINE@472..473 diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap index ea465511a670..40d93eb48442 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap @@ -183,7 +183,17 @@ MdDocument { hard_line: missing (optional), }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@151..154 "***" [] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@151..152 "*" [] [], + }, + MdThematicBreakChar { + value: STAR@152..153 "*" [] [], + }, + MdThematicBreakChar { + value: STAR@153..154 "*" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@154..155 "\n" [] [], @@ -297,7 +307,13 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@150..151 "\n" [] [] 1: (empty) 15: MD_THEMATIC_BREAK_BLOCK@151..154 - 0: MD_THEMATIC_BREAK_LITERAL@151..154 "***" [] [] + 0: MD_THEMATIC_BREAK_PART_LIST@151..154 + 0: MD_THEMATIC_BREAK_CHAR@151..152 + 0: STAR@151..152 "*" [] [] + 1: MD_THEMATIC_BREAK_CHAR@152..153 + 0: STAR@152..153 "*" [] [] + 2: MD_THEMATIC_BREAK_CHAR@153..154 + 0: STAR@153..154 "*" [] [] 16: MD_NEWLINE@154..155 0: NEWLINE@154..155 "\n" [] [] 2: EOF@155..155 "" [] [] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap index f83e7aa6746c..75a86991bc12 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap +++ 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap @@ -24,25 +24,71 @@ MdDocument { bom_token: missing (optional), value: MdBlockList [ MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@0..6 "***" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@0..4 "*" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdThematicBreakChar { + value: STAR@4..5 "*" [] [], + }, + MdThematicBreakChar { + value: STAR@5..6 "*" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@6..7 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@7..11 "***" [Skipped(" ")] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@7..9 "*" [Skipped(" ")] [], + }, + MdThematicBreakChar { + value: STAR@9..10 "*" [] [], + }, + MdThematicBreakChar { + value: STAR@10..11 "*" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@11..12 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@12..18 "- - -" [Skipped(" ")] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: MINUS@12..14 "-" [Skipped(" ")] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@14..15 " " [] [], + }, + MdThematicBreakChar { + value: MINUS@15..16 "-" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@16..17 " " [] [], + }, + MdThematicBreakChar { + value: MINUS@17..18 "-" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@18..19 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@19..22 "___" [] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: UNDERSCORE@19..20 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@20..21 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@21..22 "_" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@22..23 "\n" [] [], @@ -51,7 +97,23 @@ MdDocument { value_token: NEWLINE@23..24 "\n" [] [], }, 
MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@24..30 "_ _ _" [Skipped(" ")] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: UNDERSCORE@24..26 "_" [Skipped(" ")] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@26..27 " " [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@27..28 "_" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@28..29 " " [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@29..30 "_" [] [], + }, + ], }, MdNewline { value_token: NEWLINE@30..31 "\n" [] [], @@ -60,7 +122,23 @@ MdDocument { value_token: NEWLINE@31..32 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@32..37 "* * *" [] [], + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@32..33 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@33..34 " " [] [], + }, + MdThematicBreakChar { + value: STAR@34..35 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@35..36 " " [] [], + }, + MdThematicBreakChar { + value: STAR@36..37 "*" [] [], + }, + ], }, ], eof_token: EOF@37..37 "" [] [], @@ -74,31 +152,79 @@ MdDocument { 0: (empty) 1: MD_BLOCK_LIST@0..37 0: MD_THEMATIC_BREAK_BLOCK@0..6 - 0: MD_THEMATIC_BREAK_LITERAL@0..6 "***" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 0: MD_THEMATIC_BREAK_PART_LIST@0..6 + 0: MD_THEMATIC_BREAK_CHAR@0..4 + 0: STAR@0..4 "*" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 1: MD_THEMATIC_BREAK_CHAR@4..5 + 0: STAR@4..5 "*" [] [] + 2: MD_THEMATIC_BREAK_CHAR@5..6 + 0: STAR@5..6 "*" [] [] 1: MD_NEWLINE@6..7 0: NEWLINE@6..7 "\n" [] [] 2: MD_THEMATIC_BREAK_BLOCK@7..11 - 0: MD_THEMATIC_BREAK_LITERAL@7..11 "***" [Skipped(" ")] [] + 0: MD_THEMATIC_BREAK_PART_LIST@7..11 + 0: MD_THEMATIC_BREAK_CHAR@7..9 + 0: STAR@7..9 "*" [Skipped(" ")] [] + 1: MD_THEMATIC_BREAK_CHAR@9..10 + 0: STAR@9..10 "*" [] [] + 2: MD_THEMATIC_BREAK_CHAR@10..11 + 0: STAR@10..11 "*" [] [] 3: MD_NEWLINE@11..12 0: NEWLINE@11..12 
"\n" [] [] 4: MD_THEMATIC_BREAK_BLOCK@12..18 - 0: MD_THEMATIC_BREAK_LITERAL@12..18 "- - -" [Skipped(" ")] [] + 0: MD_THEMATIC_BREAK_PART_LIST@12..18 + 0: MD_THEMATIC_BREAK_CHAR@12..14 + 0: MINUS@12..14 "-" [Skipped(" ")] [] + 1: MD_INDENT_TOKEN@14..15 + 0: MD_INDENT_CHAR@14..15 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@15..16 + 0: MINUS@15..16 "-" [] [] + 3: MD_INDENT_TOKEN@16..17 + 0: MD_INDENT_CHAR@16..17 " " [] [] + 4: MD_THEMATIC_BREAK_CHAR@17..18 + 0: MINUS@17..18 "-" [] [] 5: MD_NEWLINE@18..19 0: NEWLINE@18..19 "\n" [] [] 6: MD_THEMATIC_BREAK_BLOCK@19..22 - 0: MD_THEMATIC_BREAK_LITERAL@19..22 "___" [] [] + 0: MD_THEMATIC_BREAK_PART_LIST@19..22 + 0: MD_THEMATIC_BREAK_CHAR@19..20 + 0: UNDERSCORE@19..20 "_" [] [] + 1: MD_THEMATIC_BREAK_CHAR@20..21 + 0: UNDERSCORE@20..21 "_" [] [] + 2: MD_THEMATIC_BREAK_CHAR@21..22 + 0: UNDERSCORE@21..22 "_" [] [] 7: MD_NEWLINE@22..23 0: NEWLINE@22..23 "\n" [] [] 8: MD_NEWLINE@23..24 0: NEWLINE@23..24 "\n" [] [] 9: MD_THEMATIC_BREAK_BLOCK@24..30 - 0: MD_THEMATIC_BREAK_LITERAL@24..30 "_ _ _" [Skipped(" ")] [] + 0: MD_THEMATIC_BREAK_PART_LIST@24..30 + 0: MD_THEMATIC_BREAK_CHAR@24..26 + 0: UNDERSCORE@24..26 "_" [Skipped(" ")] [] + 1: MD_INDENT_TOKEN@26..27 + 0: MD_INDENT_CHAR@26..27 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@27..28 + 0: UNDERSCORE@27..28 "_" [] [] + 3: MD_INDENT_TOKEN@28..29 + 0: MD_INDENT_CHAR@28..29 " " [] [] + 4: MD_THEMATIC_BREAK_CHAR@29..30 + 0: UNDERSCORE@29..30 "_" [] [] 10: MD_NEWLINE@30..31 0: NEWLINE@30..31 "\n" [] [] 11: MD_NEWLINE@31..32 0: NEWLINE@31..32 "\n" [] [] 12: MD_THEMATIC_BREAK_BLOCK@32..37 - 0: MD_THEMATIC_BREAK_LITERAL@32..37 "* * *" [] [] + 0: MD_THEMATIC_BREAK_PART_LIST@32..37 + 0: MD_THEMATIC_BREAK_CHAR@32..33 + 0: STAR@32..33 "*" [] [] + 1: MD_INDENT_TOKEN@33..34 + 0: MD_INDENT_CHAR@33..34 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@34..35 + 0: STAR@34..35 "*" [] [] + 3: MD_INDENT_TOKEN@35..36 + 0: MD_INDENT_CHAR@35..36 " " [] [] + 4: MD_THEMATIC_BREAK_CHAR@36..37 + 0: STAR@36..37 "*" [] [] 2: 
EOF@37..37 "" [] [] ``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md new file mode 100644 index 000000000000..5129e35084a4 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md @@ -0,0 +1,15 @@ +- * * * + +- --- + +- ___ + +* --- + +- ** * + +- __ _ + +- ** ** + +- __ __ diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap new file mode 100644 index 000000000000..b0210e1f09aa --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap @@ -0,0 +1,471 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- + +## Input + +``` +- * * * + +- --- + +- ___ + +* --- + +- ** * + +- __ _ + +- ** ** + +- __ __ + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + prefix: MdListMarkerPrefix { + pre_marker_indent: MdIndentTokenList [], + marker: MINUS@0..1 "-" [] [], + post_marker_space_token: MD_LIST_POST_MARKER_SPACE@1..2 " " [] [], + content_indent: MdIndentTokenList [], + }, + content: MdBlockList [ + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@2..3 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@3..4 " " [] [], + }, + MdThematicBreakChar { + value: STAR@4..5 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@5..6 " " [] [], + }, + MdThematicBreakChar { + value: STAR@6..7 "*" [] [], + }, + ], + }, + ], + }, + ], + }, + MdNewline { + value_token: NEWLINE@7..8 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@8..9 "\n" [] [], + }, + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + 
MdThematicBreakChar { + value: MINUS@9..10 "-" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@10..11 " " [] [], + }, + MdThematicBreakChar { + value: MINUS@11..12 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@12..13 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@13..14 "-" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@14..15 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@15..16 "\n" [] [], + }, + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + prefix: MdListMarkerPrefix { + pre_marker_indent: MdIndentTokenList [], + marker: MINUS@16..17 "-" [] [], + post_marker_space_token: MD_LIST_POST_MARKER_SPACE@17..18 " " [] [], + content_indent: MdIndentTokenList [], + }, + content: MdBlockList [ + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: UNDERSCORE@18..19 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@19..20 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@20..21 "_" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@21..22 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@22..23 "\n" [] [], + }, + MdBullet { + prefix: MdListMarkerPrefix { + pre_marker_indent: MdIndentTokenList [], + marker: STAR@23..24 "*" [] [], + post_marker_space_token: MD_LIST_POST_MARKER_SPACE@24..25 " " [] [], + content_indent: MdIndentTokenList [], + }, + content: MdBlockList [ + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: MINUS@25..26 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@26..27 "-" [] [], + }, + MdThematicBreakChar { + value: MINUS@27..28 "-" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@28..29 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@29..30 "\n" [] [], + }, + MdBullet { + prefix: MdListMarkerPrefix { + pre_marker_indent: MdIndentTokenList [], + marker: MINUS@30..31 "-" [] [], + post_marker_space_token: 
MD_LIST_POST_MARKER_SPACE@31..32 " " [] [], + content_indent: MdIndentTokenList [], + }, + content: MdBlockList [ + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@32..33 "*" [] [], + }, + MdThematicBreakChar { + value: STAR@33..34 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@34..35 " " [] [], + }, + MdThematicBreakChar { + value: STAR@35..36 "*" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@36..37 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@37..38 "\n" [] [], + }, + MdBullet { + prefix: MdListMarkerPrefix { + pre_marker_indent: MdIndentTokenList [], + marker: MINUS@38..39 "-" [] [], + post_marker_space_token: MD_LIST_POST_MARKER_SPACE@39..40 " " [] [], + content_indent: MdIndentTokenList [], + }, + content: MdBlockList [ + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: UNDERSCORE@40..41 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@41..42 "_" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@42..43 " " [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@43..44 "_" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@44..45 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@45..46 "\n" [] [], + }, + MdBullet { + prefix: MdListMarkerPrefix { + pre_marker_indent: MdIndentTokenList [], + marker: MINUS@46..47 "-" [] [], + post_marker_space_token: MD_LIST_POST_MARKER_SPACE@47..48 " " [] [], + content_indent: MdIndentTokenList [], + }, + content: MdBlockList [ + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@48..49 "*" [] [], + }, + MdThematicBreakChar { + value: STAR@49..50 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@50..51 " " [] [], + }, + MdThematicBreakChar { + value: STAR@51..52 "*" [] [], + }, + MdThematicBreakChar { + value: STAR@52..53 "*" [] [], + }, + ], + }, + MdNewline { + 
value_token: NEWLINE@53..54 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@54..55 "\n" [] [], + }, + MdBullet { + prefix: MdListMarkerPrefix { + pre_marker_indent: MdIndentTokenList [], + marker: MINUS@55..56 "-" [] [], + post_marker_space_token: MD_LIST_POST_MARKER_SPACE@56..57 " " [] [], + content_indent: MdIndentTokenList [], + }, + content: MdBlockList [ + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: UNDERSCORE@57..58 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@58..59 "_" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@59..60 " " [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@60..61 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@61..62 "_" [] [], + }, + ], + }, + ], + }, + ], + }, + MdNewline { + value_token: NEWLINE@62..63 "\n" [] [], + }, + ], + eof_token: EOF@63..63 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..63 + 0: (empty) + 1: MD_BLOCK_LIST@0..63 + 0: MD_BULLET_LIST_ITEM@0..7 + 0: MD_BULLET_LIST@0..7 + 0: MD_BULLET@0..7 + 0: MD_LIST_MARKER_PREFIX@0..2 + 0: MD_INDENT_TOKEN_LIST@0..0 + 1: MINUS@0..1 "-" [] [] + 2: MD_LIST_POST_MARKER_SPACE@1..2 " " [] [] + 3: MD_INDENT_TOKEN_LIST@2..2 + 1: MD_BLOCK_LIST@2..7 + 0: MD_THEMATIC_BREAK_BLOCK@2..7 + 0: MD_THEMATIC_BREAK_PART_LIST@2..7 + 0: MD_THEMATIC_BREAK_CHAR@2..3 + 0: STAR@2..3 "*" [] [] + 1: MD_INDENT_TOKEN@3..4 + 0: MD_INDENT_CHAR@3..4 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@4..5 + 0: STAR@4..5 "*" [] [] + 3: MD_INDENT_TOKEN@5..6 + 0: MD_INDENT_CHAR@5..6 " " [] [] + 4: MD_THEMATIC_BREAK_CHAR@6..7 + 0: STAR@6..7 "*" [] [] + 1: MD_NEWLINE@7..8 + 0: NEWLINE@7..8 "\n" [] [] + 2: MD_NEWLINE@8..9 + 0: NEWLINE@8..9 "\n" [] [] + 3: MD_THEMATIC_BREAK_BLOCK@9..14 + 0: MD_THEMATIC_BREAK_PART_LIST@9..14 + 0: MD_THEMATIC_BREAK_CHAR@9..10 + 0: MINUS@9..10 "-" [] [] + 1: MD_INDENT_TOKEN@10..11 + 0: MD_INDENT_CHAR@10..11 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@11..12 + 0: MINUS@11..12 "-" [] [] + 
3: MD_THEMATIC_BREAK_CHAR@12..13 + 0: MINUS@12..13 "-" [] [] + 4: MD_THEMATIC_BREAK_CHAR@13..14 + 0: MINUS@13..14 "-" [] [] + 4: MD_NEWLINE@14..15 + 0: NEWLINE@14..15 "\n" [] [] + 5: MD_NEWLINE@15..16 + 0: NEWLINE@15..16 "\n" [] [] + 6: MD_BULLET_LIST_ITEM@16..62 + 0: MD_BULLET_LIST@16..62 + 0: MD_BULLET@16..22 + 0: MD_LIST_MARKER_PREFIX@16..18 + 0: MD_INDENT_TOKEN_LIST@16..16 + 1: MINUS@16..17 "-" [] [] + 2: MD_LIST_POST_MARKER_SPACE@17..18 " " [] [] + 3: MD_INDENT_TOKEN_LIST@18..18 + 1: MD_BLOCK_LIST@18..22 + 0: MD_THEMATIC_BREAK_BLOCK@18..21 + 0: MD_THEMATIC_BREAK_PART_LIST@18..21 + 0: MD_THEMATIC_BREAK_CHAR@18..19 + 0: UNDERSCORE@18..19 "_" [] [] + 1: MD_THEMATIC_BREAK_CHAR@19..20 + 0: UNDERSCORE@19..20 "_" [] [] + 2: MD_THEMATIC_BREAK_CHAR@20..21 + 0: UNDERSCORE@20..21 "_" [] [] + 1: MD_NEWLINE@21..22 + 0: NEWLINE@21..22 "\n" [] [] + 1: MD_NEWLINE@22..23 + 0: NEWLINE@22..23 "\n" [] [] + 2: MD_BULLET@23..29 + 0: MD_LIST_MARKER_PREFIX@23..25 + 0: MD_INDENT_TOKEN_LIST@23..23 + 1: STAR@23..24 "*" [] [] + 2: MD_LIST_POST_MARKER_SPACE@24..25 " " [] [] + 3: MD_INDENT_TOKEN_LIST@25..25 + 1: MD_BLOCK_LIST@25..29 + 0: MD_THEMATIC_BREAK_BLOCK@25..28 + 0: MD_THEMATIC_BREAK_PART_LIST@25..28 + 0: MD_THEMATIC_BREAK_CHAR@25..26 + 0: MINUS@25..26 "-" [] [] + 1: MD_THEMATIC_BREAK_CHAR@26..27 + 0: MINUS@26..27 "-" [] [] + 2: MD_THEMATIC_BREAK_CHAR@27..28 + 0: MINUS@27..28 "-" [] [] + 1: MD_NEWLINE@28..29 + 0: NEWLINE@28..29 "\n" [] [] + 3: MD_NEWLINE@29..30 + 0: NEWLINE@29..30 "\n" [] [] + 4: MD_BULLET@30..37 + 0: MD_LIST_MARKER_PREFIX@30..32 + 0: MD_INDENT_TOKEN_LIST@30..30 + 1: MINUS@30..31 "-" [] [] + 2: MD_LIST_POST_MARKER_SPACE@31..32 " " [] [] + 3: MD_INDENT_TOKEN_LIST@32..32 + 1: MD_BLOCK_LIST@32..37 + 0: MD_THEMATIC_BREAK_BLOCK@32..36 + 0: MD_THEMATIC_BREAK_PART_LIST@32..36 + 0: MD_THEMATIC_BREAK_CHAR@32..33 + 0: STAR@32..33 "*" [] [] + 1: MD_THEMATIC_BREAK_CHAR@33..34 + 0: STAR@33..34 "*" [] [] + 2: MD_INDENT_TOKEN@34..35 + 0: MD_INDENT_CHAR@34..35 " " [] [] + 3: 
MD_THEMATIC_BREAK_CHAR@35..36 + 0: STAR@35..36 "*" [] [] + 1: MD_NEWLINE@36..37 + 0: NEWLINE@36..37 "\n" [] [] + 5: MD_NEWLINE@37..38 + 0: NEWLINE@37..38 "\n" [] [] + 6: MD_BULLET@38..45 + 0: MD_LIST_MARKER_PREFIX@38..40 + 0: MD_INDENT_TOKEN_LIST@38..38 + 1: MINUS@38..39 "-" [] [] + 2: MD_LIST_POST_MARKER_SPACE@39..40 " " [] [] + 3: MD_INDENT_TOKEN_LIST@40..40 + 1: MD_BLOCK_LIST@40..45 + 0: MD_THEMATIC_BREAK_BLOCK@40..44 + 0: MD_THEMATIC_BREAK_PART_LIST@40..44 + 0: MD_THEMATIC_BREAK_CHAR@40..41 + 0: UNDERSCORE@40..41 "_" [] [] + 1: MD_THEMATIC_BREAK_CHAR@41..42 + 0: UNDERSCORE@41..42 "_" [] [] + 2: MD_INDENT_TOKEN@42..43 + 0: MD_INDENT_CHAR@42..43 " " [] [] + 3: MD_THEMATIC_BREAK_CHAR@43..44 + 0: UNDERSCORE@43..44 "_" [] [] + 1: MD_NEWLINE@44..45 + 0: NEWLINE@44..45 "\n" [] [] + 7: MD_NEWLINE@45..46 + 0: NEWLINE@45..46 "\n" [] [] + 8: MD_BULLET@46..54 + 0: MD_LIST_MARKER_PREFIX@46..48 + 0: MD_INDENT_TOKEN_LIST@46..46 + 1: MINUS@46..47 "-" [] [] + 2: MD_LIST_POST_MARKER_SPACE@47..48 " " [] [] + 3: MD_INDENT_TOKEN_LIST@48..48 + 1: MD_BLOCK_LIST@48..54 + 0: MD_THEMATIC_BREAK_BLOCK@48..53 + 0: MD_THEMATIC_BREAK_PART_LIST@48..53 + 0: MD_THEMATIC_BREAK_CHAR@48..49 + 0: STAR@48..49 "*" [] [] + 1: MD_THEMATIC_BREAK_CHAR@49..50 + 0: STAR@49..50 "*" [] [] + 2: MD_INDENT_TOKEN@50..51 + 0: MD_INDENT_CHAR@50..51 " " [] [] + 3: MD_THEMATIC_BREAK_CHAR@51..52 + 0: STAR@51..52 "*" [] [] + 4: MD_THEMATIC_BREAK_CHAR@52..53 + 0: STAR@52..53 "*" [] [] + 1: MD_NEWLINE@53..54 + 0: NEWLINE@53..54 "\n" [] [] + 9: MD_NEWLINE@54..55 + 0: NEWLINE@54..55 "\n" [] [] + 10: MD_BULLET@55..62 + 0: MD_LIST_MARKER_PREFIX@55..57 + 0: MD_INDENT_TOKEN_LIST@55..55 + 1: MINUS@55..56 "-" [] [] + 2: MD_LIST_POST_MARKER_SPACE@56..57 " " [] [] + 3: MD_INDENT_TOKEN_LIST@57..57 + 1: MD_BLOCK_LIST@57..62 + 0: MD_THEMATIC_BREAK_BLOCK@57..62 + 0: MD_THEMATIC_BREAK_PART_LIST@57..62 + 0: MD_THEMATIC_BREAK_CHAR@57..58 + 0: UNDERSCORE@57..58 "_" [] [] + 1: MD_THEMATIC_BREAK_CHAR@58..59 + 0: UNDERSCORE@58..59 "_" [] [] 
+ 2: MD_INDENT_TOKEN@59..60 + 0: MD_INDENT_CHAR@59..60 " " [] [] + 3: MD_THEMATIC_BREAK_CHAR@60..61 + 0: UNDERSCORE@60..61 "_" [] [] + 4: MD_THEMATIC_BREAK_CHAR@61..62 + 0: UNDERSCORE@61..62 "_" [] [] + 7: MD_NEWLINE@62..63 + 0: NEWLINE@62..63 "\n" [] [] + 2: EOF@63..63 "" [] [] + +``` diff --git a/crates/biome_markdown_syntax/src/generated/kind.rs b/crates/biome_markdown_syntax/src/generated/kind.rs index ef6f90f8cf50..3688c4a78f62 100644 --- a/crates/biome_markdown_syntax/src/generated/kind.rs +++ b/crates/biome_markdown_syntax/src/generated/kind.rs @@ -108,6 +108,8 @@ pub enum MarkdownSyntaxKind { MD_INDENT_TOKEN, MD_INDENT_TOKEN_LIST, MD_LIST_MARKER_PREFIX, + MD_THEMATIC_BREAK_CHAR, + MD_THEMATIC_BREAK_PART_LIST, #[doc(hidden)] __LAST, } @@ -171,6 +173,7 @@ impl MarkdownSyntaxKind { | MD_INLINE_ITEM_LIST | MD_INDENTED_CODE_LINE_LIST | MD_INDENT_TOKEN_LIST + | MD_THEMATIC_BREAK_PART_LIST ) } pub fn from_keyword(ident: &str) -> Option { diff --git a/crates/biome_markdown_syntax/src/generated/macros.rs b/crates/biome_markdown_syntax/src/generated/macros.rs index 11444cd9b201..93eed417c4c5 100644 --- a/crates/biome_markdown_syntax/src/generated/macros.rs +++ b/crates/biome_markdown_syntax/src/generated/macros.rs @@ -169,6 +169,10 @@ macro_rules! map_syntax_node { let $pattern = unsafe { $crate::MdThematicBreakBlock::new_unchecked(node) }; $body } + $crate::MarkdownSyntaxKind::MD_THEMATIC_BREAK_CHAR => { + let $pattern = unsafe { $crate::MdThematicBreakChar::new_unchecked(node) }; + $body + } $crate::MarkdownSyntaxKind::MD_BOGUS => { let $pattern = unsafe { $crate::MdBogus::new_unchecked(node) }; $body @@ -201,6 +205,10 @@ macro_rules! 
map_syntax_node { let $pattern = unsafe { $crate::MdQuoteIndentList::new_unchecked(node) }; $body } + $crate::MarkdownSyntaxKind::MD_THEMATIC_BREAK_PART_LIST => { + let $pattern = unsafe { $crate::MdThematicBreakPartList::new_unchecked(node) }; + $body + } _ => unreachable!(), }, } diff --git a/crates/biome_markdown_syntax/src/generated/nodes.rs b/crates/biome_markdown_syntax/src/generated/nodes.rs index f81d76aedadc..6266b6543ce6 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes.rs @@ -1596,11 +1596,11 @@ impl MdThematicBreakBlock { } pub fn as_fields(&self) -> MdThematicBreakBlockFields { MdThematicBreakBlockFields { - value_token: self.value_token(), + parts: self.parts(), } } - pub fn value_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 0usize) + pub fn parts(&self) -> MdThematicBreakPartList { + support::list(&self.syntax, 0usize) } } impl Serialize for MdThematicBreakBlock { @@ -1613,7 +1613,42 @@ impl Serialize for MdThematicBreakBlock { } #[derive(Serialize)] pub struct MdThematicBreakBlockFields { - pub value_token: SyntaxResult, + pub parts: MdThematicBreakPartList, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdThematicBreakChar { + pub(crate) syntax: SyntaxNode, +} +impl MdThematicBreakChar { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdThematicBreakCharFields { + MdThematicBreakCharFields { + value: self.value(), + } + } + pub fn value(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } +} +impl Serialize for MdThematicBreakChar { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + 
{ + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdThematicBreakCharFields { + pub value: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash, Serialize)] pub enum AnyMdBlock { @@ -1890,6 +1925,25 @@ impl AnyMdLeafBlock { } } } +#[derive(Clone, PartialEq, Eq, Hash, Serialize)] +pub enum AnyMdThematicBreakPart { + MdIndentToken(MdIndentToken), + MdThematicBreakChar(MdThematicBreakChar), +} +impl AnyMdThematicBreakPart { + pub fn as_md_indent_token(&self) -> Option<&MdIndentToken> { + match &self { + Self::MdIndentToken(item) => Some(item), + _ => None, + } + } + pub fn as_md_thematic_break_char(&self) -> Option<&MdThematicBreakChar> { + match &self { + Self::MdThematicBreakChar(item) => Some(item), + _ => None, + } + } +} impl AstNode for MdAutolink { type Language = Language; const KIND_SET: SyntaxKindSet = @@ -3818,10 +3872,7 @@ impl std::fmt::Debug for MdThematicBreakBlock { let result = if current_depth < 16 { DEPTH.set(current_depth + 1); f.debug_struct("MdThematicBreakBlock") - .field( - "value_token", - &support::DebugSyntaxResult(self.value_token()), - ) + .field("parts", &self.parts()) .finish() } else { f.debug_struct("MdThematicBreakBlock").finish() @@ -3840,6 +3891,53 @@ impl From for SyntaxElement { n.syntax.into() } } +impl AstNode for MdThematicBreakChar { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_THEMATIC_BREAK_CHAR as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_THEMATIC_BREAK_CHAR + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdThematicBreakChar { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdThematicBreakChar") + .field("value", &support::DebugSyntaxResult(self.value())) + .finish() + } else { + f.debug_struct("MdThematicBreakChar").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdThematicBreakChar) -> Self { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdThematicBreakChar) -> Self { + n.syntax.into() + } +} impl From for AnyMdBlock { fn from(node: MdQuotePrefix) -> Self { Self::MdQuotePrefix(node) @@ -4486,6 +4584,66 @@ impl From for SyntaxElement { node.into() } } +impl From for AnyMdThematicBreakPart { + fn from(node: MdIndentToken) -> Self { + Self::MdIndentToken(node) + } +} +impl From for AnyMdThematicBreakPart { + fn from(node: MdThematicBreakChar) -> Self { + Self::MdThematicBreakChar(node) + } +} +impl AstNode for AnyMdThematicBreakPart { + type Language = Language; + const KIND_SET: SyntaxKindSet = + MdIndentToken::KIND_SET.union(MdThematicBreakChar::KIND_SET); + fn can_cast(kind: SyntaxKind) -> bool { + matches!(kind, MD_INDENT_TOKEN | MD_THEMATIC_BREAK_CHAR) + } + fn cast(syntax: SyntaxNode) -> Option { + let res = match syntax.kind() { + MD_INDENT_TOKEN => Self::MdIndentToken(MdIndentToken { syntax }), + MD_THEMATIC_BREAK_CHAR => Self::MdThematicBreakChar(MdThematicBreakChar { syntax }), + _ => return None, + }; + Some(res) + } + fn syntax(&self) -> &SyntaxNode { + match self { + Self::MdIndentToken(it) => it.syntax(), + Self::MdThematicBreakChar(it) => it.syntax(), + } + } + fn into_syntax(self) -> SyntaxNode { + match self { + Self::MdIndentToken(it) => it.into_syntax(), + Self::MdThematicBreakChar(it) => it.into_syntax(), + } + } +} +impl std::fmt::Debug for AnyMdThematicBreakPart { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + 
match self { + Self::MdIndentToken(it) => std::fmt::Debug::fmt(it, f), + Self::MdThematicBreakChar(it) => std::fmt::Debug::fmt(it, f), + } + } +} +impl From for SyntaxNode { + fn from(n: AnyMdThematicBreakPart) -> Self { + match n { + AnyMdThematicBreakPart::MdIndentToken(it) => it.into_syntax(), + AnyMdThematicBreakPart::MdThematicBreakChar(it) => it.into_syntax(), + } + } +} +impl From for SyntaxElement { + fn from(n: AnyMdThematicBreakPart) -> Self { + let node: SyntaxNode = n.into(); + node.into() + } +} impl std::fmt::Display for AnyMdBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) @@ -4516,6 +4674,11 @@ impl std::fmt::Display for AnyMdLeafBlock { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for AnyMdThematicBreakPart { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for MdAutolink { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) @@ -4706,6 +4869,11 @@ impl std::fmt::Display for MdThematicBreakBlock { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for MdThematicBreakChar { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} #[derive(Clone, PartialEq, Eq, Hash, Serialize)] pub struct MdBogus { syntax: SyntaxNode, @@ -5337,6 +5505,88 @@ impl IntoIterator for MdQuoteIndentList { self.iter() } } +#[derive(Clone, Eq, PartialEq, Hash)] +pub struct MdThematicBreakPartList { + syntax_list: SyntaxList, +} +impl MdThematicBreakPartList { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub unsafe fn new_unchecked(syntax: 
SyntaxNode) -> Self { + Self { + syntax_list: syntax.into_list(), + } + } +} +impl AstNode for MdThematicBreakPartList { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_THEMATIC_BREAK_PART_LIST as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_THEMATIC_BREAK_PART_LIST + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { + syntax_list: syntax.into_list(), + }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + self.syntax_list.node() + } + fn into_syntax(self) -> SyntaxNode { + self.syntax_list.into_node() + } +} +impl Serialize for MdThematicBreakPartList { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for e in self.iter() { + seq.serialize_element(&e)?; + } + seq.end() + } +} +impl AstNodeList for MdThematicBreakPartList { + type Language = Language; + type Node = AnyMdThematicBreakPart; + fn syntax_list(&self) -> &SyntaxList { + &self.syntax_list + } + fn into_syntax_list(self) -> SyntaxList { + self.syntax_list + } +} +impl Debug for MdThematicBreakPartList { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str("MdThematicBreakPartList ")?; + f.debug_list().entries(self.iter()).finish() + } +} +impl IntoIterator for &MdThematicBreakPartList { + type Item = AnyMdThematicBreakPart; + type IntoIter = AstNodeListIterator; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} +impl IntoIterator for MdThematicBreakPartList { + type Item = AnyMdThematicBreakPart; + type IntoIter = AstNodeListIterator; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} #[derive(Clone)] pub struct DebugSyntaxElementChildren(pub SyntaxElementChildren); impl Debug for DebugSyntaxElementChildren { diff --git a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs index 
50ed4ea7701f..134acf961fb6 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs @@ -618,6 +618,14 @@ impl MdTextual { } } impl MdThematicBreakBlock { + pub fn with_parts(self, element: MdThematicBreakPartList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } +} +impl MdThematicBreakChar { pub fn with_value_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax diff --git a/xtask/codegen/markdown.ungram b/xtask/codegen/markdown.ungram index 026d1acecd80..0aa0b3d7a346 100644 --- a/xtask/codegen/markdown.ungram +++ b/xtask/codegen/markdown.ungram @@ -183,6 +183,15 @@ MdQuoteIndentList = MdQuoteIndent* MdIndentToken = 'md_indent_char' MdIndentTokenList = MdIndentToken* +// Individual thematic break character (*, -, or _). +MdThematicBreakChar = value: ('*' | '-' | '_') + +// A thematic break component: either a break character or inter-marker whitespace. +AnyMdThematicBreakPart = MdThematicBreakChar | MdIndentToken + +// List of thematic break parts. +MdThematicBreakPartList = AnyMdThematicBreakPart* + MdBulletListItem = MdBulletList MdOrderedListItem = MdBulletList @@ -330,7 +339,7 @@ MdEntityReference = value: 'md_entity_literal' // --- // ___ // https://spec.commonmark.org/0.31.2/#container-blocks-and-leaf-blocks -MdThematicBreakBlock = value: 'md_thematic_break_literal' +MdThematicBreakBlock = parts: MdThematicBreakPartList // Explicit newline node for inter-block newlines. // This preserves NEWLINEs in the CST without creating "newline-only paragraphs". 
diff --git a/xtask/codegen/src/markdown_kinds_src.rs b/xtask/codegen/src/markdown_kinds_src.rs index d3be4b4fdb0c..929a91198cdd 100644 --- a/xtask/codegen/src/markdown_kinds_src.rs +++ b/xtask/codegen/src/markdown_kinds_src.rs @@ -101,5 +101,7 @@ pub const MARKDOWN_KINDS_SRC: KindsSrc = KindsSrc { "MD_INDENT_TOKEN", "MD_INDENT_TOKEN_LIST", "MD_LIST_MARKER_PREFIX", + "MD_THEMATIC_BREAK_CHAR", + "MD_THEMATIC_BREAK_PART_LIST", ], };