From 570daa82bcfc482758702326478d60f1f0fd1080 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Thu, 2 Apr 2026 17:46:04 -0400 Subject: [PATCH 1/7] fix(markdown_parser): recognize setext heading inside blockquote After consuming a blockquote prefix (`> `), the lexer's `after_newline` flag is false, so `---` is lexed as MINUS tokens instead of MD_THEMATIC_BREAK_LITERAL. This prevented setext heading detection inside blockquotes. Add `force_relex_at_line_start` to the buffered lexer which re-lexes the current token with `after_line_break = true`. Use it in `classify_quote_break_after_newline` (lookahead) and `break_for_quote_prefix_after_inline_newline` (parse path) so the lexer produces the correct block-level tokens after a quote prefix. --- .../biome_markdown_parser/src/lexer/tests.rs | 33 ++- crates/biome_markdown_parser/src/parser.rs | 9 + .../biome_markdown_parser/src/syntax/mod.rs | 8 +- .../biome_markdown_parser/src/token_source.rs | 8 + .../ok/setext_heading_edge_cases.md.snap | 37 +-- .../ok/setext_heading_in_blockquote.md | 8 + .../ok/setext_heading_in_blockquote.md.snap | 215 ++++++++++++++++++ .../biome_markdown_parser/tests/spec_test.rs | 6 + crates/biome_parser/src/lexer.rs | 33 +++ 9 files changed, 328 insertions(+), 29 deletions(-) create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs index 328107b31ddb..b59fb364016e 100644 --- a/crates/biome_markdown_parser/src/lexer/tests.rs +++ b/crates/biome_markdown_parser/src/lexer/tests.rs @@ -4,7 +4,7 @@ use super::{MarkdownLexer, TextSize}; use crate::lexer::MarkdownLexContext; use biome_markdown_syntax::MarkdownSyntaxKind::*; -use biome_parser::lexer::Lexer; +use biome_parser::lexer::{BufferedLexer, 
Lexer}; use quickcheck_macros::quickcheck; use std::sync::mpsc::channel; use std::thread; @@ -574,3 +574,34 @@ fn block_quote_simple() { NEWLINE:1, } } + +#[test] +fn force_relex_at_line_start_produces_thematic_break() { + // After consuming a blockquote prefix (`> `), `---` is normally lexed as + // MINUS tokens because after_newline is false. force_relex_at_line_start + // should make the lexer treat the position as a line start, producing + // MD_THEMATIC_BREAK_LITERAL instead. + let source = "> ---\n"; + let lexer = MarkdownLexer::from_str(source); + let mut buffered = BufferedLexer::new(lexer); + + // Lex first token: `>` (R_ANGLE) + buffered.next_token(MarkdownLexContext::Regular); + assert_eq!(buffered.current(), R_ANGLE); + + // Lex second token: ` ` (whitespace as MD_TEXTUAL_LITERAL) + buffered.next_token(MarkdownLexContext::Regular); + assert_eq!(buffered.current(), MD_TEXTUAL_LITERAL); + + // Lex third token: without re-lex, `---` becomes MINUS + buffered.next_token(MarkdownLexContext::Regular); + assert_eq!(buffered.current(), MINUS, "without re-lex, should be MINUS"); + + // Now re-lex at line start — should produce MD_THEMATIC_BREAK_LITERAL + let kind = buffered.force_relex_at_line_start(MarkdownLexContext::Regular); + assert_eq!( + kind, MD_THEMATIC_BREAK_LITERAL, + "after force_relex_at_line_start, `---` should be MD_THEMATIC_BREAK_LITERAL" + ); + assert_eq!(buffered.current(), MD_THEMATIC_BREAK_LITERAL); +} diff --git a/crates/biome_markdown_parser/src/parser.rs b/crates/biome_markdown_parser/src/parser.rs index 81ea93531c07..7f4348e6372b 100644 --- a/crates/biome_markdown_parser/src/parser.rs +++ b/crates/biome_markdown_parser/src/parser.rs @@ -217,6 +217,15 @@ impl<'source> MarkdownParser<'source> { .force_relex_in_context(MarkdownLexContext::Regular); } + /// Re-lex the current token in Regular context, treating the position as + /// a line start. 
After consuming a blockquote prefix, the lexer's + /// `after_newline` flag is false, which prevents it from producing + /// line-start-gated tokens like `MD_THEMATIC_BREAK_LITERAL`. This method + /// overrides that flag so the lexer behaves as if at line start. + pub(crate) fn force_relex_at_line_start(&mut self) { + self.source.force_relex_at_line_start(); + } + /// Force re-lex the current token in CodeSpan context. /// In this context, backslash is literal (not an escape character). /// Used for autolinks where `\>` should be `\` + `>` as separate tokens. diff --git a/crates/biome_markdown_parser/src/syntax/mod.rs b/crates/biome_markdown_parser/src/syntax/mod.rs index 13008ad7b642..80ed9e4f53a4 100644 --- a/crates/biome_markdown_parser/src/syntax/mod.rs +++ b/crates/biome_markdown_parser/src/syntax/mod.rs @@ -788,6 +788,9 @@ fn classify_quote_break_after_newline( p.lookahead(|p| { consume_quote_prefix_without_virtual(p, quote_depth); with_virtual_line_start(p, p.cur_range().start(), |p| { + // Re-lex at line start so the lexer produces block-level tokens + // (e.g. MD_THEMATIC_BREAK_LITERAL for `---`) instead of MINUS. + p.force_relex_at_line_start(); if p.at(MD_SETEXT_UNDERLINE_LITERAL) || (p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p)) { @@ -865,9 +868,10 @@ fn break_for_quote_prefix_after_inline_newline(p: &mut MarkdownParser, quote_dep if has_quote_prefix(p, quote_depth) { let break_kind = classify_quote_break_after_newline(p, quote_depth); if matches!(break_kind, QuoteBreakKind::SetextUnderline) { - // Consume the quote prefix so the setext underline is visible - // to the paragraph parser. + // Consume the quote prefix and re-lex at line start so the + // paragraph parser sees MD_THEMATIC_BREAK_LITERAL for `---`. 
consume_quote_prefix(p, quote_depth); + p.force_relex_at_line_start(); } match break_kind { QuoteBreakKind::SetextUnderline | QuoteBreakKind::Other => return true, diff --git a/crates/biome_markdown_parser/src/token_source.rs b/crates/biome_markdown_parser/src/token_source.rs index 8227064cbae2..1b6cc00ed8c0 100644 --- a/crates/biome_markdown_parser/src/token_source.rs +++ b/crates/biome_markdown_parser/src/token_source.rs @@ -160,6 +160,14 @@ impl<'source> MarkdownTokenSource<'source> { self.lexer.force_relex_in_context(context) } + /// Re-lex the current token in Regular context, treating the position as + /// a line start. This makes the lexer produce line-start-gated tokens + /// like `MD_THEMATIC_BREAK_LITERAL`. + pub fn force_relex_at_line_start(&mut self) -> MarkdownSyntaxKind { + self.lexer + .force_relex_at_line_start(MarkdownLexContext::Regular) + } + pub fn set_force_ordered_list_marker(&mut self, value: bool) { self.lexer.lexer_mut().set_force_ordered_list_marker(value); } diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap index f7d2b149c1bc..93fd16e470c5 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap @@ -98,8 +98,8 @@ MdDocument { post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@36..37 " " [] [], }, content: MdBlockList [ - MdParagraph { - list: MdInlineItemList [ + MdSetextHeader { + content: MdInlineItemList [ MdTextual { value_token: MD_TEXTUAL_LITERAL@37..40 "Foo" [] [], }, @@ -111,20 +111,11 @@ MdDocument { marker_token: R_ANGLE@41..42 ">" [] [], post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@42..43 " " [] [], }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@43..44 "-" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@44..45 "-" [] [], - }, - MdTextual 
{ - value_token: MD_TEXTUAL_LITERAL@45..46 "-" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@46..47 "\n" [] [], - }, ], - hard_line: missing (optional), + underline_token: MD_SETEXT_UNDERLINE_LITERAL@43..46 "---" [] [], + }, + MdNewline { + value_token: NEWLINE@46..47 "\n" [] [], }, ], }, @@ -242,8 +233,8 @@ MdDocument { 1: R_ANGLE@35..36 ">" [] [] 2: MD_QUOTE_POST_MARKER_SPACE@36..37 " " [] [] 1: MD_BLOCK_LIST@37..47 - 0: MD_PARAGRAPH@37..47 - 0: MD_INLINE_ITEM_LIST@37..47 + 0: MD_SETEXT_HEADER@37..46 + 0: MD_INLINE_ITEM_LIST@37..43 0: MD_TEXTUAL@37..40 0: MD_TEXTUAL_LITERAL@37..40 "Foo" [] [] 1: MD_TEXTUAL@40..41 @@ -252,15 +243,9 @@ MdDocument { 0: MD_QUOTE_INDENT_LIST@41..41 1: R_ANGLE@41..42 ">" [] [] 2: MD_QUOTE_POST_MARKER_SPACE@42..43 " " [] [] - 3: MD_TEXTUAL@43..44 - 0: MD_TEXTUAL_LITERAL@43..44 "-" [] [] - 4: MD_TEXTUAL@44..45 - 0: MD_TEXTUAL_LITERAL@44..45 "-" [] [] - 5: MD_TEXTUAL@45..46 - 0: MD_TEXTUAL_LITERAL@45..46 "-" [] [] - 6: MD_TEXTUAL@46..47 - 0: MD_TEXTUAL_LITERAL@46..47 "\n" [] [] - 1: (empty) + 1: MD_SETEXT_UNDERLINE_LITERAL@43..46 "---" [] [] + 1: MD_NEWLINE@46..47 + 0: NEWLINE@46..47 "\n" [] [] 10: MD_NEWLINE@47..48 0: NEWLINE@47..48 "\n" [] [] 11: MD_BULLET_LIST_ITEM@48..66 diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md new file mode 100644 index 000000000000..ddbeeddc4b1b --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md @@ -0,0 +1,8 @@ +> Foo +> --- + +> Bar +> === + +> > Nested +> > --- diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap new file mode 100644 index 000000000000..970e238098ba --- /dev/null +++ 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap @@ -0,0 +1,215 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- + +## Input + +``` +> Foo +> --- + +> Bar +> === + +> > Nested +> > --- + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@0..1 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@1..2 " " [] [], + }, + content: MdBlockList [ + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@2..5 "Foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@5..6 "\n" [] [], + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@6..7 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@7..8 " " [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@8..11 "---" [] [], + }, + MdNewline { + value_token: NEWLINE@11..12 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@12..13 "\n" [] [], + }, + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@13..14 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@14..15 " " [] [], + }, + content: MdBlockList [ + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@15..18 "Bar" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@18..19 "\n" [] [], + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@19..20 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@20..21 " " [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@21..24 "===" [] [], + }, + MdNewline { + value_token: NEWLINE@24..25 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@25..26 "\n" [] [], + }, + MdQuote { + prefix: 
MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@26..27 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@27..28 " " [] [], + }, + content: MdBlockList [ + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@28..29 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@29..30 " " [] [], + }, + content: MdBlockList [ + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..36 "Nested" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@36..37 "\n" [] [], + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@37..38 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@38..39 " " [] [], + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@39..40 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@40..41 " " [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@41..44 "---" [] [], + }, + MdNewline { + value_token: NEWLINE@44..45 "\n" [] [], + }, + ], + }, + ], + }, + ], + eof_token: EOF@45..45 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..45 + 0: (empty) + 1: MD_BLOCK_LIST@0..45 + 0: MD_QUOTE@0..12 + 0: MD_QUOTE_PREFIX@0..2 + 0: MD_QUOTE_INDENT_LIST@0..0 + 1: R_ANGLE@0..1 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@1..2 " " [] [] + 1: MD_BLOCK_LIST@2..12 + 0: MD_SETEXT_HEADER@2..11 + 0: MD_INLINE_ITEM_LIST@2..8 + 0: MD_TEXTUAL@2..5 + 0: MD_TEXTUAL_LITERAL@2..5 "Foo" [] [] + 1: MD_TEXTUAL@5..6 + 0: MD_TEXTUAL_LITERAL@5..6 "\n" [] [] + 2: MD_QUOTE_PREFIX@6..8 + 0: MD_QUOTE_INDENT_LIST@6..6 + 1: R_ANGLE@6..7 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@7..8 " " [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@8..11 "---" [] [] + 1: MD_NEWLINE@11..12 + 0: NEWLINE@11..12 "\n" [] [] + 1: MD_NEWLINE@12..13 + 0: NEWLINE@12..13 "\n" [] [] + 2: MD_QUOTE@13..25 + 0: MD_QUOTE_PREFIX@13..15 + 0: MD_QUOTE_INDENT_LIST@13..13 + 
1: R_ANGLE@13..14 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@14..15 " " [] [] + 1: MD_BLOCK_LIST@15..25 + 0: MD_SETEXT_HEADER@15..24 + 0: MD_INLINE_ITEM_LIST@15..21 + 0: MD_TEXTUAL@15..18 + 0: MD_TEXTUAL_LITERAL@15..18 "Bar" [] [] + 1: MD_TEXTUAL@18..19 + 0: MD_TEXTUAL_LITERAL@18..19 "\n" [] [] + 2: MD_QUOTE_PREFIX@19..21 + 0: MD_QUOTE_INDENT_LIST@19..19 + 1: R_ANGLE@19..20 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@20..21 " " [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@21..24 "===" [] [] + 1: MD_NEWLINE@24..25 + 0: NEWLINE@24..25 "\n" [] [] + 3: MD_NEWLINE@25..26 + 0: NEWLINE@25..26 "\n" [] [] + 4: MD_QUOTE@26..45 + 0: MD_QUOTE_PREFIX@26..28 + 0: MD_QUOTE_INDENT_LIST@26..26 + 1: R_ANGLE@26..27 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@27..28 " " [] [] + 1: MD_BLOCK_LIST@28..45 + 0: MD_QUOTE@28..45 + 0: MD_QUOTE_PREFIX@28..30 + 0: MD_QUOTE_INDENT_LIST@28..28 + 1: R_ANGLE@28..29 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@29..30 " " [] [] + 1: MD_BLOCK_LIST@30..45 + 0: MD_SETEXT_HEADER@30..44 + 0: MD_INLINE_ITEM_LIST@30..41 + 0: MD_TEXTUAL@30..36 + 0: MD_TEXTUAL_LITERAL@30..36 "Nested" [] [] + 1: MD_TEXTUAL@36..37 + 0: MD_TEXTUAL_LITERAL@36..37 "\n" [] [] + 2: MD_QUOTE_PREFIX@37..39 + 0: MD_QUOTE_INDENT_LIST@37..37 + 1: R_ANGLE@37..38 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@38..39 " " [] [] + 3: MD_QUOTE_PREFIX@39..41 + 0: MD_QUOTE_INDENT_LIST@39..39 + 1: R_ANGLE@39..40 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@40..41 " " [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@41..44 "---" [] [] + 1: MD_NEWLINE@44..45 + 0: NEWLINE@44..45 "\n" [] [] + 2: EOF@45..45 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs index 48f27a282f6d..0b9e2dcf27c3 100644 --- a/crates/biome_markdown_parser/tests/spec_test.rs +++ b/crates/biome_markdown_parser/tests/spec_test.rs @@ -380,4 +380,10 @@ pub fn quick_test() { "Allowed:
ok
tag.\n", "

Allowed: <div class="a"

\n
\n

ok tag.

\n
\n", ); + // Setext heading inside blockquote + test_example( + 20002, + "> Foo\n> ---\n", + "
<blockquote>\n<h2>Foo</h2>\n</blockquote>
\n", + ); } diff --git a/crates/biome_parser/src/lexer.rs b/crates/biome_parser/src/lexer.rs index da1f8ed7c6c3..04f91c4199f9 100644 --- a/crates/biome_parser/src/lexer.rs +++ b/crates/biome_parser/src/lexer.rs @@ -689,6 +689,39 @@ where kind } + + /// Re-lex the current token in the given context, treating the position + /// as a line start. This overrides `after_line_break` to `true` so the + /// lexer produces line-start-gated tokens (e.g. thematic breaks). + pub fn force_relex_at_line_start(&mut self, context: Lex::LexContext) -> Lex::Kind { + let checkpoint = if let Some(current) = self.current.clone() { + current + } else if let Some(first) = self.lookahead.get_checkpoint(0).cloned() { + first + } else { + self.inner.checkpoint() + }; + + let rewind_checkpoint = LexerCheckpoint { + position: checkpoint.current_start, + current_start: checkpoint.current_start, + current_kind: Lex::Kind::EOF, + current_flags: TokenFlags::empty(), + after_line_break: true, + after_whitespace: checkpoint.after_whitespace, + unicode_bom_length: checkpoint.unicode_bom_length, + diagnostics_pos: checkpoint.diagnostics_pos, + }; + + self.inner.rewind(rewind_checkpoint); + self.current = None; + self.lookahead.clear(); + + let kind = self.inner.next_token(context); + self.current = Some(self.inner.checkpoint()); + + kind + } } impl<'l, Lex> BufferedLexer From 2df31960ca0926fa58cd2ad0e9a51961861bc06f Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Thu, 2 Apr 2026 21:19:29 -0400 Subject: [PATCH 2/7] fix(markdown_parser): parse quoted thematic breaks at line start --- .../biome_markdown_parser/src/syntax/quote.rs | 23 +++++++++++++++++++ .../biome_markdown_parser/tests/spec_test.rs | 1 + 2 files changed, 24 insertions(+) diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs index af909584bb16..13379cf01578 100644 --- a/crates/biome_markdown_parser/src/syntax/quote.rs +++ 
b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -97,6 +97,7 @@ pub(crate) fn parse_quote(p: &mut MarkdownParser) -> ParsedSyntax { p.state_mut().block_quote_depth += 1; let marker_space = emit_quote_prefix_node(p); + force_relex_thematic_break_after_quote_prefix(p); p.set_virtual_line_start(); parse_quote_block_list(p); @@ -125,6 +126,27 @@ fn emit_quote_prefix_node(p: &mut MarkdownParser) -> bool { marker_space } +/// After consuming a quote prefix, selectively re-lex the current token as if +/// it were at line start when the remaining line could form a thematic break. +/// +/// Re-lexing unconditionally perturbs ordinary quoted text tokenization by +/// splitting leading spaces into separate tokens. We only need line-start +/// semantics here for thematic-break candidates like `> ---`. +fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) { + let is_thematic_break_candidate = p.at(T![-]) + || p.at(T![*]) + || p.at(UNDERSCORE) + || p.at(DOUBLE_UNDERSCORE) + || (p.at(MD_TEXTUAL_LITERAL) + && p.cur_text() + .chars() + .all(|c| c == ' ' || c == '\t' || c == '-' || c == '*' || c == '_')); + + if is_thematic_break_candidate { + p.force_relex_at_line_start(); + } +} + /// Emit one quote prefix token sequence: [indent?] `>` [optional space/tab]. /// /// Returns whether a post-marker separator was consumed. @@ -273,6 +295,7 @@ impl QuoteBlockList { { if has_quote_prefix(p, self.depth) { consume_quote_prefix(p, self.depth); + force_relex_thematic_break_after_quote_prefix(p); self.line_started_with_prefix = true; } else { return false; diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs index 0b9e2dcf27c3..2f8a475bebb2 100644 --- a/crates/biome_markdown_parser/tests/spec_test.rs +++ b/crates/biome_markdown_parser/tests/spec_test.rs @@ -386,4 +386,5 @@ pub fn quick_test() { "> Foo\n> ---\n", "
<blockquote>\n<h2>Foo</h2>\n</blockquote>
\n", ); + test_example(20003, "> ---\n", "
<blockquote>\n<hr />\n</blockquote>
\n"); } From 23de06d6715c4f7e394210ef8d1837639b1e0458 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Fri, 3 Apr 2026 09:43:41 -0400 Subject: [PATCH 3/7] fix(review): use lookup table, fix mixed-char bug in thematic break candidate, add tests --- .../biome_markdown_parser/src/syntax/quote.rs | 37 +- .../ok/setext_heading_in_blockquote.md | 19 + .../ok/setext_heading_in_blockquote.md.snap | 530 +++++++++++++++++- 3 files changed, 579 insertions(+), 7 deletions(-) diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs index 13379cf01578..032e1fb62989 100644 --- a/crates/biome_markdown_parser/src/syntax/quote.rs +++ b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -132,21 +132,52 @@ fn emit_quote_prefix_node(p: &mut MarkdownParser) -> bool { /// Re-lexing unconditionally perturbs ordinary quoted text tokenization by /// splitting leading spaces into separate tokens. We only need line-start /// semantics here for thematic-break candidates like `> ---`. +/// +/// A candidate is any line whose non-whitespace bytes are all the **same** +/// thematic break character (`-`, `*`, or `_`). Per CommonMark §4.1, mixing +/// different break characters (e.g. `_*-`) does **not** form a thematic break. fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) { let is_thematic_break_candidate = p.at(T![-]) || p.at(T![*]) || p.at(UNDERSCORE) || p.at(DOUBLE_UNDERSCORE) || (p.at(MD_TEXTUAL_LITERAL) - && p.cur_text() - .chars() - .all(|c| c == ' ' || c == '\t' || c == '-' || c == '*' || c == '_')); + && is_thematic_break_candidate_text(p.cur_text())); if is_thematic_break_candidate { p.force_relex_at_line_start(); } } +/// Check if `text` could be a thematic break: all non-whitespace bytes must be +/// the **same** thematic break character (`-`, `*`, or `_`). 
+fn is_thematic_break_candidate_text(text: &str) -> bool { + use biome_unicode_table::{Dispatch::WHS, lookup_byte}; + + let mut break_char: Option<u8> = None; + for &b in text.as_bytes() { + // Skip whitespace (space, tab, etc.) via the shared lookup table. + if lookup_byte(b) == WHS { + continue; + } + match b { + b'-' | b'*' | b'_' => { + if let Some(expected) = break_char { + // Mixed break characters like `_*-` are not valid. + if b != expected { + return false; + } + } else { + break_char = Some(b); + } + } + // Any other non-whitespace byte disqualifies the line. + _ => return false, + } + } + break_char.is_some() +} + /// Emit one quote prefix token sequence: [indent?] `>` [optional space/tab]. /// /// Returns whether a post-marker separator was consumed. diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md index ddbeeddc4b1b..9827d404a6f3 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md @@ -6,3 +6,22 @@ > > Nested > > --- + +> Dashes with spaces +> - - - + +> Stars +> *** + +> Stars with spaces +> * * * + +> Underscores +> ___ + +> Underscores with spaces +> _ _ _ + +> Mixed break chars are NOT thematic breaks (CommonMark §4.1), +> so this line is a continuation paragraph, not a heading. 
+> -*_ diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap index 970e238098ba..227f0407701c 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap @@ -15,6 +15,25 @@ expression: snapshot > > Nested > > --- +> Dashes with spaces +> - - - + +> Stars +> *** + +> Stars with spaces +> * * * + +> Underscores +> ___ + +> Underscores with spaces +> _ _ _ + +> Mixed break chars are NOT thematic breaks (CommonMark §4.1), +> so this line is a continuation paragraph, not a heading. +> -*_ + ``` @@ -128,17 +147,319 @@ MdDocument { }, ], }, + MdNewline { + value_token: NEWLINE@45..46 "\n" [] [], + }, + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@46..47 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@47..48 " " [] [], + }, + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@48..66 "Dashes with spaces" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@66..67 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@67..68 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@68..69 " " [] [], + }, + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: MINUS@69..70 "-" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@70..71 " " [] [], + }, + MdThematicBreakChar { + value: MINUS@71..72 "-" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@72..73 " " [] [], + }, + MdThematicBreakChar { + value: MINUS@73..74 "-" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@74..75 "\n" [] [], 
+ }, + ], + }, + MdNewline { + value_token: NEWLINE@75..76 "\n" [] [], + }, + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@76..77 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@77..78 " " [] [], + }, + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@78..83 "Stars" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@83..84 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@84..85 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@85..86 " " [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@86..87 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@87..88 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@88..89 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@89..90 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@90..91 "\n" [] [], + }, + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@91..92 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@92..93 " " [] [], + }, + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@93..110 "Stars with spaces" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@110..111 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@111..112 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@112..113 " " [] [], + }, + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: STAR@113..114 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: 
MD_INDENT_CHAR@114..115 " " [] [], + }, + MdThematicBreakChar { + value: STAR@115..116 "*" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@116..117 " " [] [], + }, + MdThematicBreakChar { + value: STAR@117..118 "*" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@118..119 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@119..120 "\n" [] [], + }, + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@120..121 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@121..122 " " [] [], + }, + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@122..133 "Underscores" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@133..134 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@134..135 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@135..136 " " [] [], + }, + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: UNDERSCORE@136..137 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@137..138 "_" [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@138..139 "_" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@139..140 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@140..141 "\n" [] [], + }, + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@141..142 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@142..143 " " [] [], + }, + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@143..166 "Underscores with spaces" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@166..167 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuotePrefix { + pre_marker_indent: 
MdQuoteIndentList [], + marker_token: R_ANGLE@167..168 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@168..169 " " [] [], + }, + MdThematicBreakBlock { + parts: MdThematicBreakPartList [ + MdThematicBreakChar { + value: UNDERSCORE@169..170 "_" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@170..171 " " [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@171..172 "_" [] [], + }, + MdIndentToken { + md_indent_char_token: MD_INDENT_CHAR@172..173 " " [] [], + }, + MdThematicBreakChar { + value: UNDERSCORE@173..174 "_" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@174..175 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@175..176 "\n" [] [], + }, + MdQuote { + prefix: MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@176..177 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@177..178 " " [] [], + }, + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@178..220 "Mixed break chars are NOT thematic breaks " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@220..221 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@221..237 "CommonMark §4.1" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@237..238 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@238..239 "," [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@239..240 "\n" [] [], + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@240..241 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@241..242 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@242..298 "so this line is a continuation paragraph, not a heading." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@298..299 "\n" [] [], + }, + MdQuotePrefix { + pre_marker_indent: MdQuoteIndentList [], + marker_token: R_ANGLE@299..300 ">" [] [], + post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@300..301 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@301..302 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@302..303 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@303..304 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@304..305 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, ], - eof_token: EOF@45..45 "" [] [], + eof_token: EOF@305..305 "" [] [], } ``` ## CST ``` -0: MD_DOCUMENT@0..45 +0: MD_DOCUMENT@0..305 0: (empty) - 1: MD_BLOCK_LIST@0..45 + 1: MD_BLOCK_LIST@0..305 0: MD_QUOTE@0..12 0: MD_QUOTE_PREFIX@0..2 0: MD_QUOTE_INDENT_LIST@0..0 @@ -210,6 +531,207 @@ MdDocument { 1: MD_SETEXT_UNDERLINE_LITERAL@41..44 "---" [] [] 1: MD_NEWLINE@44..45 0: NEWLINE@44..45 "\n" [] [] - 2: EOF@45..45 "" [] [] + 5: MD_NEWLINE@45..46 + 0: NEWLINE@45..46 "\n" [] [] + 6: MD_QUOTE@46..75 + 0: MD_QUOTE_PREFIX@46..48 + 0: MD_QUOTE_INDENT_LIST@46..46 + 1: R_ANGLE@46..47 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@47..48 " " [] [] + 1: MD_BLOCK_LIST@48..75 + 0: MD_PARAGRAPH@48..67 + 0: MD_INLINE_ITEM_LIST@48..67 + 0: MD_TEXTUAL@48..66 + 0: MD_TEXTUAL_LITERAL@48..66 "Dashes with spaces" [] [] + 1: MD_TEXTUAL@66..67 + 0: MD_TEXTUAL_LITERAL@66..67 "\n" [] [] + 1: (empty) + 1: MD_QUOTE_PREFIX@67..69 + 0: MD_QUOTE_INDENT_LIST@67..67 + 1: R_ANGLE@67..68 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@68..69 " " [] [] + 2: MD_THEMATIC_BREAK_BLOCK@69..74 + 0: MD_THEMATIC_BREAK_PART_LIST@69..74 + 0: MD_THEMATIC_BREAK_CHAR@69..70 + 0: MINUS@69..70 "-" [] [] + 1: MD_INDENT_TOKEN@70..71 + 0: MD_INDENT_CHAR@70..71 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@71..72 + 0: MINUS@71..72 "-" [] [] + 3: MD_INDENT_TOKEN@72..73 + 0: MD_INDENT_CHAR@72..73 " " [] [] + 4: 
MD_THEMATIC_BREAK_CHAR@73..74 + 0: MINUS@73..74 "-" [] [] + 3: MD_NEWLINE@74..75 + 0: NEWLINE@74..75 "\n" [] [] + 7: MD_NEWLINE@75..76 + 0: NEWLINE@75..76 "\n" [] [] + 8: MD_QUOTE@76..90 + 0: MD_QUOTE_PREFIX@76..78 + 0: MD_QUOTE_INDENT_LIST@76..76 + 1: R_ANGLE@76..77 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@77..78 " " [] [] + 1: MD_BLOCK_LIST@78..90 + 0: MD_PARAGRAPH@78..84 + 0: MD_INLINE_ITEM_LIST@78..84 + 0: MD_TEXTUAL@78..83 + 0: MD_TEXTUAL_LITERAL@78..83 "Stars" [] [] + 1: MD_TEXTUAL@83..84 + 0: MD_TEXTUAL_LITERAL@83..84 "\n" [] [] + 1: (empty) + 1: MD_QUOTE_PREFIX@84..86 + 0: MD_QUOTE_INDENT_LIST@84..84 + 1: R_ANGLE@84..85 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@85..86 " " [] [] + 2: MD_PARAGRAPH@86..90 + 0: MD_INLINE_ITEM_LIST@86..90 + 0: MD_TEXTUAL@86..87 + 0: MD_TEXTUAL_LITERAL@86..87 "*" [] [] + 1: MD_TEXTUAL@87..88 + 0: MD_TEXTUAL_LITERAL@87..88 "*" [] [] + 2: MD_TEXTUAL@88..89 + 0: MD_TEXTUAL_LITERAL@88..89 "*" [] [] + 3: MD_TEXTUAL@89..90 + 0: MD_TEXTUAL_LITERAL@89..90 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@90..91 + 0: NEWLINE@90..91 "\n" [] [] + 10: MD_QUOTE@91..119 + 0: MD_QUOTE_PREFIX@91..93 + 0: MD_QUOTE_INDENT_LIST@91..91 + 1: R_ANGLE@91..92 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@92..93 " " [] [] + 1: MD_BLOCK_LIST@93..119 + 0: MD_PARAGRAPH@93..111 + 0: MD_INLINE_ITEM_LIST@93..111 + 0: MD_TEXTUAL@93..110 + 0: MD_TEXTUAL_LITERAL@93..110 "Stars with spaces" [] [] + 1: MD_TEXTUAL@110..111 + 0: MD_TEXTUAL_LITERAL@110..111 "\n" [] [] + 1: (empty) + 1: MD_QUOTE_PREFIX@111..113 + 0: MD_QUOTE_INDENT_LIST@111..111 + 1: R_ANGLE@111..112 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@112..113 " " [] [] + 2: MD_THEMATIC_BREAK_BLOCK@113..118 + 0: MD_THEMATIC_BREAK_PART_LIST@113..118 + 0: MD_THEMATIC_BREAK_CHAR@113..114 + 0: STAR@113..114 "*" [] [] + 1: MD_INDENT_TOKEN@114..115 + 0: MD_INDENT_CHAR@114..115 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@115..116 + 0: STAR@115..116 "*" [] [] + 3: MD_INDENT_TOKEN@116..117 + 0: MD_INDENT_CHAR@116..117 " " [] [] + 4: 
MD_THEMATIC_BREAK_CHAR@117..118 + 0: STAR@117..118 "*" [] [] + 3: MD_NEWLINE@118..119 + 0: NEWLINE@118..119 "\n" [] [] + 11: MD_NEWLINE@119..120 + 0: NEWLINE@119..120 "\n" [] [] + 12: MD_QUOTE@120..140 + 0: MD_QUOTE_PREFIX@120..122 + 0: MD_QUOTE_INDENT_LIST@120..120 + 1: R_ANGLE@120..121 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@121..122 " " [] [] + 1: MD_BLOCK_LIST@122..140 + 0: MD_PARAGRAPH@122..134 + 0: MD_INLINE_ITEM_LIST@122..134 + 0: MD_TEXTUAL@122..133 + 0: MD_TEXTUAL_LITERAL@122..133 "Underscores" [] [] + 1: MD_TEXTUAL@133..134 + 0: MD_TEXTUAL_LITERAL@133..134 "\n" [] [] + 1: (empty) + 1: MD_QUOTE_PREFIX@134..136 + 0: MD_QUOTE_INDENT_LIST@134..134 + 1: R_ANGLE@134..135 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@135..136 " " [] [] + 2: MD_THEMATIC_BREAK_BLOCK@136..139 + 0: MD_THEMATIC_BREAK_PART_LIST@136..139 + 0: MD_THEMATIC_BREAK_CHAR@136..137 + 0: UNDERSCORE@136..137 "_" [] [] + 1: MD_THEMATIC_BREAK_CHAR@137..138 + 0: UNDERSCORE@137..138 "_" [] [] + 2: MD_THEMATIC_BREAK_CHAR@138..139 + 0: UNDERSCORE@138..139 "_" [] [] + 3: MD_NEWLINE@139..140 + 0: NEWLINE@139..140 "\n" [] [] + 13: MD_NEWLINE@140..141 + 0: NEWLINE@140..141 "\n" [] [] + 14: MD_QUOTE@141..175 + 0: MD_QUOTE_PREFIX@141..143 + 0: MD_QUOTE_INDENT_LIST@141..141 + 1: R_ANGLE@141..142 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@142..143 " " [] [] + 1: MD_BLOCK_LIST@143..175 + 0: MD_PARAGRAPH@143..167 + 0: MD_INLINE_ITEM_LIST@143..167 + 0: MD_TEXTUAL@143..166 + 0: MD_TEXTUAL_LITERAL@143..166 "Underscores with spaces" [] [] + 1: MD_TEXTUAL@166..167 + 0: MD_TEXTUAL_LITERAL@166..167 "\n" [] [] + 1: (empty) + 1: MD_QUOTE_PREFIX@167..169 + 0: MD_QUOTE_INDENT_LIST@167..167 + 1: R_ANGLE@167..168 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@168..169 " " [] [] + 2: MD_THEMATIC_BREAK_BLOCK@169..174 + 0: MD_THEMATIC_BREAK_PART_LIST@169..174 + 0: MD_THEMATIC_BREAK_CHAR@169..170 + 0: UNDERSCORE@169..170 "_" [] [] + 1: MD_INDENT_TOKEN@170..171 + 0: MD_INDENT_CHAR@170..171 " " [] [] + 2: MD_THEMATIC_BREAK_CHAR@171..172 
+ 0: UNDERSCORE@171..172 "_" [] [] + 3: MD_INDENT_TOKEN@172..173 + 0: MD_INDENT_CHAR@172..173 " " [] [] + 4: MD_THEMATIC_BREAK_CHAR@173..174 + 0: UNDERSCORE@173..174 "_" [] [] + 3: MD_NEWLINE@174..175 + 0: NEWLINE@174..175 "\n" [] [] + 15: MD_NEWLINE@175..176 + 0: NEWLINE@175..176 "\n" [] [] + 16: MD_QUOTE@176..305 + 0: MD_QUOTE_PREFIX@176..178 + 0: MD_QUOTE_INDENT_LIST@176..176 + 1: R_ANGLE@176..177 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@177..178 " " [] [] + 1: MD_BLOCK_LIST@178..305 + 0: MD_PARAGRAPH@178..305 + 0: MD_INLINE_ITEM_LIST@178..305 + 0: MD_TEXTUAL@178..220 + 0: MD_TEXTUAL_LITERAL@178..220 "Mixed break chars are NOT thematic breaks " [] [] + 1: MD_TEXTUAL@220..221 + 0: MD_TEXTUAL_LITERAL@220..221 "(" [] [] + 2: MD_TEXTUAL@221..237 + 0: MD_TEXTUAL_LITERAL@221..237 "CommonMark §4.1" [] [] + 3: MD_TEXTUAL@237..238 + 0: MD_TEXTUAL_LITERAL@237..238 ")" [] [] + 4: MD_TEXTUAL@238..239 + 0: MD_TEXTUAL_LITERAL@238..239 "," [] [] + 5: MD_TEXTUAL@239..240 + 0: MD_TEXTUAL_LITERAL@239..240 "\n" [] [] + 6: MD_QUOTE_PREFIX@240..242 + 0: MD_QUOTE_INDENT_LIST@240..240 + 1: R_ANGLE@240..241 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@241..242 " " [] [] + 7: MD_TEXTUAL@242..298 + 0: MD_TEXTUAL_LITERAL@242..298 "so this line is a continuation paragraph, not a heading." 
[] [] + 8: MD_TEXTUAL@298..299 + 0: MD_TEXTUAL_LITERAL@298..299 "\n" [] [] + 9: MD_QUOTE_PREFIX@299..301 + 0: MD_QUOTE_INDENT_LIST@299..299 + 1: R_ANGLE@299..300 ">" [] [] + 2: MD_QUOTE_POST_MARKER_SPACE@300..301 " " [] [] + 10: MD_TEXTUAL@301..302 + 0: MD_TEXTUAL_LITERAL@301..302 "-" [] [] + 11: MD_TEXTUAL@302..303 + 0: MD_TEXTUAL_LITERAL@302..303 "*" [] [] + 12: MD_TEXTUAL@303..304 + 0: MD_TEXTUAL_LITERAL@303..304 "_" [] [] + 13: MD_TEXTUAL@304..305 + 0: MD_TEXTUAL_LITERAL@304..305 "\n" [] [] + 1: (empty) + 2: EOF@305..305 "" [] [] ``` From e753161117559ac2c3cdaa2dd6d813a95ff8952f Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Fri, 3 Apr 2026 13:52:52 +0000 Subject: [PATCH 4/7] [autofix.ci] apply automated fixes --- crates/biome_markdown_parser/src/syntax/quote.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs index 032e1fb62989..e9a31062a394 100644 --- a/crates/biome_markdown_parser/src/syntax/quote.rs +++ b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -141,8 +141,7 @@ fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) { || p.at(T![*]) || p.at(UNDERSCORE) || p.at(DOUBLE_UNDERSCORE) - || (p.at(MD_TEXTUAL_LITERAL) - && is_thematic_break_candidate_text(p.cur_text())); + || (p.at(MD_TEXTUAL_LITERAL) && is_thematic_break_candidate_text(p.cur_text())); if is_thematic_break_candidate { p.force_relex_at_line_start(); From 633b0c59a842dcfb7bd95a36c405d0c2de0181f8 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Fri, 3 Apr 2026 10:26:55 -0400 Subject: [PATCH 5/7] fix(markdown): relex quoted thematic breaks after indented code --- crates/biome_markdown_parser/src/syntax/quote.rs | 9 +++++++-- crates/biome_markdown_parser/tests/spec_test.rs | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git 
a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs index e9a31062a394..cf0495aae3a1 100644 --- a/crates/biome_markdown_parser/src/syntax/quote.rs +++ b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -97,7 +97,7 @@ pub(crate) fn parse_quote(p: &mut MarkdownParser) -> ParsedSyntax { p.state_mut().block_quote_depth += 1; let marker_space = emit_quote_prefix_node(p); - force_relex_thematic_break_after_quote_prefix(p); + relex_after_quote_prefix_consumed(p); p.set_virtual_line_start(); parse_quote_block_list(p); @@ -148,6 +148,10 @@ fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) { } } +fn relex_after_quote_prefix_consumed(p: &mut MarkdownParser) { + force_relex_thematic_break_after_quote_prefix(p); +} + /// Check if `text` could be a thematic break: all non-whitespace bytes must be /// the **same** thematic break character (`-`, `*`, or `_`). fn is_thematic_break_candidate_text(text: &str) -> bool { @@ -325,7 +329,7 @@ impl QuoteBlockList { { if has_quote_prefix(p, self.depth) { consume_quote_prefix(p, self.depth); - force_relex_thematic_break_after_quote_prefix(p); + relex_after_quote_prefix_consumed(p); self.line_started_with_prefix = true; } else { return false; @@ -586,6 +590,7 @@ fn parse_code_block_newline(p: &mut MarkdownParser, depth: usize) -> bool { } consume_quote_prefix(p, depth); + relex_after_quote_prefix_consumed(p); // Blank lines (consecutive newlines) are allowed in indented code if p.at(NEWLINE) { diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs index 2f8a475bebb2..1589cbf45da9 100644 --- a/crates/biome_markdown_parser/tests/spec_test.rs +++ b/crates/biome_markdown_parser/tests/spec_test.rs @@ -288,6 +288,12 @@ pub fn quick_test() { "> ```\n> hello\n> ```\n", "
<blockquote>\n<pre><code>hello\n</code></pre>\n</blockquote>
\n", ); + // Quoted indented code must terminate before a quoted thematic break. + test_example( + 99921, + ">     code\n> ---\n", + "
<blockquote>\n<pre><code>code\n</code></pre>\n<hr />\n</blockquote>
\n", + ); test_example( 9993, "- foo\n - bar\n", From 71f073b46b80456dacdb41c069d40109b3ffa496 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Fri, 3 Apr 2026 10:37:38 -0400 Subject: [PATCH 6/7] fix(markdown): stop quoted code before thematic break --- .../biome_markdown_parser/src/syntax/quote.rs | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs index cf0495aae3a1..aed2f989813d 100644 --- a/crates/biome_markdown_parser/src/syntax/quote.rs +++ b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -589,16 +589,30 @@ fn parse_code_block_newline(p: &mut MarkdownParser, depth: usize) -> bool { return false; } + let continues_code_block = p.lookahead(|p| { + consume_quote_prefix(p, depth); + + // Blank lines (consecutive newlines) are allowed in indented code. + if p.at(NEWLINE) { + return true; + } + + at_quote_indented_code_start(p) + }); + + if !continues_code_block { + return false; + } + consume_quote_prefix(p, depth); relex_after_quote_prefix_consumed(p); - // Blank lines (consecutive newlines) are allowed in indented code + // Blank lines (consecutive newlines) are allowed in indented code. if p.at(NEWLINE) { return true; } - // Next line must still be indented to continue the code block - at_quote_indented_code_start(p) + true } /// Parse a single textual token in an indented code block. From e9eca1242ef0bc3586b76372b02f840d81173af4 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Sun, 5 Apr 2026 19:29:59 -0400 Subject: [PATCH 7/7] refactor: use dispatch table for thematic break char matching Address review feedback: use `biome_unicode_table` dispatch variants (MIN, MUL, IDT) instead of raw byte literals for thematic break character matching in `is_thematic_break_candidate_text`. 
--- .../biome_markdown_parser/src/syntax/quote.rs | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs index aed2f989813d..20e3153eeb56 100644 --- a/crates/biome_markdown_parser/src/syntax/quote.rs +++ b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -155,27 +155,34 @@ fn relex_after_quote_prefix_consumed(p: &mut MarkdownParser) { /// Check if `text` could be a thematic break: all non-whitespace bytes must be /// the **same** thematic break character (`-`, `*`, or `_`). fn is_thematic_break_candidate_text(text: &str) -> bool { - use biome_unicode_table::{Dispatch::WHS, lookup_byte}; + use biome_unicode_table::{ + Dispatch::{IDT, MIN, MUL, WHS}, + lookup_byte, + }; let mut break_char: Option<u8> = None; for &b in text.as_bytes() { + let dispatched = lookup_byte(b); // Skip whitespace (space, tab, etc.) via the shared lookup table. - if lookup_byte(b) == WHS { + if dispatched == WHS { continue; } - match b { - b'-' | b'*' | b'_' => { - if let Some(expected) = break_char { - // Mixed break characters like `_*-` are not valid. - if b != expected { - return false; - } - } else { - break_char = Some(b); + // Match thematic break characters via dispatch variants: + // MIN = `-`, MUL = `*`, IDT = `_` (IDT also covers letters, so + // narrow to `b'_'` explicitly). + let is_break_char = matches!(dispatched, MIN | MUL) || (dispatched == IDT && b == b'_'); + if is_break_char { + if let Some(expected) = break_char { + // Mixed break characters like `_*-` are not valid. + if b != expected { + return false; } + } else { + break_char = Some(b); } + } else { // Any other non-whitespace byte disqualifies the line. - _ => return false, + return false; } } break_char.is_some()