From 570daa82bcfc482758702326478d60f1f0fd1080 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Thu, 2 Apr 2026 17:46:04 -0400
Subject: [PATCH 1/7] fix(markdown_parser): recognize setext heading inside
blockquote
After consuming a blockquote prefix (`> `), the lexer's `after_newline`
flag is false, so `---` is lexed as MINUS tokens instead of
MD_THEMATIC_BREAK_LITERAL. This prevented setext heading detection
inside blockquotes.
Add `force_relex_at_line_start` to the buffered lexer which re-lexes
the current token with `after_line_break = true`. Use it in
`classify_quote_break_after_newline` (lookahead) and
`break_for_quote_prefix_after_inline_newline` (parse path) so the
lexer produces the correct block-level tokens after a quote prefix.
---
.../biome_markdown_parser/src/lexer/tests.rs | 33 ++-
crates/biome_markdown_parser/src/parser.rs | 9 +
.../biome_markdown_parser/src/syntax/mod.rs | 8 +-
.../biome_markdown_parser/src/token_source.rs | 8 +
.../ok/setext_heading_edge_cases.md.snap | 37 +--
.../ok/setext_heading_in_blockquote.md | 8 +
.../ok/setext_heading_in_blockquote.md.snap | 215 ++++++++++++++++++
.../biome_markdown_parser/tests/spec_test.rs | 6 +
crates/biome_parser/src/lexer.rs | 33 +++
9 files changed, 328 insertions(+), 29 deletions(-)
create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md
create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap
diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs
index 328107b31ddb..b59fb364016e 100644
--- a/crates/biome_markdown_parser/src/lexer/tests.rs
+++ b/crates/biome_markdown_parser/src/lexer/tests.rs
@@ -4,7 +4,7 @@
use super::{MarkdownLexer, TextSize};
use crate::lexer::MarkdownLexContext;
use biome_markdown_syntax::MarkdownSyntaxKind::*;
-use biome_parser::lexer::Lexer;
+use biome_parser::lexer::{BufferedLexer, Lexer};
use quickcheck_macros::quickcheck;
use std::sync::mpsc::channel;
use std::thread;
@@ -574,3 +574,34 @@ fn block_quote_simple() {
NEWLINE:1,
}
}
+
+#[test]
+fn force_relex_at_line_start_produces_thematic_break() {
+ // After consuming a blockquote prefix (`> `), `---` is normally lexed as
+ // MINUS tokens because after_newline is false. force_relex_at_line_start
+ // should make the lexer treat the position as a line start, producing
+ // MD_THEMATIC_BREAK_LITERAL instead.
+ let source = "> ---\n";
+ let lexer = MarkdownLexer::from_str(source);
+ let mut buffered = BufferedLexer::new(lexer);
+
+ // Lex first token: `>` (R_ANGLE)
+ buffered.next_token(MarkdownLexContext::Regular);
+ assert_eq!(buffered.current(), R_ANGLE);
+
+ // Lex second token: ` ` (whitespace as MD_TEXTUAL_LITERAL)
+ buffered.next_token(MarkdownLexContext::Regular);
+ assert_eq!(buffered.current(), MD_TEXTUAL_LITERAL);
+
+ // Lex third token: without re-lex, `---` becomes MINUS
+ buffered.next_token(MarkdownLexContext::Regular);
+ assert_eq!(buffered.current(), MINUS, "without re-lex, should be MINUS");
+
+ // Now re-lex at line start — should produce MD_THEMATIC_BREAK_LITERAL
+ let kind = buffered.force_relex_at_line_start(MarkdownLexContext::Regular);
+ assert_eq!(
+ kind, MD_THEMATIC_BREAK_LITERAL,
+ "after force_relex_at_line_start, `---` should be MD_THEMATIC_BREAK_LITERAL"
+ );
+ assert_eq!(buffered.current(), MD_THEMATIC_BREAK_LITERAL);
+}
diff --git a/crates/biome_markdown_parser/src/parser.rs b/crates/biome_markdown_parser/src/parser.rs
index 81ea93531c07..7f4348e6372b 100644
--- a/crates/biome_markdown_parser/src/parser.rs
+++ b/crates/biome_markdown_parser/src/parser.rs
@@ -217,6 +217,15 @@ impl<'source> MarkdownParser<'source> {
.force_relex_in_context(MarkdownLexContext::Regular);
}
+ /// Re-lex the current token in Regular context, treating the position as
+ /// a line start. After consuming a blockquote prefix, the lexer's
+ /// `after_newline` flag is false, which prevents it from producing
+ /// line-start-gated tokens like `MD_THEMATIC_BREAK_LITERAL`. This method
+ /// overrides that flag so the lexer behaves as if at line start.
+ pub(crate) fn force_relex_at_line_start(&mut self) {
+ self.source.force_relex_at_line_start();
+ }
+
/// Force re-lex the current token in CodeSpan context.
/// In this context, backslash is literal (not an escape character).
/// Used for autolinks where `\>` should be `\` + `>` as separate tokens.
diff --git a/crates/biome_markdown_parser/src/syntax/mod.rs b/crates/biome_markdown_parser/src/syntax/mod.rs
index 13008ad7b642..80ed9e4f53a4 100644
--- a/crates/biome_markdown_parser/src/syntax/mod.rs
+++ b/crates/biome_markdown_parser/src/syntax/mod.rs
@@ -788,6 +788,9 @@ fn classify_quote_break_after_newline(
p.lookahead(|p| {
consume_quote_prefix_without_virtual(p, quote_depth);
with_virtual_line_start(p, p.cur_range().start(), |p| {
+ // Re-lex at line start so the lexer produces block-level tokens
+ // (e.g. MD_THEMATIC_BREAK_LITERAL for `---`) instead of MINUS.
+ p.force_relex_at_line_start();
if p.at(MD_SETEXT_UNDERLINE_LITERAL)
|| (p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p))
{
@@ -865,9 +868,10 @@ fn break_for_quote_prefix_after_inline_newline(p: &mut MarkdownParser, quote_dep
if has_quote_prefix(p, quote_depth) {
let break_kind = classify_quote_break_after_newline(p, quote_depth);
if matches!(break_kind, QuoteBreakKind::SetextUnderline) {
- // Consume the quote prefix so the setext underline is visible
- // to the paragraph parser.
+ // Consume the quote prefix and re-lex at line start so the
+ // paragraph parser sees MD_THEMATIC_BREAK_LITERAL for `---`.
consume_quote_prefix(p, quote_depth);
+ p.force_relex_at_line_start();
}
match break_kind {
QuoteBreakKind::SetextUnderline | QuoteBreakKind::Other => return true,
diff --git a/crates/biome_markdown_parser/src/token_source.rs b/crates/biome_markdown_parser/src/token_source.rs
index 8227064cbae2..1b6cc00ed8c0 100644
--- a/crates/biome_markdown_parser/src/token_source.rs
+++ b/crates/biome_markdown_parser/src/token_source.rs
@@ -160,6 +160,14 @@ impl<'source> MarkdownTokenSource<'source> {
self.lexer.force_relex_in_context(context)
}
+ /// Re-lex the current token in Regular context, treating the position as
+ /// a line start. This makes the lexer produce line-start-gated tokens
+ /// like `MD_THEMATIC_BREAK_LITERAL`.
+ pub fn force_relex_at_line_start(&mut self) -> MarkdownSyntaxKind {
+ self.lexer
+ .force_relex_at_line_start(MarkdownLexContext::Regular)
+ }
+
pub fn set_force_ordered_list_marker(&mut self, value: bool) {
self.lexer.lexer_mut().set_force_ordered_list_marker(value);
}
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap
index f7d2b149c1bc..93fd16e470c5 100644
--- a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_edge_cases.md.snap
@@ -98,8 +98,8 @@ MdDocument {
post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@36..37 " " [] [],
},
content: MdBlockList [
- MdParagraph {
- list: MdInlineItemList [
+ MdSetextHeader {
+ content: MdInlineItemList [
MdTextual {
value_token: MD_TEXTUAL_LITERAL@37..40 "Foo" [] [],
},
@@ -111,20 +111,11 @@ MdDocument {
marker_token: R_ANGLE@41..42 ">" [] [],
post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@42..43 " " [] [],
},
- MdTextual {
- value_token: MD_TEXTUAL_LITERAL@43..44 "-" [] [],
- },
- MdTextual {
- value_token: MD_TEXTUAL_LITERAL@44..45 "-" [] [],
- },
- MdTextual {
- value_token: MD_TEXTUAL_LITERAL@45..46 "-" [] [],
- },
- MdTextual {
- value_token: MD_TEXTUAL_LITERAL@46..47 "\n" [] [],
- },
],
- hard_line: missing (optional),
+ underline_token: MD_SETEXT_UNDERLINE_LITERAL@43..46 "---" [] [],
+ },
+ MdNewline {
+ value_token: NEWLINE@46..47 "\n" [] [],
},
],
},
@@ -242,8 +233,8 @@ MdDocument {
1: R_ANGLE@35..36 ">" [] []
2: MD_QUOTE_POST_MARKER_SPACE@36..37 " " [] []
1: MD_BLOCK_LIST@37..47
- 0: MD_PARAGRAPH@37..47
- 0: MD_INLINE_ITEM_LIST@37..47
+ 0: MD_SETEXT_HEADER@37..46
+ 0: MD_INLINE_ITEM_LIST@37..43
0: MD_TEXTUAL@37..40
0: MD_TEXTUAL_LITERAL@37..40 "Foo" [] []
1: MD_TEXTUAL@40..41
@@ -252,15 +243,9 @@ MdDocument {
0: MD_QUOTE_INDENT_LIST@41..41
1: R_ANGLE@41..42 ">" [] []
2: MD_QUOTE_POST_MARKER_SPACE@42..43 " " [] []
- 3: MD_TEXTUAL@43..44
- 0: MD_TEXTUAL_LITERAL@43..44 "-" [] []
- 4: MD_TEXTUAL@44..45
- 0: MD_TEXTUAL_LITERAL@44..45 "-" [] []
- 5: MD_TEXTUAL@45..46
- 0: MD_TEXTUAL_LITERAL@45..46 "-" [] []
- 6: MD_TEXTUAL@46..47
- 0: MD_TEXTUAL_LITERAL@46..47 "\n" [] []
- 1: (empty)
+ 1: MD_SETEXT_UNDERLINE_LITERAL@43..46 "---" [] []
+ 1: MD_NEWLINE@46..47
+ 0: NEWLINE@46..47 "\n" [] []
10: MD_NEWLINE@47..48
0: NEWLINE@47..48 "\n" [] []
11: MD_BULLET_LIST_ITEM@48..66
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md
new file mode 100644
index 000000000000..ddbeeddc4b1b
--- /dev/null
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md
@@ -0,0 +1,8 @@
+> Foo
+> ---
+
+> Bar
+> ===
+
+> > Nested
+> > ---
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap
new file mode 100644
index 000000000000..970e238098ba
--- /dev/null
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap
@@ -0,0 +1,215 @@
+---
+source: crates/biome_markdown_parser/tests/spec_test.rs
+expression: snapshot
+---
+
+## Input
+
+```
+> Foo
+> ---
+
+> Bar
+> ===
+
+> > Nested
+> > ---
+
+```
+
+
+## AST
+
+```
+MdDocument {
+ bom_token: missing (optional),
+ value: MdBlockList [
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@0..1 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@1..2 " " [] [],
+ },
+ content: MdBlockList [
+ MdSetextHeader {
+ content: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@2..5 "Foo" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@5..6 "\n" [] [],
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@6..7 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@7..8 " " [] [],
+ },
+ ],
+ underline_token: MD_SETEXT_UNDERLINE_LITERAL@8..11 "---" [] [],
+ },
+ MdNewline {
+ value_token: NEWLINE@11..12 "\n" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@12..13 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@13..14 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@14..15 " " [] [],
+ },
+ content: MdBlockList [
+ MdSetextHeader {
+ content: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@15..18 "Bar" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@18..19 "\n" [] [],
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@19..20 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@20..21 " " [] [],
+ },
+ ],
+ underline_token: MD_SETEXT_UNDERLINE_LITERAL@21..24 "===" [] [],
+ },
+ MdNewline {
+ value_token: NEWLINE@24..25 "\n" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@25..26 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@26..27 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@27..28 " " [] [],
+ },
+ content: MdBlockList [
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@28..29 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@29..30 " " [] [],
+ },
+ content: MdBlockList [
+ MdSetextHeader {
+ content: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@30..36 "Nested" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@36..37 "\n" [] [],
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@37..38 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@38..39 " " [] [],
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@39..40 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@40..41 " " [] [],
+ },
+ ],
+ underline_token: MD_SETEXT_UNDERLINE_LITERAL@41..44 "---" [] [],
+ },
+ MdNewline {
+ value_token: NEWLINE@44..45 "\n" [] [],
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ eof_token: EOF@45..45 "" [] [],
+}
+```
+
+## CST
+
+```
+0: MD_DOCUMENT@0..45
+ 0: (empty)
+ 1: MD_BLOCK_LIST@0..45
+ 0: MD_QUOTE@0..12
+ 0: MD_QUOTE_PREFIX@0..2
+ 0: MD_QUOTE_INDENT_LIST@0..0
+ 1: R_ANGLE@0..1 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@1..2 " " [] []
+ 1: MD_BLOCK_LIST@2..12
+ 0: MD_SETEXT_HEADER@2..11
+ 0: MD_INLINE_ITEM_LIST@2..8
+ 0: MD_TEXTUAL@2..5
+ 0: MD_TEXTUAL_LITERAL@2..5 "Foo" [] []
+ 1: MD_TEXTUAL@5..6
+ 0: MD_TEXTUAL_LITERAL@5..6 "\n" [] []
+ 2: MD_QUOTE_PREFIX@6..8
+ 0: MD_QUOTE_INDENT_LIST@6..6
+ 1: R_ANGLE@6..7 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@7..8 " " [] []
+ 1: MD_SETEXT_UNDERLINE_LITERAL@8..11 "---" [] []
+ 1: MD_NEWLINE@11..12
+ 0: NEWLINE@11..12 "\n" [] []
+ 1: MD_NEWLINE@12..13
+ 0: NEWLINE@12..13 "\n" [] []
+ 2: MD_QUOTE@13..25
+ 0: MD_QUOTE_PREFIX@13..15
+ 0: MD_QUOTE_INDENT_LIST@13..13
+ 1: R_ANGLE@13..14 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@14..15 " " [] []
+ 1: MD_BLOCK_LIST@15..25
+ 0: MD_SETEXT_HEADER@15..24
+ 0: MD_INLINE_ITEM_LIST@15..21
+ 0: MD_TEXTUAL@15..18
+ 0: MD_TEXTUAL_LITERAL@15..18 "Bar" [] []
+ 1: MD_TEXTUAL@18..19
+ 0: MD_TEXTUAL_LITERAL@18..19 "\n" [] []
+ 2: MD_QUOTE_PREFIX@19..21
+ 0: MD_QUOTE_INDENT_LIST@19..19
+ 1: R_ANGLE@19..20 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@20..21 " " [] []
+ 1: MD_SETEXT_UNDERLINE_LITERAL@21..24 "===" [] []
+ 1: MD_NEWLINE@24..25
+ 0: NEWLINE@24..25 "\n" [] []
+ 3: MD_NEWLINE@25..26
+ 0: NEWLINE@25..26 "\n" [] []
+ 4: MD_QUOTE@26..45
+ 0: MD_QUOTE_PREFIX@26..28
+ 0: MD_QUOTE_INDENT_LIST@26..26
+ 1: R_ANGLE@26..27 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@27..28 " " [] []
+ 1: MD_BLOCK_LIST@28..45
+ 0: MD_QUOTE@28..45
+ 0: MD_QUOTE_PREFIX@28..30
+ 0: MD_QUOTE_INDENT_LIST@28..28
+ 1: R_ANGLE@28..29 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@29..30 " " [] []
+ 1: MD_BLOCK_LIST@30..45
+ 0: MD_SETEXT_HEADER@30..44
+ 0: MD_INLINE_ITEM_LIST@30..41
+ 0: MD_TEXTUAL@30..36
+ 0: MD_TEXTUAL_LITERAL@30..36 "Nested" [] []
+ 1: MD_TEXTUAL@36..37
+ 0: MD_TEXTUAL_LITERAL@36..37 "\n" [] []
+ 2: MD_QUOTE_PREFIX@37..39
+ 0: MD_QUOTE_INDENT_LIST@37..37
+ 1: R_ANGLE@37..38 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@38..39 " " [] []
+ 3: MD_QUOTE_PREFIX@39..41
+ 0: MD_QUOTE_INDENT_LIST@39..39
+ 1: R_ANGLE@39..40 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@40..41 " " [] []
+ 1: MD_SETEXT_UNDERLINE_LITERAL@41..44 "---" [] []
+ 1: MD_NEWLINE@44..45
+ 0: NEWLINE@44..45 "\n" [] []
+ 2: EOF@45..45 "" [] []
+
+```
diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs
index 48f27a282f6d..0b9e2dcf27c3 100644
--- a/crates/biome_markdown_parser/tests/spec_test.rs
+++ b/crates/biome_markdown_parser/tests/spec_test.rs
@@ -380,4 +380,10 @@ pub fn quick_test() {
"Allowed:
ok
tag.\n",
"Allowed: <div class="a"
\n\nok tag.
\n
\n",
);
+ // Setext heading inside blockquote
+ test_example(
+ 20002,
+ "> Foo\n> ---\n",
+ "<blockquote>\n<h2>Foo</h2>\n</blockquote>\n",
+ );
}
diff --git a/crates/biome_parser/src/lexer.rs b/crates/biome_parser/src/lexer.rs
index da1f8ed7c6c3..04f91c4199f9 100644
--- a/crates/biome_parser/src/lexer.rs
+++ b/crates/biome_parser/src/lexer.rs
@@ -689,6 +689,39 @@ where
kind
}
+
+ /// Re-lex the current token in the given context, treating the position
+ /// as a line start. This overrides `after_line_break` to `true` so the
+ /// lexer produces line-start-gated tokens (e.g. thematic breaks).
+ pub fn force_relex_at_line_start(&mut self, context: Lex::LexContext) -> Lex::Kind {
+ let checkpoint = if let Some(current) = self.current.clone() {
+ current
+ } else if let Some(first) = self.lookahead.get_checkpoint(0).cloned() {
+ first
+ } else {
+ self.inner.checkpoint()
+ };
+
+ let rewind_checkpoint = LexerCheckpoint {
+ position: checkpoint.current_start,
+ current_start: checkpoint.current_start,
+ current_kind: Lex::Kind::EOF,
+ current_flags: TokenFlags::empty(),
+ after_line_break: true,
+ after_whitespace: checkpoint.after_whitespace,
+ unicode_bom_length: checkpoint.unicode_bom_length,
+ diagnostics_pos: checkpoint.diagnostics_pos,
+ };
+
+ self.inner.rewind(rewind_checkpoint);
+ self.current = None;
+ self.lookahead.clear();
+
+ let kind = self.inner.next_token(context);
+ self.current = Some(self.inner.checkpoint());
+
+ kind
+ }
}
impl<'l, Lex> BufferedLexer
From 2df31960ca0926fa58cd2ad0e9a51961861bc06f Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Thu, 2 Apr 2026 21:19:29 -0400
Subject: [PATCH 2/7] fix(markdown_parser): parse quoted thematic breaks at
line start
---
.../biome_markdown_parser/src/syntax/quote.rs | 23 +++++++++++++++++++
.../biome_markdown_parser/tests/spec_test.rs | 1 +
2 files changed, 24 insertions(+)
diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs
index af909584bb16..13379cf01578 100644
--- a/crates/biome_markdown_parser/src/syntax/quote.rs
+++ b/crates/biome_markdown_parser/src/syntax/quote.rs
@@ -97,6 +97,7 @@ pub(crate) fn parse_quote(p: &mut MarkdownParser) -> ParsedSyntax {
p.state_mut().block_quote_depth += 1;
let marker_space = emit_quote_prefix_node(p);
+ force_relex_thematic_break_after_quote_prefix(p);
p.set_virtual_line_start();
parse_quote_block_list(p);
@@ -125,6 +126,27 @@ fn emit_quote_prefix_node(p: &mut MarkdownParser) -> bool {
marker_space
}
+/// After consuming a quote prefix, selectively re-lex the current token as if
+/// it were at line start when the remaining line could form a thematic break.
+///
+/// Re-lexing unconditionally perturbs ordinary quoted text tokenization by
+/// splitting leading spaces into separate tokens. We only need line-start
+/// semantics here for thematic-break candidates like `> ---`.
+fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) {
+ let is_thematic_break_candidate = p.at(T![-])
+ || p.at(T![*])
+ || p.at(UNDERSCORE)
+ || p.at(DOUBLE_UNDERSCORE)
+ || (p.at(MD_TEXTUAL_LITERAL)
+ && p.cur_text()
+ .chars()
+ .all(|c| c == ' ' || c == '\t' || c == '-' || c == '*' || c == '_'));
+
+ if is_thematic_break_candidate {
+ p.force_relex_at_line_start();
+ }
+}
+
/// Emit one quote prefix token sequence: [indent?] `>` [optional space/tab].
///
/// Returns whether a post-marker separator was consumed.
@@ -273,6 +295,7 @@ impl QuoteBlockList {
{
if has_quote_prefix(p, self.depth) {
consume_quote_prefix(p, self.depth);
+ force_relex_thematic_break_after_quote_prefix(p);
self.line_started_with_prefix = true;
} else {
return false;
diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs
index 0b9e2dcf27c3..2f8a475bebb2 100644
--- a/crates/biome_markdown_parser/tests/spec_test.rs
+++ b/crates/biome_markdown_parser/tests/spec_test.rs
@@ -386,4 +386,5 @@ pub fn quick_test() {
"> Foo\n> ---\n",
"<blockquote>\n<h2>Foo</h2>\n</blockquote>\n",
);
+ test_example(20003, "> ---\n", "<blockquote>\n<hr />\n</blockquote>\n");
}
From 23de06d6715c4f7e394210ef8d1837639b1e0458 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Fri, 3 Apr 2026 09:43:41 -0400
Subject: [PATCH 3/7] fix(review): use lookup table, fix mixed-char bug in
thematic break candidate, add tests
---
.../biome_markdown_parser/src/syntax/quote.rs | 37 +-
.../ok/setext_heading_in_blockquote.md | 19 +
.../ok/setext_heading_in_blockquote.md.snap | 530 +++++++++++++++++-
3 files changed, 579 insertions(+), 7 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs
index 13379cf01578..032e1fb62989 100644
--- a/crates/biome_markdown_parser/src/syntax/quote.rs
+++ b/crates/biome_markdown_parser/src/syntax/quote.rs
@@ -132,21 +132,52 @@ fn emit_quote_prefix_node(p: &mut MarkdownParser) -> bool {
/// Re-lexing unconditionally perturbs ordinary quoted text tokenization by
/// splitting leading spaces into separate tokens. We only need line-start
/// semantics here for thematic-break candidates like `> ---`.
+///
+/// A candidate is any line whose non-whitespace bytes are all the **same**
+/// thematic break character (`-`, `*`, or `_`). Per CommonMark §4.1, mixing
+/// different break characters (e.g. `_*-`) does **not** form a thematic break.
fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) {
let is_thematic_break_candidate = p.at(T![-])
|| p.at(T![*])
|| p.at(UNDERSCORE)
|| p.at(DOUBLE_UNDERSCORE)
|| (p.at(MD_TEXTUAL_LITERAL)
- && p.cur_text()
- .chars()
- .all(|c| c == ' ' || c == '\t' || c == '-' || c == '*' || c == '_'));
+ && is_thematic_break_candidate_text(p.cur_text()));
if is_thematic_break_candidate {
p.force_relex_at_line_start();
}
}
+/// Check if `text` could be a thematic break: all non-whitespace bytes must be
+/// the **same** thematic break character (`-`, `*`, or `_`).
+fn is_thematic_break_candidate_text(text: &str) -> bool {
+    use biome_unicode_table::{Dispatch::WHS, lookup_byte};
+
+    let mut break_char: Option<u8> = None;
+    for &b in text.as_bytes() {
+        // Skip whitespace (space, tab, etc.) via the shared lookup table.
+        if lookup_byte(b) == WHS {
+            continue;
+        }
+        match b {
+            b'-' | b'*' | b'_' => {
+                if let Some(expected) = break_char {
+                    // Mixed break characters like `_*-` are not valid.
+                    if b != expected {
+                        return false;
+                    }
+                } else {
+                    break_char = Some(b);
+                }
+            }
+            // Any other non-whitespace byte disqualifies the line.
+            _ => return false,
+        }
+    }
+    break_char.is_some()
+}
+
/// Emit one quote prefix token sequence: [indent?] `>` [optional space/tab].
///
/// Returns whether a post-marker separator was consumed.
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md
index ddbeeddc4b1b..9827d404a6f3 100644
--- a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md
@@ -6,3 +6,22 @@
> > Nested
> > ---
+
+> Dashes with spaces
+> - - -
+
+> Stars
+> ***
+
+> Stars with spaces
+> * * *
+
+> Underscores
+> ___
+
+> Underscores with spaces
+> _ _ _
+
+> Mixed break chars are NOT thematic breaks (CommonMark §4.1),
+> so this line is a continuation paragraph, not a heading.
+> -*_
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap
index 970e238098ba..227f0407701c 100644
--- a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading_in_blockquote.md.snap
@@ -15,6 +15,25 @@ expression: snapshot
> > Nested
> > ---
+> Dashes with spaces
+> - - -
+
+> Stars
+> ***
+
+> Stars with spaces
+> * * *
+
+> Underscores
+> ___
+
+> Underscores with spaces
+> _ _ _
+
+> Mixed break chars are NOT thematic breaks (CommonMark §4.1),
+> so this line is a continuation paragraph, not a heading.
+> -*_
+
```
@@ -128,17 +147,319 @@ MdDocument {
},
],
},
+ MdNewline {
+ value_token: NEWLINE@45..46 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@46..47 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@47..48 " " [] [],
+ },
+ content: MdBlockList [
+ MdParagraph {
+ list: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@48..66 "Dashes with spaces" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@66..67 "\n" [] [],
+ },
+ ],
+ hard_line: missing (optional),
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@67..68 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@68..69 " " [] [],
+ },
+ MdThematicBreakBlock {
+ parts: MdThematicBreakPartList [
+ MdThematicBreakChar {
+ value: MINUS@69..70 "-" [] [],
+ },
+ MdIndentToken {
+ md_indent_char_token: MD_INDENT_CHAR@70..71 " " [] [],
+ },
+ MdThematicBreakChar {
+ value: MINUS@71..72 "-" [] [],
+ },
+ MdIndentToken {
+ md_indent_char_token: MD_INDENT_CHAR@72..73 " " [] [],
+ },
+ MdThematicBreakChar {
+ value: MINUS@73..74 "-" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@74..75 "\n" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@75..76 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@76..77 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@77..78 " " [] [],
+ },
+ content: MdBlockList [
+ MdParagraph {
+ list: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@78..83 "Stars" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@83..84 "\n" [] [],
+ },
+ ],
+ hard_line: missing (optional),
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@84..85 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@85..86 " " [] [],
+ },
+ MdParagraph {
+ list: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@86..87 "*" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@87..88 "*" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@88..89 "*" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@89..90 "\n" [] [],
+ },
+ ],
+ hard_line: missing (optional),
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@90..91 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@91..92 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@92..93 " " [] [],
+ },
+ content: MdBlockList [
+ MdParagraph {
+ list: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@93..110 "Stars with spaces" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@110..111 "\n" [] [],
+ },
+ ],
+ hard_line: missing (optional),
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@111..112 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@112..113 " " [] [],
+ },
+ MdThematicBreakBlock {
+ parts: MdThematicBreakPartList [
+ MdThematicBreakChar {
+ value: STAR@113..114 "*" [] [],
+ },
+ MdIndentToken {
+ md_indent_char_token: MD_INDENT_CHAR@114..115 " " [] [],
+ },
+ MdThematicBreakChar {
+ value: STAR@115..116 "*" [] [],
+ },
+ MdIndentToken {
+ md_indent_char_token: MD_INDENT_CHAR@116..117 " " [] [],
+ },
+ MdThematicBreakChar {
+ value: STAR@117..118 "*" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@118..119 "\n" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@119..120 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@120..121 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@121..122 " " [] [],
+ },
+ content: MdBlockList [
+ MdParagraph {
+ list: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@122..133 "Underscores" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@133..134 "\n" [] [],
+ },
+ ],
+ hard_line: missing (optional),
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@134..135 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@135..136 " " [] [],
+ },
+ MdThematicBreakBlock {
+ parts: MdThematicBreakPartList [
+ MdThematicBreakChar {
+ value: UNDERSCORE@136..137 "_" [] [],
+ },
+ MdThematicBreakChar {
+ value: UNDERSCORE@137..138 "_" [] [],
+ },
+ MdThematicBreakChar {
+ value: UNDERSCORE@138..139 "_" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@139..140 "\n" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@140..141 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@141..142 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@142..143 " " [] [],
+ },
+ content: MdBlockList [
+ MdParagraph {
+ list: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@143..166 "Underscores with spaces" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@166..167 "\n" [] [],
+ },
+ ],
+ hard_line: missing (optional),
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@167..168 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@168..169 " " [] [],
+ },
+ MdThematicBreakBlock {
+ parts: MdThematicBreakPartList [
+ MdThematicBreakChar {
+ value: UNDERSCORE@169..170 "_" [] [],
+ },
+ MdIndentToken {
+ md_indent_char_token: MD_INDENT_CHAR@170..171 " " [] [],
+ },
+ MdThematicBreakChar {
+ value: UNDERSCORE@171..172 "_" [] [],
+ },
+ MdIndentToken {
+ md_indent_char_token: MD_INDENT_CHAR@172..173 " " [] [],
+ },
+ MdThematicBreakChar {
+ value: UNDERSCORE@173..174 "_" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@174..175 "\n" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@175..176 "\n" [] [],
+ },
+ MdQuote {
+ prefix: MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@176..177 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@177..178 " " [] [],
+ },
+ content: MdBlockList [
+ MdParagraph {
+ list: MdInlineItemList [
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@178..220 "Mixed break chars are NOT thematic breaks " [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@220..221 "(" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@221..237 "CommonMark §4.1" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@237..238 ")" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@238..239 "," [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@239..240 "\n" [] [],
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@240..241 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@241..242 " " [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@242..298 "so this line is a continuation paragraph, not a heading." [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@298..299 "\n" [] [],
+ },
+ MdQuotePrefix {
+ pre_marker_indent: MdQuoteIndentList [],
+ marker_token: R_ANGLE@299..300 ">" [] [],
+ post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@300..301 " " [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@301..302 "-" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@302..303 "*" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@303..304 "_" [] [],
+ },
+ MdTextual {
+ value_token: MD_TEXTUAL_LITERAL@304..305 "\n" [] [],
+ },
+ ],
+ hard_line: missing (optional),
+ },
+ ],
+ },
],
- eof_token: EOF@45..45 "" [] [],
+ eof_token: EOF@305..305 "" [] [],
}
```
## CST
```
-0: MD_DOCUMENT@0..45
+0: MD_DOCUMENT@0..305
0: (empty)
- 1: MD_BLOCK_LIST@0..45
+ 1: MD_BLOCK_LIST@0..305
0: MD_QUOTE@0..12
0: MD_QUOTE_PREFIX@0..2
0: MD_QUOTE_INDENT_LIST@0..0
@@ -210,6 +531,207 @@ MdDocument {
1: MD_SETEXT_UNDERLINE_LITERAL@41..44 "---" [] []
1: MD_NEWLINE@44..45
0: NEWLINE@44..45 "\n" [] []
- 2: EOF@45..45 "" [] []
+ 5: MD_NEWLINE@45..46
+ 0: NEWLINE@45..46 "\n" [] []
+ 6: MD_QUOTE@46..75
+ 0: MD_QUOTE_PREFIX@46..48
+ 0: MD_QUOTE_INDENT_LIST@46..46
+ 1: R_ANGLE@46..47 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@47..48 " " [] []
+ 1: MD_BLOCK_LIST@48..75
+ 0: MD_PARAGRAPH@48..67
+ 0: MD_INLINE_ITEM_LIST@48..67
+ 0: MD_TEXTUAL@48..66
+ 0: MD_TEXTUAL_LITERAL@48..66 "Dashes with spaces" [] []
+ 1: MD_TEXTUAL@66..67
+ 0: MD_TEXTUAL_LITERAL@66..67 "\n" [] []
+ 1: (empty)
+ 1: MD_QUOTE_PREFIX@67..69
+ 0: MD_QUOTE_INDENT_LIST@67..67
+ 1: R_ANGLE@67..68 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@68..69 " " [] []
+ 2: MD_THEMATIC_BREAK_BLOCK@69..74
+ 0: MD_THEMATIC_BREAK_PART_LIST@69..74
+ 0: MD_THEMATIC_BREAK_CHAR@69..70
+ 0: MINUS@69..70 "-" [] []
+ 1: MD_INDENT_TOKEN@70..71
+ 0: MD_INDENT_CHAR@70..71 " " [] []
+ 2: MD_THEMATIC_BREAK_CHAR@71..72
+ 0: MINUS@71..72 "-" [] []
+ 3: MD_INDENT_TOKEN@72..73
+ 0: MD_INDENT_CHAR@72..73 " " [] []
+ 4: MD_THEMATIC_BREAK_CHAR@73..74
+ 0: MINUS@73..74 "-" [] []
+ 3: MD_NEWLINE@74..75
+ 0: NEWLINE@74..75 "\n" [] []
+ 7: MD_NEWLINE@75..76
+ 0: NEWLINE@75..76 "\n" [] []
+ 8: MD_QUOTE@76..90
+ 0: MD_QUOTE_PREFIX@76..78
+ 0: MD_QUOTE_INDENT_LIST@76..76
+ 1: R_ANGLE@76..77 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@77..78 " " [] []
+ 1: MD_BLOCK_LIST@78..90
+ 0: MD_PARAGRAPH@78..84
+ 0: MD_INLINE_ITEM_LIST@78..84
+ 0: MD_TEXTUAL@78..83
+ 0: MD_TEXTUAL_LITERAL@78..83 "Stars" [] []
+ 1: MD_TEXTUAL@83..84
+ 0: MD_TEXTUAL_LITERAL@83..84 "\n" [] []
+ 1: (empty)
+ 1: MD_QUOTE_PREFIX@84..86
+ 0: MD_QUOTE_INDENT_LIST@84..84
+ 1: R_ANGLE@84..85 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@85..86 " " [] []
+ 2: MD_PARAGRAPH@86..90
+ 0: MD_INLINE_ITEM_LIST@86..90
+ 0: MD_TEXTUAL@86..87
+ 0: MD_TEXTUAL_LITERAL@86..87 "*" [] []
+ 1: MD_TEXTUAL@87..88
+ 0: MD_TEXTUAL_LITERAL@87..88 "*" [] []
+ 2: MD_TEXTUAL@88..89
+ 0: MD_TEXTUAL_LITERAL@88..89 "*" [] []
+ 3: MD_TEXTUAL@89..90
+ 0: MD_TEXTUAL_LITERAL@89..90 "\n" [] []
+ 1: (empty)
+ 9: MD_NEWLINE@90..91
+ 0: NEWLINE@90..91 "\n" [] []
+ 10: MD_QUOTE@91..119
+ 0: MD_QUOTE_PREFIX@91..93
+ 0: MD_QUOTE_INDENT_LIST@91..91
+ 1: R_ANGLE@91..92 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@92..93 " " [] []
+ 1: MD_BLOCK_LIST@93..119
+ 0: MD_PARAGRAPH@93..111
+ 0: MD_INLINE_ITEM_LIST@93..111
+ 0: MD_TEXTUAL@93..110
+ 0: MD_TEXTUAL_LITERAL@93..110 "Stars with spaces" [] []
+ 1: MD_TEXTUAL@110..111
+ 0: MD_TEXTUAL_LITERAL@110..111 "\n" [] []
+ 1: (empty)
+ 1: MD_QUOTE_PREFIX@111..113
+ 0: MD_QUOTE_INDENT_LIST@111..111
+ 1: R_ANGLE@111..112 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@112..113 " " [] []
+ 2: MD_THEMATIC_BREAK_BLOCK@113..118
+ 0: MD_THEMATIC_BREAK_PART_LIST@113..118
+ 0: MD_THEMATIC_BREAK_CHAR@113..114
+ 0: STAR@113..114 "*" [] []
+ 1: MD_INDENT_TOKEN@114..115
+ 0: MD_INDENT_CHAR@114..115 " " [] []
+ 2: MD_THEMATIC_BREAK_CHAR@115..116
+ 0: STAR@115..116 "*" [] []
+ 3: MD_INDENT_TOKEN@116..117
+ 0: MD_INDENT_CHAR@116..117 " " [] []
+ 4: MD_THEMATIC_BREAK_CHAR@117..118
+ 0: STAR@117..118 "*" [] []
+ 3: MD_NEWLINE@118..119
+ 0: NEWLINE@118..119 "\n" [] []
+ 11: MD_NEWLINE@119..120
+ 0: NEWLINE@119..120 "\n" [] []
+ 12: MD_QUOTE@120..140
+ 0: MD_QUOTE_PREFIX@120..122
+ 0: MD_QUOTE_INDENT_LIST@120..120
+ 1: R_ANGLE@120..121 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@121..122 " " [] []
+ 1: MD_BLOCK_LIST@122..140
+ 0: MD_PARAGRAPH@122..134
+ 0: MD_INLINE_ITEM_LIST@122..134
+ 0: MD_TEXTUAL@122..133
+ 0: MD_TEXTUAL_LITERAL@122..133 "Underscores" [] []
+ 1: MD_TEXTUAL@133..134
+ 0: MD_TEXTUAL_LITERAL@133..134 "\n" [] []
+ 1: (empty)
+ 1: MD_QUOTE_PREFIX@134..136
+ 0: MD_QUOTE_INDENT_LIST@134..134
+ 1: R_ANGLE@134..135 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@135..136 " " [] []
+ 2: MD_THEMATIC_BREAK_BLOCK@136..139
+ 0: MD_THEMATIC_BREAK_PART_LIST@136..139
+ 0: MD_THEMATIC_BREAK_CHAR@136..137
+ 0: UNDERSCORE@136..137 "_" [] []
+ 1: MD_THEMATIC_BREAK_CHAR@137..138
+ 0: UNDERSCORE@137..138 "_" [] []
+ 2: MD_THEMATIC_BREAK_CHAR@138..139
+ 0: UNDERSCORE@138..139 "_" [] []
+ 3: MD_NEWLINE@139..140
+ 0: NEWLINE@139..140 "\n" [] []
+ 13: MD_NEWLINE@140..141
+ 0: NEWLINE@140..141 "\n" [] []
+ 14: MD_QUOTE@141..175
+ 0: MD_QUOTE_PREFIX@141..143
+ 0: MD_QUOTE_INDENT_LIST@141..141
+ 1: R_ANGLE@141..142 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@142..143 " " [] []
+ 1: MD_BLOCK_LIST@143..175
+ 0: MD_PARAGRAPH@143..167
+ 0: MD_INLINE_ITEM_LIST@143..167
+ 0: MD_TEXTUAL@143..166
+ 0: MD_TEXTUAL_LITERAL@143..166 "Underscores with spaces" [] []
+ 1: MD_TEXTUAL@166..167
+ 0: MD_TEXTUAL_LITERAL@166..167 "\n" [] []
+ 1: (empty)
+ 1: MD_QUOTE_PREFIX@167..169
+ 0: MD_QUOTE_INDENT_LIST@167..167
+ 1: R_ANGLE@167..168 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@168..169 " " [] []
+ 2: MD_THEMATIC_BREAK_BLOCK@169..174
+ 0: MD_THEMATIC_BREAK_PART_LIST@169..174
+ 0: MD_THEMATIC_BREAK_CHAR@169..170
+ 0: UNDERSCORE@169..170 "_" [] []
+ 1: MD_INDENT_TOKEN@170..171
+ 0: MD_INDENT_CHAR@170..171 " " [] []
+ 2: MD_THEMATIC_BREAK_CHAR@171..172
+ 0: UNDERSCORE@171..172 "_" [] []
+ 3: MD_INDENT_TOKEN@172..173
+ 0: MD_INDENT_CHAR@172..173 " " [] []
+ 4: MD_THEMATIC_BREAK_CHAR@173..174
+ 0: UNDERSCORE@173..174 "_" [] []
+ 3: MD_NEWLINE@174..175
+ 0: NEWLINE@174..175 "\n" [] []
+ 15: MD_NEWLINE@175..176
+ 0: NEWLINE@175..176 "\n" [] []
+ 16: MD_QUOTE@176..305
+ 0: MD_QUOTE_PREFIX@176..178
+ 0: MD_QUOTE_INDENT_LIST@176..176
+ 1: R_ANGLE@176..177 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@177..178 " " [] []
+ 1: MD_BLOCK_LIST@178..305
+ 0: MD_PARAGRAPH@178..305
+ 0: MD_INLINE_ITEM_LIST@178..305
+ 0: MD_TEXTUAL@178..220
+ 0: MD_TEXTUAL_LITERAL@178..220 "Mixed break chars are NOT thematic breaks " [] []
+ 1: MD_TEXTUAL@220..221
+ 0: MD_TEXTUAL_LITERAL@220..221 "(" [] []
+ 2: MD_TEXTUAL@221..237
+ 0: MD_TEXTUAL_LITERAL@221..237 "CommonMark §4.1" [] []
+ 3: MD_TEXTUAL@237..238
+ 0: MD_TEXTUAL_LITERAL@237..238 ")" [] []
+ 4: MD_TEXTUAL@238..239
+ 0: MD_TEXTUAL_LITERAL@238..239 "," [] []
+ 5: MD_TEXTUAL@239..240
+ 0: MD_TEXTUAL_LITERAL@239..240 "\n" [] []
+ 6: MD_QUOTE_PREFIX@240..242
+ 0: MD_QUOTE_INDENT_LIST@240..240
+ 1: R_ANGLE@240..241 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@241..242 " " [] []
+ 7: MD_TEXTUAL@242..298
+ 0: MD_TEXTUAL_LITERAL@242..298 "so this line is a continuation paragraph, not a heading." [] []
+ 8: MD_TEXTUAL@298..299
+ 0: MD_TEXTUAL_LITERAL@298..299 "\n" [] []
+ 9: MD_QUOTE_PREFIX@299..301
+ 0: MD_QUOTE_INDENT_LIST@299..299
+ 1: R_ANGLE@299..300 ">" [] []
+ 2: MD_QUOTE_POST_MARKER_SPACE@300..301 " " [] []
+ 10: MD_TEXTUAL@301..302
+ 0: MD_TEXTUAL_LITERAL@301..302 "-" [] []
+ 11: MD_TEXTUAL@302..303
+ 0: MD_TEXTUAL_LITERAL@302..303 "*" [] []
+ 12: MD_TEXTUAL@303..304
+ 0: MD_TEXTUAL_LITERAL@303..304 "_" [] []
+ 13: MD_TEXTUAL@304..305
+ 0: MD_TEXTUAL_LITERAL@304..305 "\n" [] []
+ 1: (empty)
+ 2: EOF@305..305 "" [] []
```
From e753161117559ac2c3cdaa2dd6d813a95ff8952f Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Fri, 3 Apr 2026 13:52:52 +0000
Subject: [PATCH 4/7] [autofix.ci] apply automated fixes
---
crates/biome_markdown_parser/src/syntax/quote.rs | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs
index 032e1fb62989..e9a31062a394 100644
--- a/crates/biome_markdown_parser/src/syntax/quote.rs
+++ b/crates/biome_markdown_parser/src/syntax/quote.rs
@@ -141,8 +141,7 @@ fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) {
|| p.at(T![*])
|| p.at(UNDERSCORE)
|| p.at(DOUBLE_UNDERSCORE)
- || (p.at(MD_TEXTUAL_LITERAL)
- && is_thematic_break_candidate_text(p.cur_text()));
+ || (p.at(MD_TEXTUAL_LITERAL) && is_thematic_break_candidate_text(p.cur_text()));
if is_thematic_break_candidate {
p.force_relex_at_line_start();
From 633b0c59a842dcfb7bd95a36c405d0c2de0181f8 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Fri, 3 Apr 2026 10:26:55 -0400
Subject: [PATCH 5/7] fix(markdown): relex quoted thematic breaks after
indented code
---
crates/biome_markdown_parser/src/syntax/quote.rs | 9 +++++++--
crates/biome_markdown_parser/tests/spec_test.rs | 6 ++++++
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs
index e9a31062a394..cf0495aae3a1 100644
--- a/crates/biome_markdown_parser/src/syntax/quote.rs
+++ b/crates/biome_markdown_parser/src/syntax/quote.rs
@@ -97,7 +97,7 @@ pub(crate) fn parse_quote(p: &mut MarkdownParser) -> ParsedSyntax {
p.state_mut().block_quote_depth += 1;
let marker_space = emit_quote_prefix_node(p);
- force_relex_thematic_break_after_quote_prefix(p);
+ relex_after_quote_prefix_consumed(p);
p.set_virtual_line_start();
parse_quote_block_list(p);
@@ -148,6 +148,10 @@ fn force_relex_thematic_break_after_quote_prefix(p: &mut MarkdownParser) {
}
}
+fn relex_after_quote_prefix_consumed(p: &mut MarkdownParser) {
+ force_relex_thematic_break_after_quote_prefix(p);
+}
+
/// Check if `text` could be a thematic break: all non-whitespace bytes must be
/// the **same** thematic break character (`-`, `*`, or `_`).
fn is_thematic_break_candidate_text(text: &str) -> bool {
@@ -325,7 +329,7 @@ impl QuoteBlockList {
{
if has_quote_prefix(p, self.depth) {
consume_quote_prefix(p, self.depth);
- force_relex_thematic_break_after_quote_prefix(p);
+ relex_after_quote_prefix_consumed(p);
self.line_started_with_prefix = true;
} else {
return false;
@@ -586,6 +590,7 @@ fn parse_code_block_newline(p: &mut MarkdownParser, depth: usize) -> bool {
}
consume_quote_prefix(p, depth);
+ relex_after_quote_prefix_consumed(p);
// Blank lines (consecutive newlines) are allowed in indented code
if p.at(NEWLINE) {
diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs
index 2f8a475bebb2..1589cbf45da9 100644
--- a/crates/biome_markdown_parser/tests/spec_test.rs
+++ b/crates/biome_markdown_parser/tests/spec_test.rs
@@ -288,6 +288,12 @@ pub fn quick_test() {
"> ```\n> hello\n> ```\n",
 "<blockquote>\n<pre><code>hello\n</code></pre>\n</blockquote>\n",
);
+ // Quoted indented code must terminate before a quoted thematic break.
+ test_example(
+ 99921,
+ ">     code\n> ---\n",
+ "<blockquote>\n<pre><code>code\n</code></pre>\n<hr />\n</blockquote>\n",
+ );
test_example(
9993,
"- foo\n - bar\n",
From 71f073b46b80456dacdb41c069d40109b3ffa496 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Fri, 3 Apr 2026 10:37:38 -0400
Subject: [PATCH 6/7] fix(markdown): stop quoted code before thematic break
---
.../biome_markdown_parser/src/syntax/quote.rs | 20 ++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs
index cf0495aae3a1..aed2f989813d 100644
--- a/crates/biome_markdown_parser/src/syntax/quote.rs
+++ b/crates/biome_markdown_parser/src/syntax/quote.rs
@@ -589,16 +589,30 @@ fn parse_code_block_newline(p: &mut MarkdownParser, depth: usize) -> bool {
return false;
}
+ let continues_code_block = p.lookahead(|p| {
+ consume_quote_prefix(p, depth);
+
+ // Blank lines (consecutive newlines) are allowed in indented code.
+ if p.at(NEWLINE) {
+ return true;
+ }
+
+ at_quote_indented_code_start(p)
+ });
+
+ if !continues_code_block {
+ return false;
+ }
+
consume_quote_prefix(p, depth);
relex_after_quote_prefix_consumed(p);
- // Blank lines (consecutive newlines) are allowed in indented code
+ // Blank lines (consecutive newlines) are allowed in indented code.
if p.at(NEWLINE) {
return true;
}
- // Next line must still be indented to continue the code block
- at_quote_indented_code_start(p)
+ true
}
/// Parse a single textual token in an indented code block.
From e9eca1242ef0bc3586b76372b02f840d81173af4 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Sun, 5 Apr 2026 19:29:59 -0400
Subject: [PATCH 7/7] refactor: use dispatch table for thematic break char
matching
Address review feedback: use `biome_unicode_table` dispatch variants
(MIN, MUL, IDT) instead of raw byte literals for thematic break
character matching in `is_thematic_break_candidate_text`.
---
.../biome_markdown_parser/src/syntax/quote.rs | 31 ++++++++++++-------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs
index aed2f989813d..20e3153eeb56 100644
--- a/crates/biome_markdown_parser/src/syntax/quote.rs
+++ b/crates/biome_markdown_parser/src/syntax/quote.rs
@@ -155,27 +155,34 @@ fn relex_after_quote_prefix_consumed(p: &mut MarkdownParser) {
/// Check if `text` could be a thematic break: all non-whitespace bytes must be
/// the **same** thematic break character (`-`, `*`, or `_`).
fn is_thematic_break_candidate_text(text: &str) -> bool {
- use biome_unicode_table::{Dispatch::WHS, lookup_byte};
+ use biome_unicode_table::{
+ Dispatch::{IDT, MIN, MUL, WHS},
+ lookup_byte,
+ };
 let mut break_char: Option<u8> = None;
for &b in text.as_bytes() {
+ let dispatched = lookup_byte(b);
// Skip whitespace (space, tab, etc.) via the shared lookup table.
- if lookup_byte(b) == WHS {
+ if dispatched == WHS {
continue;
}
- match b {
- b'-' | b'*' | b'_' => {
- if let Some(expected) = break_char {
- // Mixed break characters like `_*-` are not valid.
- if b != expected {
- return false;
- }
- } else {
- break_char = Some(b);
+ // Match thematic break characters via dispatch variants:
+ // MIN = `-`, MUL = `*`, IDT = `_` (IDT also covers letters, so
+ // narrow to `b'_'` explicitly).
+ let is_break_char = matches!(dispatched, MIN | MUL) || (dispatched == IDT && b == b'_');
+ if is_break_char {
+ if let Some(expected) = break_char {
+ // Mixed break characters like `_*-` are not valid.
+ if b != expected {
+ return false;
}
+ } else {
+ break_char = Some(b);
}
+ } else {
// Any other non-whitespace byte disqualifies the line.
- _ => return false,
+ return false;
}
}
break_char.is_some()