From 95747c55aa01729b86663998aeb4dc2989fb130b Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Sun, 12 Apr 2026 14:02:18 -0400
Subject: [PATCH 1/4] fix(markdown_parser): prefer list item over thematic
break for `- ---`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When the lexer produces `MD_THEMATIC_BREAK_LITERAL` for a line like
`- ---`, the thematic break interpretation won because it was checked
before list items in the block dispatcher.
Per CommonMark §5.2/§4.1 (and verified against commonmark.js +
markdown-it), when stripping a bullet marker + space from the token
leaves content that is itself a valid thematic break (3+ matching
chars), the list item interpretation should win. E.g.:
- `- ---` → list item containing `<hr />` (3 chars remain)
- `- - -` → thematic break (only 2 chars remain after marker)
The fix adds a parser-side guard (`thematic_break_hides_list_item`)
that inspects the token text. When triggered, the token is re-lexed
via `ThematicBreakParts` context to expose the individual marker
tokens, then list item parsing proceeds normally.
---
.../biome_markdown_parser/src/syntax/mod.rs | 36 ++++--
.../src/syntax/thematic_break_block.rs | 41 +++++++
.../ok/thematic_break_in_list.md.snap | 109 ++++++++++--------
.../biome_markdown_parser/tests/spec_test.rs | 43 +++++--
4 files changed, 159 insertions(+), 70 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/mod.rs b/crates/biome_markdown_parser/src/syntax/mod.rs
index d778917c0d86..14934aa6165e 100644
--- a/crates/biome_markdown_parser/src/syntax/mod.rs
+++ b/crates/biome_markdown_parser/src/syntax/mod.rs
@@ -58,7 +58,9 @@ use quote::{
at_quote, consume_quote_prefix, consume_quote_prefix_without_virtual, has_quote_prefix,
line_has_quote_prefix_at_current, parse_quote,
};
-use thematic_break_block::{at_thematic_break_block, parse_thematic_break_block};
+use thematic_break_block::{
+ at_thematic_break_block, parse_thematic_break_block, thematic_break_hides_list_item,
+};
use crate::MarkdownParser;
@@ -289,17 +291,29 @@ pub(crate) fn parse_any_block_with_indent_code_policy(
} else if line_starts_with_fence(p) {
parse_fenced_code_block_force(p)
} else if at_thematic_break_block(p) {
- let break_block = try_parse(p, |p| {
- let break_block = parse_thematic_break_block(p);
- if break_block.is_absent() {
- return Err(());
- }
- Ok(break_block)
- });
- if let Ok(parsed) = break_block {
- parsed
+ // Per CommonMark §5.2 / §4.1: when the thematic break token starts with
+ // a bullet marker + space and the remaining content is a consecutive run
+ // of 3+ matching break characters, the list item interpretation wins.
+ // E.g. `- ---` → list item containing <hr />,
+ // but `- - -` → thematic break (remainder `- -` is not a consecutive run).
+ let is_hidden_list_item =
+ p.at(MD_THEMATIC_BREAK_LITERAL) && thematic_break_hides_list_item(p.cur_text());
+ if is_hidden_list_item {
+ p.force_relex_thematic_break_parts();
+ parse_bullet_list_item(p)
} else {
- parse_paragraph(p)
+ let break_block = try_parse(p, |p| {
+ let break_block = parse_thematic_break_block(p);
+ if break_block.is_absent() {
+ return Err(());
+ }
+ Ok(break_block)
+ });
+ if let Ok(parsed) = break_block {
+ parsed
+ } else {
+ parse_paragraph(p)
+ }
}
} else if at_header(p) {
// Check for too many hashes BEFORE try_parse (which would lose diagnostics on rewind)
diff --git a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
index 89d3bce075d7..b2879871418c 100644
--- a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
+++ b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
@@ -42,6 +42,47 @@ pub(crate) fn at_thematic_break_block(p: &mut MarkdownParser) -> bool {
})
}
+/// Check if a `MD_THEMATIC_BREAK_LITERAL` token text should actually be parsed
+/// as a bullet list item whose content is a thematic break.
+///
+/// Returns `true` when the text can be split as:
+/// `bullet_marker` + `space/tab` + `consecutive_thematic_break`
+///
+/// The payload must be a CONSECUTIVE run of 3+ matching break characters
+/// with no internal spaces. This distinguishes:
+/// `- ---` → list item (payload `---` is consecutive)
+/// `- - -` → thematic break (payload `- -` has internal spaces)
+/// `- - - -` → thematic break (payload `- - -` has internal spaces)
+///
+/// Only bullet markers (`-`, `*`, `+`) are checked — ordered list markers
+/// cannot collide with thematic break characters.
+pub(crate) fn thematic_break_hides_list_item(text: &str) -> bool {
+ let bytes = text.as_bytes();
+ // Need at least: marker (1) + space (1) + 3 break chars = 5 bytes
+ if bytes.len() < 5 {
+ return false;
+ }
+ if !matches!(bytes[0], b'-' | b'*' | b'+') {
+ return false;
+ }
+ if !matches!(bytes[1], b' ' | b'\t') {
+ return false;
+ }
+
+ // The payload (after marker + space) must be 3+ consecutive matching
+ // break characters, optionally followed by trailing whitespace only.
+ let payload = text[2..].trim_end_matches([' ', '\t']);
+ let payload_bytes = payload.as_bytes();
+ if payload_bytes.len() < THEMATIC_BREAK_MIN_CHARS {
+ return false;
+ }
+ let break_char = payload_bytes[0];
+ if !matches!(break_char, b'-' | b'*' | b'_') {
+ return false;
+ }
+ payload_bytes.iter().all(|&b| b == break_char)
+}
+
/// Check if the remaining content forms a thematic break pattern.
///
/// Per CommonMark §4.1, a thematic break is 3 or more matching characters
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap
index b8a714a0ef85..6e74597ccdac 100644
--- a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap
@@ -70,33 +70,37 @@ MdDocument {
MdNewline {
value_token: NEWLINE@8..9 "\n" [] [],
},
- MdThematicBreakBlock {
- parts: MdThematicBreakPartList [
- MdThematicBreakChar {
- value: MINUS@9..10 "-" [] [],
- },
- MdIndentToken {
- md_indent_char_token: MD_INDENT_CHAR@10..11 " " [] [],
- },
- MdThematicBreakChar {
- value: MINUS@11..12 "-" [] [],
- },
- MdThematicBreakChar {
- value: MINUS@12..13 "-" [] [],
- },
- MdThematicBreakChar {
- value: MINUS@13..14 "-" [] [],
- },
- ],
- },
- MdNewline {
- value_token: NEWLINE@14..15 "\n" [] [],
- },
- MdNewline {
- value_token: NEWLINE@15..16 "\n" [] [],
- },
MdBulletListItem {
md_bullet_list: MdBulletList [
+ MdBullet {
+ prefix: MdListMarkerPrefix {
+ pre_marker_indent: MdIndentTokenList [],
+ marker: MINUS@9..10 "-" [] [],
+ post_marker_space_token: MD_LIST_POST_MARKER_SPACE@10..11 " " [] [],
+ content_indent: MdIndentTokenList [],
+ },
+ content: MdBlockList [
+ MdThematicBreakBlock {
+ parts: MdThematicBreakPartList [
+ MdThematicBreakChar {
+ value: MINUS@11..12 "-" [] [],
+ },
+ MdThematicBreakChar {
+ value: MINUS@12..13 "-" [] [],
+ },
+ MdThematicBreakChar {
+ value: MINUS@13..14 "-" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@14..15 "\n" [] [],
+ },
+ ],
+ },
+ MdNewline {
+ value_token: NEWLINE@15..16 "\n" [] [],
+ },
MdBullet {
prefix: MdListMarkerPrefix {
pre_marker_indent: MdIndentTokenList [],
@@ -332,25 +336,28 @@ MdDocument {
0: NEWLINE@7..8 "\n" [] []
2: MD_NEWLINE@8..9
0: NEWLINE@8..9 "\n" [] []
- 3: MD_THEMATIC_BREAK_BLOCK@9..14
- 0: MD_THEMATIC_BREAK_PART_LIST@9..14
- 0: MD_THEMATIC_BREAK_CHAR@9..10
- 0: MINUS@9..10 "-" [] []
- 1: MD_INDENT_TOKEN@10..11
- 0: MD_INDENT_CHAR@10..11 " " [] []
- 2: MD_THEMATIC_BREAK_CHAR@11..12
- 0: MINUS@11..12 "-" [] []
- 3: MD_THEMATIC_BREAK_CHAR@12..13
- 0: MINUS@12..13 "-" [] []
- 4: MD_THEMATIC_BREAK_CHAR@13..14
- 0: MINUS@13..14 "-" [] []
- 4: MD_NEWLINE@14..15
- 0: NEWLINE@14..15 "\n" [] []
- 5: MD_NEWLINE@15..16
- 0: NEWLINE@15..16 "\n" [] []
- 6: MD_BULLET_LIST_ITEM@16..21
- 0: MD_BULLET_LIST@16..21
- 0: MD_BULLET@16..21
+ 3: MD_BULLET_LIST_ITEM@9..21
+ 0: MD_BULLET_LIST@9..21
+ 0: MD_BULLET@9..15
+ 0: MD_LIST_MARKER_PREFIX@9..11
+ 0: MD_INDENT_TOKEN_LIST@9..9
+ 1: MINUS@9..10 "-" [] []
+ 2: MD_LIST_POST_MARKER_SPACE@10..11 " " [] []
+ 3: MD_INDENT_TOKEN_LIST@11..11
+ 1: MD_BLOCK_LIST@11..15
+ 0: MD_THEMATIC_BREAK_BLOCK@11..14
+ 0: MD_THEMATIC_BREAK_PART_LIST@11..14
+ 0: MD_THEMATIC_BREAK_CHAR@11..12
+ 0: MINUS@11..12 "-" [] []
+ 1: MD_THEMATIC_BREAK_CHAR@12..13
+ 0: MINUS@12..13 "-" [] []
+ 2: MD_THEMATIC_BREAK_CHAR@13..14
+ 0: MINUS@13..14 "-" [] []
+ 1: MD_NEWLINE@14..15
+ 0: NEWLINE@14..15 "\n" [] []
+ 1: MD_NEWLINE@15..16
+ 0: NEWLINE@15..16 "\n" [] []
+ 2: MD_BULLET@16..21
0: MD_LIST_MARKER_PREFIX@16..18
0: MD_INDENT_TOKEN_LIST@16..16
1: MINUS@16..17 "-" [] []
@@ -365,11 +372,11 @@ MdDocument {
0: UNDERSCORE@19..20 "_" [] []
2: MD_THEMATIC_BREAK_CHAR@20..21
0: UNDERSCORE@20..21 "_" [] []
- 7: MD_NEWLINE@21..22
+ 4: MD_NEWLINE@21..22
0: NEWLINE@21..22 "\n" [] []
- 8: MD_NEWLINE@22..23
+ 5: MD_NEWLINE@22..23
0: NEWLINE@22..23 "\n" [] []
- 9: MD_BULLET_LIST_ITEM@23..28
+ 6: MD_BULLET_LIST_ITEM@23..28
0: MD_BULLET_LIST@23..28
0: MD_BULLET@23..28
0: MD_LIST_MARKER_PREFIX@23..25
@@ -386,11 +393,11 @@ MdDocument {
0: MINUS@26..27 "-" [] []
2: MD_THEMATIC_BREAK_CHAR@27..28
0: MINUS@27..28 "-" [] []
- 10: MD_NEWLINE@28..29
+ 7: MD_NEWLINE@28..29
0: NEWLINE@28..29 "\n" [] []
- 11: MD_NEWLINE@29..30
+ 8: MD_NEWLINE@29..30
0: NEWLINE@29..30 "\n" [] []
- 12: MD_BULLET_LIST_ITEM@30..62
+ 9: MD_BULLET_LIST_ITEM@30..62
0: MD_BULLET_LIST@30..62
0: MD_BULLET@30..37
0: MD_LIST_MARKER_PREFIX@30..32
@@ -476,7 +483,7 @@ MdDocument {
0: UNDERSCORE@60..61 "_" [] []
4: MD_THEMATIC_BREAK_CHAR@61..62
0: UNDERSCORE@61..62 "_" [] []
- 13: MD_NEWLINE@62..63
+ 10: MD_NEWLINE@62..63
0: NEWLINE@62..63 "\n" [] []
2: EOF@63..63 "" [] []
diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs
index d4c34ce710bf..508647d04aec 100644
--- a/crates/biome_markdown_parser/tests/spec_test.rs
+++ b/crates/biome_markdown_parser/tests/spec_test.rs
@@ -418,12 +418,10 @@ pub fn quick_test() {
"\n\n",
);
// Reduce: thematic break in list then different marker
- // NOTE: `- ---` is a pre-existing Biome bug where it parses as a top-level
- // thematic break instead of a list item containing <hr />.
test_example(
30013,
"- ---\n\n+ item\n",
- "
\n\n",
+ "\n\n",
);
// Reduce: setext heading in list then different marker
test_example(
@@ -459,6 +457,38 @@ pub fn quick_test() {
"- outer\n - nested\n lazy line\nhello\n",
"\n- outer\n
\n- nested\nlazy line\nhello
\n
\n \n
\n",
);
+
+ // #region Thematic break vs list item precedence
+ //
+ // When a bullet marker + space leaves content that is itself a valid
+ // thematic break (3+ consecutive matching chars), the list item wins.
+ // When removing the marker leaves spaced or < 3 chars, it stays a break.
+
+ // `- ---` → list item containing <hr /> (3 consecutive dashes after `- `)
+ test_example(30020, "- ---\n", "\n");
+ // `* ***` → list item containing <hr /> (3 consecutive stars after `* `)
+ test_example(30021, "* ***\n", "\n");
+ // `+ ___` → list item containing <hr /> (3 consecutive underscores after `+ `)
+ test_example(30022, "+ ___\n", "\n");
+ // `- ---` with following content and marker change
+ test_example(
+ 30023,
+ "- ---\n\n+ item\n",
+ "\n\n",
+ );
+
+ // These remain thematic breaks — removing the marker leaves spaced or < 3 chars.
+ // `- - -` → thematic break (payload `- -` has internal spaces)
+ test_example(30024, "- - -\n", "
\n");
+ // `* * *` → thematic break (payload `* *` has internal spaces)
+ test_example(30025, "* * *\n", "
\n");
+ // Plain `---` → thematic break (no list marker prefix)
+ test_example(30026, "---\n", "
\n");
+ // `***` → thematic break
+ test_example(30027, "***\n", "
\n");
+ // `___` → thematic break (underscore is not a bullet marker)
+ test_example(30028, "___\n", "
\n");
+ // #endregion
}
fn fuzz_test_example(num: u32, input: &str, expected: &str) {
@@ -492,16 +522,12 @@ fn fuzz_mixed_markers_paragraph() {
);
}
-/// NOTE: `- ---` is parsed by Biome as a top-level thematic break rather than
-/// a list item containing `<hr />`. This is a separate pre-existing bug
-/// (thematic break precedence over list marker) unrelated to the mixed-marker
-/// list-split fix. The expected value here matches Biome's current behavior.
#[test]
fn fuzz_mixed_markers_thematic_break() {
fuzz_test_example(
3,
"- ---\n\n+ item\n",
- "
\n\n",
+ "\n\n",
);
}
@@ -549,3 +575,4 @@ fn fuzz_code_after_list_not_absorbed() {
"\ncode here\n
\n",
);
}
+
From a5f958e4beacd2aecbdd2604f11ff47f2b2885e8 Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Sun, 12 Apr 2026 18:46:09 +0000
Subject: [PATCH 2/4] [autofix.ci] apply automated fixes
---
crates/biome_markdown_parser/tests/spec_test.rs | 1 -
1 file changed, 1 deletion(-)
diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs
index 508647d04aec..db089d8b807a 100644
--- a/crates/biome_markdown_parser/tests/spec_test.rs
+++ b/crates/biome_markdown_parser/tests/spec_test.rs
@@ -575,4 +575,3 @@ fn fuzz_code_after_list_not_absorbed() {
"\ncode here\n
\n",
);
}
-
From bd64d6901c34dd6f87ed695642350adee1dc331c Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:19:10 -0400
Subject: [PATCH 3/4] refactor(markdown_parser): use lookup_byte for thematic
break marker classification
Route `*`, `-`, and `_` classification through `biome_unicode_table::lookup_byte`
via a shared `is_break_marker` helper, following the project convention. Whitespace
checks (`' '`/`'\t'`) are kept explicit since `WHS` is semantically broader than
what CommonMark requires here.
---
.../src/syntax/thematic_break_block.rs | 49 +++++++++++++------
1 file changed, 33 insertions(+), 16 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
index b2879871418c..42dd74e19f8e 100644
--- a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
+++ b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
@@ -21,10 +21,25 @@ use biome_parser::{
Parser,
prelude::ParsedSyntax::{self, *},
};
+use biome_unicode_table::Dispatch::{IDT, MIN, MUL};
+use biome_unicode_table::lookup_byte;
/// CommonMark requires 3 or more matching characters for thematic breaks.
const THEMATIC_BREAK_MIN_CHARS: usize = 3;
+/// Whether `byte` is a thematic break marker character (`*`, `-`, or `_`).
+///
+/// Uses the `biome_unicode_table` lookup table for `*` (`MUL`) and `-` (`MIN`).
+/// `_` shares the `IDT` dispatch variant with ASCII letters, so an explicit
+/// byte check is required to disambiguate.
+fn is_break_marker(byte: u8) -> bool {
+ match lookup_byte(byte) {
+ MUL | MIN => true,
+ IDT => byte == b'_',
+ _ => false,
+ }
+}
+
pub(crate) fn at_thematic_break_block(p: &mut MarkdownParser) -> bool {
p.lookahead(|p| {
if p.at_line_start() || p.at_start_of_input() {
@@ -101,22 +116,25 @@ fn is_thematic_break_pattern(p: &mut MarkdownParser) -> bool {
// If the entire line segment is a single textual literal, validate it directly.
if p.at(MD_TEXTUAL_LITERAL)
&& p.cur_text()
- .chars()
- .all(|c| c == ' ' || c == '\t' || c == '*' || c == '-' || c == '_')
+ .bytes()
+ .all(|b| b == b' ' || b == b'\t' || is_break_marker(b))
{
- let mut break_char = None;
+ let mut break_byte = None;
let mut break_count = 0usize;
- for c in p.cur_text().chars() {
- if c == ' ' || c == '\t' {
+ for b in p.cur_text().bytes() {
+ if b == b' ' || b == b'\t' {
continue;
}
- if let Some(existing) = break_char {
- if existing != c {
+ if !is_break_marker(b) {
+ return false;
+ }
+ if let Some(existing) = break_byte {
+ if existing != b {
return false;
}
} else {
- break_char = Some(c);
+ break_byte = Some(b);
}
break_count += 1;
}
@@ -143,11 +161,11 @@ fn is_thematic_break_pattern(p: &mut MarkdownParser) -> bool {
} else if p.at(MD_TEXTUAL_LITERAL) {
let text = p.cur_text();
if text.len() == 1 {
- match text.chars().next() {
- Some('*') => '*',
- Some('-') => '-',
- Some('_') => '_',
- _ => return false,
+ let b = text.as_bytes()[0];
+ if is_break_marker(b) {
+ b as char
+ } else {
+ return false;
}
} else {
return false;
@@ -270,9 +288,8 @@ fn parse_thematic_break_parts(p: &mut MarkdownParser) {
}
if p.at(MD_TEXTUAL_LITERAL) {
- let first_char = p.cur_text().as_bytes().first().copied();
- match first_char {
- Some(b'*' | b'-' | b'_' | b' ' | b'\t') => {
+ match p.cur_text().as_bytes().first().copied() {
+ Some(b) if is_break_marker(b) || b == b' ' || b == b'\t' => {
p.force_relex_thematic_break_parts();
relex_active = true;
continue;
From 81aaa78efd803c10facd9aba49131819434f1bf6 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:42:05 -0400
Subject: [PATCH 4/4] chore: update module_graph snapshot after upstream rebase
---
.../tests/snapshots/test_optional_and_readonly_members.snap | 1 -
1 file changed, 1 deletion(-)
diff --git a/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap b/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap
index fd8582ae19d4..c7952f45b3f7 100644
--- a/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap
+++ b/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap
@@ -34,7 +34,6 @@ Imports {
```
Config => BindingTypeData {
Types Module(0) TypeId(2),
- Exported Ranges: (17..23)
}
```