From 95747c55aa01729b86663998aeb4dc2989fb130b Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Sun, 12 Apr 2026 14:02:18 -0400
Subject: [PATCH 1/4] fix(markdown_parser): prefer list item over thematic
 break for `- ---`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, when the lexer produced `MD_THEMATIC_BREAK_LITERAL` for a
line like `- ---`, the thematic break interpretation won because it was
checked before list items in the block dispatcher.

Per CommonMark §5.2/§4.1 (and verified against commonmark.js +
markdown-it), when stripping a bullet marker + one space/tab from the
token leaves a consecutive run of 3+ matching break characters (no
internal whitespace), the list item interpretation should win. E.g.:
  - `- ---` → list item containing <hr /> (`---` remains)
  - `- - -` → thematic break (`- -` remains: only 2 break chars, spaced)
  - `- - - -` → thematic break (`- - -` remains: spaced, not consecutive)

The fix adds a parser-side guard (`thematic_break_hides_list_item`)
that inspects the token text. When triggered, the token is re-lexed
via `ThematicBreakParts` context to expose the individual marker
tokens, then list item parsing proceeds normally.
---
 .../biome_markdown_parser/src/syntax/mod.rs   |  36 ++++--
 .../src/syntax/thematic_break_block.rs        |  41 +++++++
 .../ok/thematic_break_in_list.md.snap         | 109 ++++++++++--------
 .../biome_markdown_parser/tests/spec_test.rs  |  43 +++++--
 4 files changed, 159 insertions(+), 70 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/mod.rs b/crates/biome_markdown_parser/src/syntax/mod.rs
index d778917c0d86..14934aa6165e 100644
--- a/crates/biome_markdown_parser/src/syntax/mod.rs
+++ b/crates/biome_markdown_parser/src/syntax/mod.rs
@@ -58,7 +58,9 @@ use quote::{
     at_quote, consume_quote_prefix, consume_quote_prefix_without_virtual, has_quote_prefix,
     line_has_quote_prefix_at_current, parse_quote,
 };
-use thematic_break_block::{at_thematic_break_block, parse_thematic_break_block};
+use thematic_break_block::{
+    at_thematic_break_block, parse_thematic_break_block, thematic_break_hides_list_item,
+};
 
 use crate::MarkdownParser;
 
@@ -289,17 +291,29 @@ pub(crate) fn parse_any_block_with_indent_code_policy(
     } else if line_starts_with_fence(p) {
         parse_fenced_code_block_force(p)
     } else if at_thematic_break_block(p) {
-        let break_block = try_parse(p, |p| {
-            let break_block = parse_thematic_break_block(p);
-            if break_block.is_absent() {
-                return Err(());
-            }
-            Ok(break_block)
-        });
-        if let Ok(parsed) = break_block {
-            parsed
+        // Per CommonMark §5.2 / §4.1: when the thematic break token starts with
+        // a bullet marker + space/tab and the rest is a consecutive run of 3+
+        // matching break chars, the list item interpretation wins.
+        // E.g. `- ---` → list item containing <hr />,
+        //   but `- - -` → thematic break (`- -` left: spaced, not a 3+ run).
+        let is_hidden_list_item =
+            p.at(MD_THEMATIC_BREAK_LITERAL) && thematic_break_hides_list_item(p.cur_text());
+        if is_hidden_list_item {
+            p.force_relex_thematic_break_parts();
+            parse_bullet_list_item(p)
         } else {
-            parse_paragraph(p)
+            let break_block = try_parse(p, |p| {
+                let break_block = parse_thematic_break_block(p);
+                if break_block.is_absent() {
+                    return Err(());
+                }
+                Ok(break_block)
+            });
+            if let Ok(parsed) = break_block {
+                parsed
+            } else {
+                parse_paragraph(p)
+            }
         }
     } else if at_header(p) {
         // Check for too many hashes BEFORE try_parse (which would lose diagnostics on rewind)
diff --git a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
index 89d3bce075d7..b2879871418c 100644
--- a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
+++ b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
@@ -42,6 +42,47 @@ pub(crate) fn at_thematic_break_block(p: &mut MarkdownParser) -> bool {
     })
 }
 
+/// Check if a `MD_THEMATIC_BREAK_LITERAL` token text should actually be parsed
+/// as a bullet list item whose content is a thematic break.
+///
+/// Returns `true` when the text can be split as:
+///   `bullet_marker` + `space/tab` + `consecutive_thematic_break`
+///
+/// The payload must be a CONSECUTIVE run of 3+ matching break characters
+/// with no internal spaces. This distinguishes:
+///   `- ---` → list item (payload `---` is consecutive)
+///   `- - -` → thematic break (payload `- -` has internal spaces)
+///   `- - - -` → thematic break (payload `- - -` has internal spaces)
+///
+/// Only bullet markers (`-`, `*`, `+`) are checked — ordered list markers
+/// cannot collide with thematic break characters.
+pub(crate) fn thematic_break_hides_list_item(text: &str) -> bool {
+    let bytes = text.as_bytes();
+    // Need at least: marker (1) + space (1) + 3 break chars = 5 bytes
+    if bytes.len() < 5 {
+        return false;
+    }
+    if !matches!(bytes[0], b'-' | b'*' | b'+') {
+        return false;
+    }
+    if !matches!(bytes[1], b' ' | b'\t') {
+        return false;
+    }
+
+    // The payload (after marker + space) must be 3+ consecutive matching
+    // break characters, optionally followed by trailing whitespace only.
+    let payload = text[2..].trim_end_matches([' ', '\t']);
+    let payload_bytes = payload.as_bytes();
+    if payload_bytes.len() < THEMATIC_BREAK_MIN_CHARS {
+        return false;
+    }
+    let break_char = payload_bytes[0];
+    if !matches!(break_char, b'-' | b'*' | b'_') {
+        return false;
+    }
+    payload_bytes.iter().all(|&b| b == break_char)
+}
+
 /// Check if the remaining content forms a thematic break pattern.
 ///
 /// Per CommonMark §4.1, a thematic break is 3 or more matching characters
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap
index b8a714a0ef85..6e74597ccdac 100644
--- a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_in_list.md.snap
@@ -70,33 +70,37 @@ MdDocument {
         MdNewline {
             value_token: NEWLINE@8..9 "\n" [] [],
         },
-        MdThematicBreakBlock {
-            parts: MdThematicBreakPartList [
-                MdThematicBreakChar {
-                    value: MINUS@9..10 "-" [] [],
-                },
-                MdIndentToken {
-                    md_indent_char_token: MD_INDENT_CHAR@10..11 " " [] [],
-                },
-                MdThematicBreakChar {
-                    value: MINUS@11..12 "-" [] [],
-                },
-                MdThematicBreakChar {
-                    value: MINUS@12..13 "-" [] [],
-                },
-                MdThematicBreakChar {
-                    value: MINUS@13..14 "-" [] [],
-                },
-            ],
-        },
-        MdNewline {
-            value_token: NEWLINE@14..15 "\n" [] [],
-        },
-        MdNewline {
-            value_token: NEWLINE@15..16 "\n" [] [],
-        },
         MdBulletListItem {
             md_bullet_list: MdBulletList [
+                MdBullet {
+                    prefix: MdListMarkerPrefix {
+                        pre_marker_indent: MdIndentTokenList [],
+                        marker: MINUS@9..10 "-" [] [],
+                        post_marker_space_token: MD_LIST_POST_MARKER_SPACE@10..11 " " [] [],
+                        content_indent: MdIndentTokenList [],
+                    },
+                    content: MdBlockList [
+                        MdThematicBreakBlock {
+                            parts: MdThematicBreakPartList [
+                                MdThematicBreakChar {
+                                    value: MINUS@11..12 "-" [] [],
+                                },
+                                MdThematicBreakChar {
+                                    value: MINUS@12..13 "-" [] [],
+                                },
+                                MdThematicBreakChar {
+                                    value: MINUS@13..14 "-" [] [],
+                                },
+                            ],
+                        },
+                        MdNewline {
+                            value_token: NEWLINE@14..15 "\n" [] [],
+                        },
+                    ],
+                },
+                MdNewline {
+                    value_token: NEWLINE@15..16 "\n" [] [],
+                },
                 MdBullet {
                     prefix: MdListMarkerPrefix {
                         pre_marker_indent: MdIndentTokenList [],
@@ -332,25 +336,28 @@ MdDocument {
       0: NEWLINE@7..8 "\n" [] []
     2: MD_NEWLINE@8..9
       0: NEWLINE@8..9 "\n" [] []
-    3: MD_THEMATIC_BREAK_BLOCK@9..14
-      0: MD_THEMATIC_BREAK_PART_LIST@9..14
-        0: MD_THEMATIC_BREAK_CHAR@9..10
-          0: MINUS@9..10 "-" [] []
-        1: MD_INDENT_TOKEN@10..11
-          0: MD_INDENT_CHAR@10..11 " " [] []
-        2: MD_THEMATIC_BREAK_CHAR@11..12
-          0: MINUS@11..12 "-" [] []
-        3: MD_THEMATIC_BREAK_CHAR@12..13
-          0: MINUS@12..13 "-" [] []
-        4: MD_THEMATIC_BREAK_CHAR@13..14
-          0: MINUS@13..14 "-" [] []
-    4: MD_NEWLINE@14..15
-      0: NEWLINE@14..15 "\n" [] []
-    5: MD_NEWLINE@15..16
-      0: NEWLINE@15..16 "\n" [] []
-    6: MD_BULLET_LIST_ITEM@16..21
-      0: MD_BULLET_LIST@16..21
-        0: MD_BULLET@16..21
+    3: MD_BULLET_LIST_ITEM@9..21
+      0: MD_BULLET_LIST@9..21
+        0: MD_BULLET@9..15
+          0: MD_LIST_MARKER_PREFIX@9..11
+            0: MD_INDENT_TOKEN_LIST@9..9
+            1: MINUS@9..10 "-" [] []
+            2: MD_LIST_POST_MARKER_SPACE@10..11 " " [] []
+            3: MD_INDENT_TOKEN_LIST@11..11
+          1: MD_BLOCK_LIST@11..15
+            0: MD_THEMATIC_BREAK_BLOCK@11..14
+              0: MD_THEMATIC_BREAK_PART_LIST@11..14
+                0: MD_THEMATIC_BREAK_CHAR@11..12
+                  0: MINUS@11..12 "-" [] []
+                1: MD_THEMATIC_BREAK_CHAR@12..13
+                  0: MINUS@12..13 "-" [] []
+                2: MD_THEMATIC_BREAK_CHAR@13..14
+                  0: MINUS@13..14 "-" [] []
+            1: MD_NEWLINE@14..15
+              0: NEWLINE@14..15 "\n" [] []
+        1: MD_NEWLINE@15..16
+          0: NEWLINE@15..16 "\n" [] []
+        2: MD_BULLET@16..21
           0: MD_LIST_MARKER_PREFIX@16..18
             0: MD_INDENT_TOKEN_LIST@16..16
             1: MINUS@16..17 "-" [] []
@@ -365,11 +372,11 @@ MdDocument {
                   0: UNDERSCORE@19..20 "_" [] []
                 2: MD_THEMATIC_BREAK_CHAR@20..21
                   0: UNDERSCORE@20..21 "_" [] []
-    7: MD_NEWLINE@21..22
+    4: MD_NEWLINE@21..22
       0: NEWLINE@21..22 "\n" [] []
-    8: MD_NEWLINE@22..23
+    5: MD_NEWLINE@22..23
       0: NEWLINE@22..23 "\n" [] []
-    9: MD_BULLET_LIST_ITEM@23..28
+    6: MD_BULLET_LIST_ITEM@23..28
       0: MD_BULLET_LIST@23..28
         0: MD_BULLET@23..28
           0: MD_LIST_MARKER_PREFIX@23..25
@@ -386,11 +393,11 @@ MdDocument {
                   0: MINUS@26..27 "-" [] []
                 2: MD_THEMATIC_BREAK_CHAR@27..28
                   0: MINUS@27..28 "-" [] []
-    10: MD_NEWLINE@28..29
+    7: MD_NEWLINE@28..29
       0: NEWLINE@28..29 "\n" [] []
-    11: MD_NEWLINE@29..30
+    8: MD_NEWLINE@29..30
       0: NEWLINE@29..30 "\n" [] []
-    12: MD_BULLET_LIST_ITEM@30..62
+    9: MD_BULLET_LIST_ITEM@30..62
       0: MD_BULLET_LIST@30..62
         0: MD_BULLET@30..37
           0: MD_LIST_MARKER_PREFIX@30..32
@@ -476,7 +483,7 @@ MdDocument {
                   0: UNDERSCORE@60..61 "_" [] []
                 4: MD_THEMATIC_BREAK_CHAR@61..62
                   0: UNDERSCORE@61..62 "_" [] []
-    13: MD_NEWLINE@62..63
+    10: MD_NEWLINE@62..63
       0: NEWLINE@62..63 "\n" [] []
   2: EOF@63..63 "" [] []
 
diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs
index d4c34ce710bf..508647d04aec 100644
--- a/crates/biome_markdown_parser/tests/spec_test.rs
+++ b/crates/biome_markdown_parser/tests/spec_test.rs
@@ -418,12 +418,10 @@ pub fn quick_test() {
         "<ul>\n<li>bar</li>\n</ul>\n<ul>\n<li>item</li>\n</ul>\n",
     );
     // Reduce: thematic break in list then different marker
-    // NOTE: `- ---` is a pre-existing Biome bug where it parses as a top-level
-    // thematic break instead of a list item containing <hr />.
     test_example(
         30013,
         "- ---\n\n+ item\n",
-        "<hr />\n<ul>\n<li>item</li>\n</ul>\n",
+        "<ul>\n<li>\n<hr />\n</li>\n</ul>\n<ul>\n<li>item</li>\n</ul>\n",
     );
     // Reduce: setext heading in list then different marker
     test_example(
@@ -459,6 +457,38 @@ pub fn quick_test() {
         "- outer\n  - nested\n  lazy line\nhello\n",
         "<ul>\n<li>outer\n<ul>\n<li>nested\nlazy line\nhello</li>\n</ul>\n</li>\n</ul>\n",
     );
+
+    // #region Thematic break vs list item precedence
+    //
+    // When stripping a bullet marker + space leaves a consecutive run of 3+
+    // matching break chars (itself a valid thematic break), the list item wins.
+    // When what remains is spaced out or has < 3 chars, it stays a break.
+
+    // `- ---` → list item containing <hr /> (3 consecutive dashes after `- `)
+    test_example(30020, "- ---\n", "<ul>\n<li>\n<hr />\n</li>\n</ul>\n");
+    // `* ***` → list item containing <hr /> (3 consecutive stars after `* `)
+    test_example(30021, "* ***\n", "<ul>\n<li>\n<hr />\n</li>\n</ul>\n");
+    // `+ ___` → list item containing <hr /> (3 consecutive underscores after `+ `)
+    test_example(30022, "+ ___\n", "<ul>\n<li>\n<hr />\n</li>\n</ul>\n");
+    // `- ---` with following content and marker change
+    test_example(
+        30023,
+        "- ---\n\n+ item\n",
+        "<ul>\n<li>\n<hr />\n</li>\n</ul>\n<ul>\n<li>item</li>\n</ul>\n",
+    );
+
+    // These remain thematic breaks: no bullet prefix, or the rest is spaced out / < 3 chars.
+    // `- - -` → thematic break (payload `- -` has internal spaces)
+    test_example(30024, "- - -\n", "<hr />\n");
+    // `* * *` → thematic break (payload `* *` has internal spaces)
+    test_example(30025, "* * *\n", "<hr />\n");
+    // Plain `---` → thematic break (no list marker prefix)
+    test_example(30026, "---\n", "<hr />\n");
+    // `***` → thematic break
+    test_example(30027, "***\n", "<hr />\n");
+    // `___` → thematic break (underscore is not a bullet marker)
+    test_example(30028, "___\n", "<hr />\n");
+    // #endregion
 }
 
 fn fuzz_test_example(num: u32, input: &str, expected: &str) {
@@ -492,16 +522,12 @@ fn fuzz_mixed_markers_paragraph() {
     );
 }
 
-/// NOTE: `- ---` is parsed by Biome as a top-level thematic break rather than
-/// a list item containing `<hr />`. This is a separate pre-existing bug
-/// (thematic break precedence over list marker) unrelated to the mixed-marker
-/// list-split fix. The expected value here matches Biome's current behavior.
 #[test]
 fn fuzz_mixed_markers_thematic_break() {
     fuzz_test_example(
         3,
         "- ---\n\n+ item\n",
-        "<hr />\n<ul>\n<li>item</li>\n</ul>\n",
+        "<ul>\n<li>\n<hr />\n</li>\n</ul>\n<ul>\n<li>item</li>\n</ul>\n",
     );
 }
 
@@ -549,3 +575,4 @@ fn fuzz_code_after_list_not_absorbed() {
         "<ul>\n<li>one</li>\n<li>two</li>\n</ul>\n<pre><code>code here\n</code></pre>\n",
     );
 }
+

From a5f958e4beacd2aecbdd2604f11ff47f2b2885e8 Mon Sep 17 00:00:00 2001
From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com>
Date: Sun, 12 Apr 2026 18:46:09 +0000
Subject: [PATCH 2/4] [autofix.ci] apply automated fixes

---
 crates/biome_markdown_parser/tests/spec_test.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs
index 508647d04aec..db089d8b807a 100644
--- a/crates/biome_markdown_parser/tests/spec_test.rs
+++ b/crates/biome_markdown_parser/tests/spec_test.rs
@@ -575,4 +575,3 @@ fn fuzz_code_after_list_not_absorbed() {
         "<ul>\n<li>one</li>\n<li>two</li>\n</ul>\n<pre><code>code here\n</code></pre>\n",
     );
 }
-

From bd64d6901c34dd6f87ed695642350adee1dc331c Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:19:10 -0400
Subject: [PATCH 3/4] refactor(markdown_parser): use lookup_byte for thematic
 break marker classification

Route `*`, `-`, and `_` classification through `biome_unicode_table::lookup_byte`
via a shared `is_break_marker` helper, following the project convention. Whitespace
checks (`' '`/`'\t'`) are kept explicit since `WHS` is semantically broader than
what CommonMark requires here.
---
 .../src/syntax/thematic_break_block.rs        | 49 +++++++++++++------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
index b2879871418c..42dd74e19f8e 100644
--- a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
+++ b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs
@@ -21,10 +21,25 @@ use biome_parser::{
     Parser,
     prelude::ParsedSyntax::{self, *},
 };
+use biome_unicode_table::Dispatch::{IDT, MIN, MUL};
+use biome_unicode_table::lookup_byte;
 
 /// CommonMark requires 3 or more matching characters for thematic breaks.
 const THEMATIC_BREAK_MIN_CHARS: usize = 3;
 
+/// Whether `byte` is a thematic break marker character (`*`, `-`, or `_`).
+///
+/// Uses the `biome_unicode_table` lookup table for `*` (`MUL`) and `-` (`MIN`).
+/// `_` shares the `IDT` dispatch variant with ASCII letters, so an explicit
+/// byte check is required to disambiguate.
+fn is_break_marker(byte: u8) -> bool {
+    match lookup_byte(byte) {
+        MUL | MIN => true,
+        IDT => byte == b'_',
+        _ => false,
+    }
+}
+
 pub(crate) fn at_thematic_break_block(p: &mut MarkdownParser) -> bool {
     p.lookahead(|p| {
         if p.at_line_start() || p.at_start_of_input() {
@@ -101,22 +116,25 @@ fn is_thematic_break_pattern(p: &mut MarkdownParser) -> bool {
     // If the entire line segment is a single textual literal, validate it directly.
     if p.at(MD_TEXTUAL_LITERAL)
         && p.cur_text()
-            .chars()
-            .all(|c| c == ' ' || c == '\t' || c == '*' || c == '-' || c == '_')
+            .bytes()
+            .all(|b| b == b' ' || b == b'\t' || is_break_marker(b))
     {
-        let mut break_char = None;
+        let mut break_byte = None;
         let mut break_count = 0usize;
 
-        for c in p.cur_text().chars() {
-            if c == ' ' || c == '\t' {
+        for b in p.cur_text().bytes() {
+            if b == b' ' || b == b'\t' {
                 continue;
             }
-            if let Some(existing) = break_char {
-                if existing != c {
+            if !is_break_marker(b) {
+                return false;
+            }
+            if let Some(existing) = break_byte {
+                if existing != b {
                     return false;
                 }
             } else {
-                break_char = Some(c);
+                break_byte = Some(b);
             }
             break_count += 1;
         }
@@ -143,11 +161,11 @@ fn is_thematic_break_pattern(p: &mut MarkdownParser) -> bool {
     } else if p.at(MD_TEXTUAL_LITERAL) {
         let text = p.cur_text();
         if text.len() == 1 {
-            match text.chars().next() {
-                Some('*') => '*',
-                Some('-') => '-',
-                Some('_') => '_',
-                _ => return false,
+            let b = text.as_bytes()[0];
+            if is_break_marker(b) {
+                b as char
+            } else {
+                return false;
             }
         } else {
             return false;
@@ -270,9 +288,8 @@ fn parse_thematic_break_parts(p: &mut MarkdownParser) {
         }
 
         if p.at(MD_TEXTUAL_LITERAL) {
-            let first_char = p.cur_text().as_bytes().first().copied();
-            match first_char {
-                Some(b'*' | b'-' | b'_' | b' ' | b'\t') => {
+            match p.cur_text().as_bytes().first().copied() {
+                Some(b) if is_break_marker(b) || b == b' ' || b == b'\t' => {
                     p.force_relex_thematic_break_parts();
                     relex_active = true;
                     continue;

From 81aaa78efd803c10facd9aba49131819434f1bf6 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:42:05 -0400
Subject: [PATCH 4/4] chore: update module_graph snapshot after upstream rebase

---
 .../tests/snapshots/test_optional_and_readonly_members.snap      | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap b/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap
index fd8582ae19d4..c7952f45b3f7 100644
--- a/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap
+++ b/crates/biome_module_graph/tests/snapshots/test_optional_and_readonly_members.snap
@@ -34,7 +34,6 @@ Imports {
 ```
 Config  => BindingTypeData {
   Types Module(0) TypeId(2),
-  Exported Ranges: (17..23)
 }
 ```