biomejs · ematipico · Apr 17, 2026 · Apr 13, 2026 · Apr 17, 2026
diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs
@@ -15,6 +15,8 @@ use biome_unicode_table::lookup_byte;
 
 use crate::syntax::{MAX_BLOCK_PREFIX_INDENT, TAB_STOP_SPACES};
 
+const MAX_ORDERED_LIST_MARKER_DIGITS: usize = 9; // CommonMark: ordered markers have 1-9 digits
+
 /// Lexer context for different markdown parsing modes
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
 pub enum MarkdownLexContext {
@@ -260,14 +262,19 @@ impl<'src> MarkdownLexer<'src> {
                     // In link definition context, whitespace separates tokens.
                     // We consume it as textual literal so it's not treated as trivia by the parser.
                     self.consume_link_definition_whitespace()
-                } else if self.after_newline && matches!(current, b' ' | b'\t') {
+                } else if self.after_newline && is_space_or_tab_byte(current) {
                     // At line start, emit single whitespace tokens to allow
                     // indentation handling and quote marker spacing.
                     self.consume_single_whitespace_as_text()
-                } else if matches!(current, b' ' | b'\t') && self.is_after_block_quote_marker() {
+                } else if is_space_or_tab_byte(current) && self.is_after_block_quote_marker() {
                     // After a block quote marker, emit a single whitespace token
                     // so the parser can skip the optional space.
                     self.consume_single_whitespace_as_text()
+                } else if is_space_or_tab_byte(current) && self.is_in_list_marker_whitespace() {
+                    // While consuming the leading whitespace after a list marker,
+                    // emit one space/tab per token so the parser can distinguish
+                    // the optional post-marker separator from content indent.
+                    self.consume_single_whitespace_as_text()
                 } else if current == b' '
                     && !matches!(context, MarkdownLexContext::HeadingContent)
                     && self.is_potential_hard_line_break()
@@ -683,6 +690,104 @@ impl<'src> MarkdownLexer<'src> {
         saw_marker
     }
 
+    /// Returns true if the current byte is a space/tab inside the whitespace
+    /// run that directly follows a list marker, and that run contains a tab.
+    ///
+    /// The text between the line start and the cursor must be an optional
+    /// indent of at most `MAX_BLOCK_PREFIX_INDENT` columns, a marker (`-`, `*`,
+    /// `+`, or up to `MAX_ORDERED_LIST_MARKER_DIGITS` digits followed by `.` or
+    /// `)`), and then nothing but spaces/tabs. Two cursor positions are handled:
+    ///
+    /// - On the first byte after the marker: a tab qualifies only when no
+    ///   further space/tab follows it; a space qualifies only when a tab
+    ///   appears later in the same run.
+    /// - Deeper in the run: the run must begin with a space, and a tab must
+    ///   appear somewhere in it (before, at, or after the cursor).
+    fn is_in_list_marker_whitespace(&self) -> bool {
+        let bytes = self.source.as_bytes();
+        let Some(&current) = bytes.get(self.position) else {
+            return false;
+        };
+        if !is_space_or_tab_byte(current) {
+            return false;
+        }
+
+        // `rfind` returns the last line-break character before the cursor, so
+        // the line starts on the very next byte for `\n`, `\r` and `\r\n` alike.
+        let line_start = self.source[..self.position]
+            .rfind(['\n', '\r'])
+            .map_or(0, |pos| pos + 1);
+        let prefix = &bytes[line_start..self.position];
+
+        // Measure the leading indent in columns, expanding tabs to tab stops.
+        let mut idx = 0usize;
+        let mut indent = 0usize;
+        while prefix.get(idx).copied().is_some_and(is_space_or_tab_byte) {
+            if prefix[idx] == b'\t' {
+                indent += TAB_STOP_SPACES - (indent % TAB_STOP_SPACES);
+            } else {
+                indent += 1;
+            }
+            if indent > MAX_BLOCK_PREFIX_INDENT {
+                return false;
+            }
+            idx += 1;
+        }
+
+        // Skip the marker itself: a bullet byte, or digits plus a delimiter.
+        let Some(&marker_start) = prefix.get(idx) else {
+            return false;
+        };
+        match lookup_byte(marker_start) {
+            MIN | MUL | PLS => idx += 1,
+            ZER | DIG => {
+                let digits = prefix[idx..]
+                    .iter()
+                    .take_while(|&&byte| is_ascii_digit_byte(byte))
+                    .count();
+                if digits > MAX_ORDERED_LIST_MARKER_DIGITS {
+                    return false;
+                }
+                idx += digits;
+
+                let Some(&delimiter) = prefix.get(idx) else {
+                    return false;
+                };
+                if !matches!(lookup_byte(delimiter), PRD | PNC) {
+                    return false;
+                }
+                idx += 1;
+            }
+            _ => return false,
+        }
+
+        // Look ahead through the rest of the space/tab run after the cursor.
+        let next_is_whitespace = bytes
+            .get(self.position + 1)
+            .copied()
+            .is_some_and(is_space_or_tab_byte);
+        let tab_follows = bytes[self.position + 1..]
+            .iter()
+            .copied()
+            .take_while(|&byte| is_space_or_tab_byte(byte))
+            .any(|byte| byte == b'\t');
+
+        let trailing = &prefix[idx..];
+        let Some(&first_separator) = trailing.first() else {
+            // The cursor sits on the first byte after the marker.
+            return if current == b'\t' {
+                !next_is_whitespace
+            } else {
+                tab_follows
+            };
+        };
+        if first_separator != b' ' || !trailing.iter().copied().all(is_space_or_tab_byte) {
+            return false;
+        }
+
+        current == b'\t' || trailing.contains(&b'\t') || tab_follows
+    }
+
     /// Consumes thematic break, setext underline, or emphasis markers (*, -, _).
     ///
     /// For `-` at line start:
@@ -1243,6 +1362,22 @@ impl<'src> MarkdownLexer<'src> {
     }
 }
 
+/// Returns true only for an ASCII space or horizontal tab.
+///
+/// Matches the bytes directly rather than the `WHS` dispatch class: that class
+/// also covers vertical tab and form feed, which must not count as
+/// indentation or as a marker separator.
+#[inline]
+fn is_space_or_tab_byte(byte: u8) -> bool {
+    matches!(byte, b' ' | b'\t')
+}
+
+/// Returns true for the ASCII digits `0` through `9`.
+#[inline]
+fn is_ascii_digit_byte(byte: u8) -> bool {
+    byte.is_ascii_digit()
+}
+
 impl<'src> ReLexer<'src> for MarkdownLexer<'src> {
     fn re_lex(&mut self, context: Self::ReLexContext) -> Self::Kind {
         let old_position = self.position;

diff --git a/crates/biome_markdown_parser/src/syntax/list.rs b/crates/biome_markdown_parser/src/syntax/list.rs
@@ -191,15 +191,20 @@ fn emit_indent_char_list(p: &mut MarkdownParser, max_columns: usize) -> usize {
 }
 
 /// Consume the first whitespace token after the list marker as MD_LIST_POST_MARKER_SPACE.
-/// Returns true if a space was consumed.
-fn emit_list_post_marker_space(p: &mut MarkdownParser) -> bool {
+/// Returns true if a space/tab was recognized; when `preserve_tab` is set, a tab is not consumed.
+fn emit_list_post_marker_space(p: &mut MarkdownParser, preserve_tab: bool) -> bool {
     if !p.at(MD_TEXTUAL_LITERAL) {
         return false;
     }
     let text = p.cur_text();
-    if text == " " || text == "\t" {
+    if text == " " {
         p.bump_remap(MD_LIST_POST_MARKER_SPACE);
         true
+    } else if text == "\t" {
+        if !preserve_tab {
+            p.bump_remap(MD_LIST_POST_MARKER_SPACE);
+        }
+        true
     } else {
         false
     }
@@ -834,12 +839,19 @@ fn parse_bullet(p: &mut MarkdownParser) -> (ParsedSyntax, ListItemBlankInfo) {
 
     // Post-marker space (first whitespace token after marker)
     if !setext_marker {
-        emit_list_post_marker_space(p);
+        emit_list_post_marker_space(p, spaces_after_marker > INDENT_CODE_BLOCK_SPACES);
     }
 
-    // Content indent (remaining whitespace tokens on first line)
+    // Content indent (remaining whitespace tokens on first line).
+    // For first-line indented code, only the 4-column code indent is consumed
+    // here so any additional padding remains in the code content.
     if !setext_marker && !first_line_empty && spaces_after_marker > 1 {
-        emit_indent_char_list(p, 0);
+        let max_columns = if spaces_after_marker > INDENT_CODE_BLOCK_SPACES {
+            INDENT_CODE_BLOCK_SPACES
+        } else {
+            0
+        };
+        emit_indent_char_list(p, max_columns);
     } else {
         // Empty first line or no content indent -- emit empty MdIndentTokenList
         let empty_m = p.start();
@@ -1149,11 +1161,18 @@ fn parse_ordered_bullet(p: &mut MarkdownParser) -> (ParsedSyntax, ListItemBlankI
     });
 
     // Post-marker space
-    emit_list_post_marker_space(p);
+    emit_list_post_marker_space(p, spaces_after_marker > INDENT_CODE_BLOCK_SPACES);
 
-    // Content indent
+    // Content indent.
+    // For first-line indented code, only the 4-column code indent is consumed
+    // here so any additional padding remains in the code content.
     if !first_line_empty && spaces_after_marker > 1 {
-        emit_indent_char_list(p, 0);
+        let max_columns = if spaces_after_marker > INDENT_CODE_BLOCK_SPACES {
+            INDENT_CODE_BLOCK_SPACES
+        } else {
+            0
+        };
+        emit_indent_char_list(p, max_columns);
     } else {
         let empty_m = p.start();
         empty_m.complete(p, MD_INDENT_TOKEN_LIST);

diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs
@@ -300,7 +300,10 @@ fn emit_post_marker_space(p: &mut MarkdownParser, preserve_tab: bool) -> bool {
             // When preserve_tab is true (e.g. indented code in quote), the tab still
             // semantically counts as the optional post-marker separator, but remains
             // in the stream so the child block can claim it as indentation.
-            if !preserve_tab {
+            if !preserve_tab
+                || !quote_tab_has_following_indent(p)
+                || quote_tab_starts_nested_prefix(p)
+            {
                 p.bump_remap(MD_QUOTE_POST_MARKER_SPACE);
             }
             true
@@ -558,6 +561,31 @@ pub(crate) fn at_quote_indented_code_start(p: &MarkdownParser) -> bool {
     column >= INDENT_CODE_BLOCK_SPACES
 }
 
+/// Looks past the current textual token (the post-marker tab at the visible
+/// call sites) and reports whether a `>` comes next, either as `T![>]` or as
+/// a textual literal. The check runs inside `lookahead`.
+fn quote_tab_starts_nested_prefix(p: &mut MarkdownParser) -> bool {
+    p.lookahead(|p| {
+        p.bump(MD_TEXTUAL_LITERAL);
+        if p.at(T![>]) {
+            return true;
+        }
+        p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == ">"
+    })
+}
+
+/// Looks past the current textual token and reports whether the first
+/// character of `source_after_current()` is a space or a tab.
+// NOTE(review): the exact start offset of `source_after_current()` is
+// defined elsewhere — confirm it begins where the caller expects.
+fn quote_tab_has_following_indent(p: &mut MarkdownParser) -> bool {
+    p.lookahead(|p| {
+        p.bump(MD_TEXTUAL_LITERAL);
+        let next_char = p.source_after_current().chars().next();
+        matches!(next_char, Some(' ' | '\t'))
+    })
+}
+
 fn parse_quote_indented_code_block(p: &mut MarkdownParser, depth: usize) -> ParsedSyntax {
     let m = p.start();
     let content = p.start();
@@ -641,7 +661,8 @@ pub(crate) fn emit_optional_marker_space(p: &mut MarkdownParser, preserve_tab: b
         return true;
     }
     if text == "\t" {
-        if !preserve_tab {
+        if !preserve_tab || !quote_tab_has_following_indent(p) || quote_tab_starts_nested_prefix(p)
+        {
             p.bump_remap(MD_QUOTE_POST_MARKER_SPACE);
         }
         return true;

diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_tab_separated.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_tab_separated.md
@@ -0,0 +1 @@
+>	>	foo
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_tab_separated.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_tab_separated.md.snap
@@ -0,0 +1,81 @@
+---
+source: crates/biome_markdown_parser/tests/spec_test.rs
+expression: snapshot
+---
+
+## Input
+
+```
+>	>	foo
+
+```
+
+
+## AST
+
+```
+MdDocument {
+    bom_token: missing (optional),
+    value: MdBlockList [
+        MdQuote {
+            prefix: MdQuotePrefix {
+                pre_marker_indent: MdQuoteIndentList [],
+                marker_token: R_ANGLE@0..1 ">" [] [],
+                post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@1..2 "\t" [] [],
+            },
+            content: MdBlockList [
+                MdQuote {
+                    prefix: MdQuotePrefix {
+                        pre_marker_indent: MdQuoteIndentList [],
+                        marker_token: R_ANGLE@2..3 ">" [] [],
+                        post_marker_space_token: MD_QUOTE_POST_MARKER_SPACE@3..4 "\t" [] [],
+                    },
+                    content: MdBlockList [
+                        MdParagraph {
+                            list: MdInlineItemList [
+                                MdTextual {
+                                    value_token: MD_TEXTUAL_LITERAL@4..7 "foo" [] [],
+                                },
+                                MdTextual {
+                                    value_token: MD_TEXTUAL_LITERAL@7..8 "\n" [] [],
+                                },
+                            ],
+                            hard_line: missing (optional),
+                        },
+                    ],
+                },
+            ],
+        },
+    ],
+    eof_token: EOF@8..8 "" [] [],
+}
+```
+
+## CST
+
+```
+0: MD_DOCUMENT@0..8
+  0: (empty)
+  1: MD_BLOCK_LIST@0..8
+    0: MD_QUOTE@0..8
+      0: MD_QUOTE_PREFIX@0..2
+        0: MD_QUOTE_INDENT_LIST@0..0
+        1: R_ANGLE@0..1 ">" [] []
+        2: MD_QUOTE_POST_MARKER_SPACE@1..2 "\t" [] []
+      1: MD_BLOCK_LIST@2..8
+        0: MD_QUOTE@2..8
+          0: MD_QUOTE_PREFIX@2..4
+            0: MD_QUOTE_INDENT_LIST@2..2
+            1: R_ANGLE@2..3 ">" [] []
+            2: MD_QUOTE_POST_MARKER_SPACE@3..4 "\t" [] []
+          1: MD_BLOCK_LIST@4..8
+            0: MD_PARAGRAPH@4..8
+              0: MD_INLINE_ITEM_LIST@4..8
+                0: MD_TEXTUAL@4..7
+                  0: MD_TEXTUAL_LITERAL@4..7 "foo" [] []
+                1: MD_TEXTUAL@7..8
+                  0: MD_TEXTUAL_LITERAL@7..8 "\n" [] []
+              1: (empty)
+  2: EOF@8..8 "" [] []
+
+```
diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list_space_tab_space.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list_space_tab_space.md
@@ -0,0 +1 @@
+- 	 foo