diff --git a/crates/biome_markdown_parser/src/syntax/mod.rs b/crates/biome_markdown_parser/src/syntax/mod.rs index 92a9af1878a0..3e3c367ee1ee 100644 --- a/crates/biome_markdown_parser/src/syntax/mod.rs +++ b/crates/biome_markdown_parser/src/syntax/mod.rs @@ -831,7 +831,6 @@ fn line_has_quote_prefix(p: &MarkdownParser, depth: usize) -> bool { fn classify_quote_break_after_newline( p: &mut MarkdownParser, quote_depth: usize, - include_textual_markers: bool, ) -> QuoteBreakKind { p.lookahead(|p| { consume_quote_prefix_without_virtual(p, quote_depth); @@ -840,9 +839,7 @@ fn classify_quote_break_after_newline( || (p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p)) { QuoteBreakKind::SetextUnderline - } else if at_block_interrupt(p) - || (include_textual_markers && textual_looks_like_list_marker(p)) - { + } else if at_block_interrupt(p) || textual_looks_like_list_marker(p) { QuoteBreakKind::Other } else { QuoteBreakKind::None @@ -851,6 +848,24 @@ fn classify_quote_break_after_newline( }) } +/// Report whether the current token starts a construct that ends a paragraph. +/// +/// Setext underlines and dash-only thematic breaks are considered only when +/// `has_content` is true; fences, block interrupts, and textual list markers +/// always count. Shared by [`handle_inline_newline`] and [`inline_list_source_len`]. +fn at_paragraph_break(p: &mut MarkdownParser, has_content: bool) -> bool { + if has_content && p.at(MD_SETEXT_UNDERLINE_LITERAL) && allow_setext_heading(p) { + return true; + } + if has_content && p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p) { + return true; + } + if line_starts_with_fence(p) { + return true; + } + at_block_interrupt(p) || textual_looks_like_list_marker(p) +} + enum InlineNewlineAction { Break, Continue, @@ -897,7 +912,7 @@ fn handle_inline_newline(p: &mut MarkdownParser, has_content: bool) -> InlineNew // If we're inside a block quote, only consume the quote prefix // when it doesn't start a new block (e.g., a nested quote). 
if quote_depth > 0 && has_quote_prefix(p, quote_depth) { - let break_kind = classify_quote_break_after_newline(p, quote_depth, true); + let break_kind = classify_quote_break_after_newline(p, quote_depth); match break_kind { QuoteBreakKind::SetextUnderline => { // Consume the quote prefix so the setext underline is visible @@ -932,14 +947,6 @@ fn handle_inline_newline(p: &mut MarkdownParser, has_content: bool) -> InlineNew } } - // Check if we're at a setext heading underline (already past indent) - if has_content && p.at(MD_SETEXT_UNDERLINE_LITERAL) && allow_setext_heading(p) { - return InlineNewlineAction::Break; - } - if has_content && p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p) { - return InlineNewlineAction::Break; - } - // If we're inside a list item and the next line meets the required indent, // check for block interrupts after skipping that indent. This allows // nested list markers like "\t - baz" to break out of the paragraph. @@ -980,26 +987,16 @@ fn handle_inline_newline(p: &mut MarkdownParser, has_content: bool) -> InlineNew } } - // Check for block-level constructs that can interrupt paragraphs - if line_starts_with_fence(p) { - return InlineNewlineAction::Break; - } + // Check for block-level constructs that can interrupt paragraphs. + // Textual fence tokens (e.g. "```") may not be caught by line_starts_with_fence + // because the lexer emits them as MD_TEXTUAL_LITERAL in inline context. if p.at(MD_TEXTUAL_LITERAL) { let text = p.cur_text(); if text.starts_with("```") || text.starts_with("~~~") { return InlineNewlineAction::Break; } } - if at_block_interrupt(p) { - return InlineNewlineAction::Break; - } - - // Also check for list markers that appear as textual content. - // Inside inline content, '-' is lexed as MD_TEXTUAL_LITERAL, not MINUS, - // so at_block_interrupt won't detect them. Per CommonMark §5.1, list - // items can interrupt paragraphs (bullet lists always, ordered lists - // only if they start with 1). 
- if textual_looks_like_list_marker(p) { + if at_paragraph_break(p, has_content) { return InlineNewlineAction::Break; } @@ -1224,30 +1221,18 @@ fn inline_list_source_len(p: &mut MarkdownParser) -> usize { let quote_depth = p.state().block_quote_depth; if quote_depth > 0 && has_quote_prefix(p, quote_depth) { - let break_kind = classify_quote_break_after_newline(p, quote_depth, false); + let break_kind = classify_quote_break_after_newline(p, quote_depth); if !matches!(break_kind, QuoteBreakKind::None) { break; } consume_quote_prefix_without_virtual(p, quote_depth); } - if p.at(MD_SETEXT_UNDERLINE_LITERAL) && allow_setext_heading(p) { - break; - } - - if p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p) { - break; - } - if quote_depth > 0 && p.at(R_ANGLE) && !has_quote_prefix(p, quote_depth) { consume_partial_quote_prefix_lookahead(p, quote_depth, &mut len); } - if line_starts_with_fence(p) { - break; - } - - if at_block_interrupt(p) { + if at_paragraph_break(p, true) { break; } @@ -1278,6 +1263,19 @@ fn inline_list_source_len(p: &mut MarkdownParser) -> usize { len += text.len(); p.bump(MD_TEXTUAL_LITERAL); } + + // After stripping list indent, re-check setext/thematic markers + // to mirror newline handling in the parse path. Without this, + // prescan would include indent bytes and stop one iteration later. + // We intentionally skip the heavier post-indent block-interrupt + // check here; the following non-NEWLINE pass still catches + // interrupts for emphasis-context length calculation. 
+ if p.at(MD_SETEXT_UNDERLINE_LITERAL) + || (p.at(MD_THEMATIC_BREAK_LITERAL) + && is_dash_only_thematic_break_text(p.cur_text())) + { + break; + } } continue; diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/quote_textual_marker_parity.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/quote_textual_marker_parity.md new file mode 100644 index 000000000000..f2996786606a --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/quote_textual_marker_parity.md @@ -0,0 +1,8 @@ +> paragraph line +> - nested bullet + +> paragraph line +> 1. nested ordered + +> paragraph line +> still paragraph diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/quote_textual_marker_parity.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/quote_textual_marker_parity.md.snap new file mode 100644 index 000000000000..2fea026b5b26 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/quote_textual_marker_parity.md.snap @@ -0,0 +1,203 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- + +## Input + +``` +> paragraph line +> - nested bullet + +> paragraph line +> 1. 
nested ordered + +> paragraph line +> still paragraph + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@0..1 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..16 "paragraph line" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..17 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MINUS@17..20 "-" [Skipped(">"), Skipped(" ")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@20..34 " nested bullet" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@34..35 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + ], + }, + MdNewline { + value_token: NEWLINE@35..36 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@36..37 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..52 "paragraph line" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@52..53 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdOrderedListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MD_ORDERED_LIST_MARKER@53..57 "1." 
[Skipped(">"), Skipped(" ")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@57..72 " nested ordered" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@72..73 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + ], + }, + MdNewline { + value_token: NEWLINE@73..74 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@74..75 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@75..90 "paragraph line" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@90..91 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@91..108 "still paragraph" [Skipped(">"), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@108..109 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + eof_token: EOF@109..109 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..109 + 0: (empty) + 1: MD_BLOCK_LIST@0..109 + 0: MD_QUOTE@0..35 + 0: R_ANGLE@0..1 ">" [] [] + 1: MD_BLOCK_LIST@1..35 + 0: MD_PARAGRAPH@1..17 + 0: MD_INLINE_ITEM_LIST@1..17 + 0: MD_TEXTUAL@1..16 + 0: MD_TEXTUAL_LITERAL@1..16 "paragraph line" [Skipped(" ")] [] + 1: MD_TEXTUAL@16..17 + 0: MD_TEXTUAL_LITERAL@16..17 "\n" [] [] + 1: (empty) + 1: MD_BULLET_LIST_ITEM@17..35 + 0: MD_BULLET_LIST@17..35 + 0: MD_BULLET@17..35 + 0: MINUS@17..20 "-" [Skipped(">"), Skipped(" ")] [] + 1: MD_BLOCK_LIST@20..35 + 0: MD_PARAGRAPH@20..35 + 0: MD_INLINE_ITEM_LIST@20..35 + 0: MD_TEXTUAL@20..34 + 0: MD_TEXTUAL_LITERAL@20..34 " nested bullet" [] [] + 1: MD_TEXTUAL@34..35 + 0: MD_TEXTUAL_LITERAL@34..35 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@35..36 + 0: NEWLINE@35..36 "\n" [] [] + 2: MD_QUOTE@36..73 + 0: R_ANGLE@36..37 ">" [] [] + 1: MD_BLOCK_LIST@37..73 + 0: MD_PARAGRAPH@37..53 + 0: MD_INLINE_ITEM_LIST@37..53 + 0: MD_TEXTUAL@37..52 + 0: MD_TEXTUAL_LITERAL@37..52 "paragraph line" [Skipped(" ")] [] + 1: 
MD_TEXTUAL@52..53 + 0: MD_TEXTUAL_LITERAL@52..53 "\n" [] [] + 1: (empty) + 1: MD_ORDERED_LIST_ITEM@53..73 + 0: MD_BULLET_LIST@53..73 + 0: MD_BULLET@53..73 + 0: MD_ORDERED_LIST_MARKER@53..57 "1." [Skipped(">"), Skipped(" ")] [] + 1: MD_BLOCK_LIST@57..73 + 0: MD_PARAGRAPH@57..73 + 0: MD_INLINE_ITEM_LIST@57..73 + 0: MD_TEXTUAL@57..72 + 0: MD_TEXTUAL_LITERAL@57..72 " nested ordered" [] [] + 1: MD_TEXTUAL@72..73 + 0: MD_TEXTUAL_LITERAL@72..73 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@73..74 + 0: NEWLINE@73..74 "\n" [] [] + 4: MD_QUOTE@74..109 + 0: R_ANGLE@74..75 ">" [] [] + 1: MD_BLOCK_LIST@75..109 + 0: MD_PARAGRAPH@75..109 + 0: MD_INLINE_ITEM_LIST@75..109 + 0: MD_TEXTUAL@75..90 + 0: MD_TEXTUAL_LITERAL@75..90 "paragraph line" [Skipped(" ")] [] + 1: MD_TEXTUAL@90..91 + 0: MD_TEXTUAL_LITERAL@90..91 "\n" [] [] + 2: MD_TEXTUAL@91..108 + 0: MD_TEXTUAL_LITERAL@91..108 "still paragraph" [Skipped(">"), Skipped(" ")] [] + 3: MD_TEXTUAL@108..109 + 0: MD_TEXTUAL_LITERAL@108..109 "\n" [] [] + 1: (empty) + 2: EOF@109..109 "" [] [] + +```