From 903d68adb5f406b3c68e65d3ef755d3d444e85c8 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Mon, 6 Apr 2026 18:01:17 -0400
Subject: [PATCH 1/3] refactor(markdown_parser): deduplicate link helper
 functions

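Extract `is_whitespace_token` and `get_title_close_char` to
`syntax/mod.rs` as shared `pub(crate)` helpers. Both `link_block.rs`
and `inline/links.rs` maintained identical local copies that could
silently diverge.

Also condense the doc comment on `at_setext_underline_after_newline`
in `syntax/mod.rs`; only the comment changes, not the function.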
---
 .../src/syntax/inline/links.rs                | 21 +---------
 .../src/syntax/link_block.rs                  | 25 +----------
 .../biome_markdown_parser/src/syntax/mod.rs   | 42 +++++++++++--------
 3 files changed, 29 insertions(+), 59 deletions(-)
diff --git a/crates/biome_markdown_parser/src/syntax/inline/links.rs b/crates/biome_markdown_parser/src/syntax/inline/links.rs
index dd4e746fd944..bc0f68019f1d 100644
--- a/crates/biome_markdown_parser/src/syntax/inline/links.rs
+++ b/crates/biome_markdown_parser/src/syntax/inline/links.rs
@@ -13,7 +13,8 @@ use crate::syntax::parse_error::{unclosed_image, unclosed_link};
 use crate::syntax::reference::normalize_reference_label;
 use crate::syntax::{
     LinkDestinationKind, MAX_LINK_DESTINATION_PAREN_DEPTH, ParenDepthResult,
-    ends_with_unescaped_close, try_update_paren_depth, validate_link_destination_text,
+    ends_with_unescaped_close, get_title_close_char, is_whitespace_token, try_update_paren_depth,
+    validate_link_destination_text,
 };
 
 /// Parse link starting with `[` - dispatches to inline link or reference link.
@@ -594,11 +595,6 @@ fn bump_textual_link_def(p: &mut MarkdownParser) {
     item.complete(p, MD_TEXTUAL);
 }
 
-fn is_whitespace_token(p: &MarkdownParser) -> bool {
-    let text = p.cur_text();
-    !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t')
-}
-
 fn inline_title_starts_after_whitespace_tokens(p: &mut MarkdownParser) -> bool {
     p.lookahead(|p| {
         let mut saw_whitespace = false;
@@ -778,19 +774,6 @@ fn bump_link_def_separator(p: &mut MarkdownParser) {
     }
 }
 
-fn get_title_close_char(p: &MarkdownParser) -> Option<char> {
-    let text = p.cur_text();
-    if text.starts_with('"') {
-        Some('"')
-    } else if text.starts_with('\'') {
-        Some('\'')
-    } else if p.at(L_PAREN) {
-        Some(')')
-    } else {
-        None
-    }
-}
-
 fn parse_title_content(p: &mut MarkdownParser, close_char: Option<char>) {
     let Some(close_char) = close_char else {
         return;
diff --git a/crates/biome_markdown_parser/src/syntax/link_block.rs b/crates/biome_markdown_parser/src/syntax/link_block.rs
index b8ffe716fa53..09fc2c7ed2f4 100644
--- a/crates/biome_markdown_parser/src/syntax/link_block.rs
+++ b/crates/biome_markdown_parser/src/syntax/link_block.rs
@@ -29,8 +29,8 @@ use crate::lexer::MarkdownLexContext;
 use crate::syntax::reference::normalize_reference_label;
 use crate::syntax::{
     LinkDestinationKind, MAX_BLOCK_PREFIX_INDENT, MAX_LINK_DESTINATION_PAREN_DEPTH,
-    ParenDepthResult, ends_with_unescaped_close, try_update_paren_depth,
-    validate_link_destination_text,
+    ParenDepthResult, ends_with_unescaped_close, get_title_close_char, is_whitespace_token,
+    try_update_paren_depth, validate_link_destination_text,
 };
 
 /// Maximum label length per CommonMark spec (999 characters).
@@ -632,21 +632,6 @@ fn parse_link_title(p: &mut MarkdownParser) {
     m.complete(p, MD_LINK_TITLE);
 }
 
-/// Get the closing character for a title based on current token.
-/// Returns None if not at a title start.
-fn get_title_close_char(p: &MarkdownParser) -> Option<char> {
-    let text = p.cur_text();
-    if text.starts_with('"') {
-        Some('"')
-    } else if text.starts_with('\'') {
-        Some('\'')
-    } else if p.at(L_PAREN) {
-        Some(')')
-    } else {
-        None
-    }
-}
-
 /// Parse title content until closing delimiter, including trailing whitespace.
 ///
 /// Inside title quotes, we use Regular context so whitespace doesn't split tokens.
@@ -708,12 +693,6 @@ fn parse_title_content(p: &mut MarkdownParser, close_char: Option<char>) {
     }
 }
 
-/// Check if current token is whitespace (space or tab).
-fn is_whitespace_token(p: &MarkdownParser) -> bool {
-    let text = p.cur_text();
-    !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t')
-}
-
 /// Consume the current token as an MdTextual node.
 ///
 /// This is a helper to reduce boilerplate for the common pattern:
diff --git a/crates/biome_markdown_parser/src/syntax/mod.rs b/crates/biome_markdown_parser/src/syntax/mod.rs
index 80ed9e4f53a4..9e63f95643c3 100644
--- a/crates/biome_markdown_parser/src/syntax/mod.rs
+++ b/crates/biome_markdown_parser/src/syntax/mod.rs
@@ -62,6 +62,27 @@ use thematic_break_block::{at_thematic_break_block, parse_thematic_break_block};
 
 use crate::MarkdownParser;
 
+/// Check if current token is whitespace (space or tab).
+pub(crate) fn is_whitespace_token(p: &MarkdownParser) -> bool {
+    let text = p.cur_text();
+    !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t')
+}
+
+/// Get the closing character for a title based on current token.
+/// Returns `None` if not at a title start.
+pub(crate) fn get_title_close_char(p: &MarkdownParser) -> Option<char> {
+    let text = p.cur_text();
+    if text.starts_with('"') {
+        Some('"')
+    } else if text.starts_with('\'') {
+        Some('\'')
+    } else if p.at(L_PAREN) {
+        Some(')')
+    } else {
+        None
+    }
+}
+
 /// Maximum paren nesting allowed in link destinations per CommonMark.
 pub(crate) const MAX_LINK_DESTINATION_PAREN_DEPTH: i32 = 32;
 
@@ -644,24 +665,11 @@ pub(crate) fn is_dash_only_thematic_break_text(text: &str) -> bool {
     !text.is_empty() && text.trim().chars().all(|c| c == '-')
 }
 
-/// Token-based check: is the current line a setext underline?
-///
-/// Call after consuming a NEWLINE token. Skips 0–3 columns of leading whitespace
-/// (tabs expand to the next tab stop per CommonMark §2.2), then checks for
-/// `MD_SETEXT_UNDERLINE_LITERAL` or a dash-only `MD_THEMATIC_BREAK_LITERAL`.
-///
-/// Returns `Some(bytes_consumed)` if the line is a setext underline, `None` otherwise.
-/// The byte count includes only the whitespace tokens consumed during the indent skip,
-/// NOT the underline token itself. Callers that track byte budgets must subtract this.
-///
-/// This is the shared helper for setext detection in inline contexts.
-/// Used by `has_matching_code_span_closer`, `parse_inline_html`, and `parse_inline_item_list`.
+/// Returns `Some(indent_bytes)` if the current line is a setext underline.
 ///
-/// Context safety: this function does NOT call `allow_setext_heading` because the token
-/// stream itself encodes context. In blockquotes, `R_ANGLE` tokens appear after NEWLINE
-/// before content, so the whitespace-only skip naturally rejects those lines. In list
-/// items, the indent reflected in the token stream is the raw line indent, and the
-/// `columns < 4` check correctly rejects lines with 4+ columns of leading whitespace.
+/// Call this after consuming `NEWLINE`. It skips up to 3 columns of leading
+/// whitespace, then checks for a setext underline token or a dash-only thematic
+/// break token. The returned byte count covers only the skipped whitespace.
 pub(crate) fn at_setext_underline_after_newline(p: &mut MarkdownParser) -> Option<usize> {
     let mut columns = 0;
     let mut bytes_consumed = 0;

From e7084bb07ddc849402d4526136a95ef05aefa978 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Tue, 7 Apr 2026 16:06:05 -1000
Subject: [PATCH 2/3] docs(markdown_parser): clarify intent of link helper
 functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

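Address review feedback from @ematipico:
- Rename is_whitespace_token to is_space_or_tab_token to make the
  ASCII-only intent self-evident
- Document why it uses space/tab only instead of the Dispatch::WHS
  lookup table (CommonMark §4.7/§6.3 semantics)
- Add CommonMark spec references to get_title_close_char docstring

Also rename the local saw_whitespace flag in
inline_title_starts_after_whitespace_tokens to saw_separator, since
is_title_separator_token also accepts a non-blank-line NEWLINE.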
---
 .../src/syntax/inline/links.rs                | 12 +++++-----
 .../src/syntax/link_block.rs                  | 22 +++++++++----------
 .../biome_markdown_parser/src/syntax/mod.rs   | 18 +++++++++++----
 3 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/crates/biome_markdown_parser/src/syntax/inline/links.rs b/crates/biome_markdown_parser/src/syntax/inline/links.rs
index bc0f68019f1d..ae52d226c688 100644
--- a/crates/biome_markdown_parser/src/syntax/inline/links.rs
+++ b/crates/biome_markdown_parser/src/syntax/inline/links.rs
@@ -13,7 +13,7 @@ use crate::syntax::parse_error::{unclosed_image, unclosed_link};
 use crate::syntax::reference::normalize_reference_label;
 use crate::syntax::{
     LinkDestinationKind, MAX_LINK_DESTINATION_PAREN_DEPTH, ParenDepthResult,
-    ends_with_unescaped_close, get_title_close_char, is_whitespace_token, try_update_paren_depth,
+    ends_with_unescaped_close, get_title_close_char, is_space_or_tab_token, try_update_paren_depth,
     validate_link_destination_text,
 };
 
@@ -597,12 +597,12 @@ fn bump_textual_link_def(p: &mut MarkdownParser) {
 
 fn inline_title_starts_after_whitespace_tokens(p: &mut MarkdownParser) -> bool {
     p.lookahead(|p| {
-        let mut saw_whitespace = false;
+        let mut saw_separator = false;
         while is_title_separator_token(p) {
             bump_link_def_separator(p);
-            saw_whitespace = true;
+            saw_separator = true;
         }
-        saw_whitespace && get_title_close_char(p).is_some()
+        saw_separator && get_title_close_char(p).is_some()
     })
 }
 
@@ -727,7 +727,7 @@ fn parse_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationSc
     }
 
     while !p.at(EOF) && !p.at(NEWLINE) {
-        if is_whitespace_token(p) {
+        if is_space_or_tab_token(p) {
             break;
         }
         let text = p.cur_text();
@@ -761,7 +761,7 @@ fn parse_inline_link_destination_tokens(p: &mut MarkdownParser) -> DestinationSc
 }
 
 fn is_title_separator_token(p: &MarkdownParser) -> bool {
-    is_whitespace_token(p) || (p.at(NEWLINE) && !p.at_blank_line())
+    is_space_or_tab_token(p) || (p.at(NEWLINE) && !p.at_blank_line())
 }
 
 fn bump_link_def_separator(p: &mut MarkdownParser) {
diff --git a/crates/biome_markdown_parser/src/syntax/link_block.rs b/crates/biome_markdown_parser/src/syntax/link_block.rs
index 09fc2c7ed2f4..314fb6099547 100644
--- a/crates/biome_markdown_parser/src/syntax/link_block.rs
+++ b/crates/biome_markdown_parser/src/syntax/link_block.rs
@@ -29,7 +29,7 @@ use crate::lexer::MarkdownLexContext;
 use crate::syntax::reference::normalize_reference_label;
 use crate::syntax::{
     LinkDestinationKind, MAX_BLOCK_PREFIX_INDENT, MAX_LINK_DESTINATION_PAREN_DEPTH,
-    ParenDepthResult, ends_with_unescaped_close, get_title_close_char, is_whitespace_token,
+    ParenDepthResult, ends_with_unescaped_close, get_title_close_char, is_space_or_tab_token,
     try_update_paren_depth, validate_link_destination_text,
 };
 
@@ -427,7 +427,7 @@ pub(crate) fn parse_link_block(p: &mut MarkdownParser) -> ParsedSyntax {
         // Check for title on next line - need to skip trailing whitespace first
         // Also validate that the title is complete and has no trailing content
         let has_valid_title_after_newline = p.lookahead(|p| {
-            while is_whitespace_token(p) {
+            while is_space_or_tab_token(p) {
                 p.bump_link_definition();
             }
             if p.at(NEWLINE) && !p.at_blank_line() {
@@ -488,14 +488,14 @@ fn parse_link_destination(p: &mut MarkdownParser) {
     let list = p.start();
 
     // Include optional whitespace before destination in the destination node.
-    while is_whitespace_token(p) {
+    while is_space_or_tab_token(p) {
         bump_textual_link_def(p);
     }
 
     // Per CommonMark §4.7, destination can be on the next line
     if p.at(NEWLINE) && !p.at_blank_line() {
         bump_textual_link_def(p);
-        while is_whitespace_token(p) {
+        while is_space_or_tab_token(p) {
             bump_textual_link_def(p);
         }
     }
@@ -514,7 +514,7 @@ fn parse_link_destination(p: &mut MarkdownParser) {
         let mut paren_depth: i32 = 0;
 
         while !p.at(EOF) && !p.at(NEWLINE) {
-            if is_whitespace_token(p) {
+            if is_space_or_tab_token(p) {
                 break; // Bare destination stops at first whitespace
             }
 
@@ -550,7 +550,7 @@ fn bump_textual_link_def(p: &mut MarkdownParser) {
 fn at_link_title(p: &mut MarkdownParser) -> bool {
     p.lookahead(|p| {
         // Skip whitespace before title
-        while is_whitespace_token(p) {
+        while is_space_or_tab_token(p) {
             p.bump_link_definition();
         }
         let text = p.cur_text();
@@ -589,7 +589,7 @@ fn parse_link_title_with_trailing_ws(p: &mut MarkdownParser) {
     let list = p.start();
 
     // Include trailing whitespace after destination
-    while is_whitespace_token(p) {
+    while is_space_or_tab_token(p) {
         bump_textual_link_def(p);
     }
 
@@ -599,7 +599,7 @@ fn parse_link_title_with_trailing_ws(p: &mut MarkdownParser) {
     }
 
     // Include leading whitespace on title line
-    while is_whitespace_token(p) {
+    while is_space_or_tab_token(p) {
         bump_textual_link_def(p);
     }
 
@@ -620,7 +620,7 @@ fn parse_link_title(p: &mut MarkdownParser) {
     let list = p.start();
 
     // Include optional filler whitespace before title
-    while is_whitespace_token(p) {
+    while is_space_or_tab_token(p) {
         bump_textual_link_def(p);
     }
 
@@ -653,7 +653,7 @@ fn parse_title_content(p: &mut MarkdownParser, close_char: Option<char>) {
     if is_complete {
         // Consume trailing whitespace after title (before newline)
         p.re_lex_link_definition();
-        while is_whitespace_token(p) {
+        while is_space_or_tab_token(p) {
             bump_textual_link_def(p);
         }
         return;
@@ -677,7 +677,7 @@ fn parse_title_content(p: &mut MarkdownParser, close_char: Option<char>) {
             bump_textual(p);
             // Consume trailing whitespace after title (before newline)
             p.re_lex_link_definition();
-            while is_whitespace_token(p) {
+            while is_space_or_tab_token(p) {
                 bump_textual_link_def(p);
             }
             break;
diff --git a/crates/biome_markdown_parser/src/syntax/mod.rs b/crates/biome_markdown_parser/src/syntax/mod.rs
index 9e63f95643c3..63bc59d0a858 100644
--- a/crates/biome_markdown_parser/src/syntax/mod.rs
+++ b/crates/biome_markdown_parser/src/syntax/mod.rs
@@ -62,14 +62,24 @@ use thematic_break_block::{at_thematic_break_block, parse_thematic_break_block};
 
 use crate::MarkdownParser;
 
-/// Check if current token is whitespace (space or tab).
-pub(crate) fn is_whitespace_token(p: &MarkdownParser) -> bool {
+/// Check if current token consists only of ASCII spaces and/or tabs.
+///
+/// This intentionally does **not** use `Dispatch::WHS` from the lookup table,
+/// which also classifies `\n`, `\r`, and other bytes as whitespace. CommonMark
+/// §4.7 and §6.3 allow spaces, tabs, and at most one line ending between a link
+/// destination and its title, so `NEWLINE` is handled separately by callers.
+/// The lexer uses the same narrow rule for link definitions.
+pub(crate) fn is_space_or_tab_token(p: &MarkdownParser) -> bool {
     let text = p.cur_text();
     !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t')
 }
 
-/// Get the closing character for a title based on current token.
-/// Returns `None` if not at a title start.
+/// Get the closing delimiter for a CommonMark link title (§4.7, §6.3).
+///
+/// A link title appears after the destination in link reference definitions
+/// (`[label]: url "title"`) and inline links (`[text](url "title")`). It may
+/// be enclosed in `"…"`, `'…'`, or `(…)`. Returns the expected closing
+/// character, or `None` if the current token does not start a title.
 pub(crate) fn get_title_close_char(p: &MarkdownParser) -> Option<char> {
     let text = p.cur_text();
     if text.starts_with('"') {

From cb4fe38d7aec50023d735b71efe51f141fade6e5 Mon Sep 17 00:00:00 2001
From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com>
Date: Tue, 7 Apr 2026 16:45:06 -1000
Subject: [PATCH 3/3] refactor(markdown_parser): use is_space_or_tab_token in
 link_block lookaheads

Replace three inline `text.chars().all(|c| c == ' ' || c == '\t')`
checks in skip_whitespace_tokens_tracked and skip_destination_tokens
with the shared is_space_or_tab_token helper to prevent drift.
---
 .../src/syntax/link_block.rs                  | 24 ++++++-------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/crates/biome_markdown_parser/src/syntax/link_block.rs b/crates/biome_markdown_parser/src/syntax/link_block.rs
index 314fb6099547..8f932942dbcb 100644
--- a/crates/biome_markdown_parser/src/syntax/link_block.rs
+++ b/crates/biome_markdown_parser/src/syntax/link_block.rs
@@ -207,14 +207,9 @@ fn skip_whitespace_tokens(p: &mut MarkdownParser) {
 /// Skip whitespace tokens (spaces/tabs) in lookahead and return whether any were skipped.
 fn skip_whitespace_tokens_tracked(p: &mut MarkdownParser) -> bool {
     let mut skipped = false;
-    while !p.at(EOF) && !p.at(NEWLINE) {
-        let text = p.cur_text();
-        if text.chars().all(|c| c == ' ' || c == '\t') && !text.is_empty() {
-            p.bump_link_definition();
-            skipped = true;
-        } else {
-            break;
-        }
+    while !p.at(EOF) && !p.at(NEWLINE) && is_space_or_tab_token(p) {
+        p.bump_link_definition();
+        skipped = true;
     }
     skipped
 }
@@ -239,13 +234,8 @@ enum DestinationResult {
 /// Skip destination tokens in lookahead. Returns the destination result.
 fn skip_destination_tokens(p: &mut MarkdownParser) -> DestinationResult {
     // Skip optional leading whitespace before destination
-    while !p.at(EOF) && !p.at(NEWLINE) {
-        let text = p.cur_text();
-        if text.chars().all(|c| c == ' ' || c == '\t') && !text.is_empty() {
-            p.bump_link_definition();
-        } else {
-            break;
-        }
+    while !p.at(EOF) && !p.at(NEWLINE) && is_space_or_tab_token(p) {
+        p.bump_link_definition();
     }
 
     if p.at(L_ANGLE) {
@@ -295,9 +285,8 @@ fn skip_destination_tokens(p: &mut MarkdownParser) -> DestinationResult {
         let mut pending_escape = false;
 
         while !p.at(EOF) && !p.at(NEWLINE) {
-            let text = p.cur_text();
             // Stop at whitespace
-            if text.chars().all(|c| c == ' ' || c == '\t') && !text.is_empty() {
+            if is_space_or_tab_token(p) {
                 if has_content {
                     saw_separator = true;
                 }
@@ -312,6 +301,7 @@ fn skip_destination_tokens(p: &mut MarkdownParser) -> DestinationResult {
                 break;
             }
 
+            let text = p.cur_text();
             if !validate_link_destination_text(text, LinkDestinationKind::Raw, &mut pending_escape)
             {
                 return DestinationResult::Invalid;