From d9380bb8653b355514df06d093bcab619a7ef358 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 10:33:09 +0000
Subject: [PATCH 01/14] Simplify tool-call dedup: drop hashlib, inline helpers

The duplicate tool-call detector only compares calls within a single
request from the same JSON parser, so dict key order is guaranteed
identical for identical calls (Python 3.7+ insertion-ordered dicts).

- Replace hashlib.md5(json.dumps(...)) with name + str(args)
- Inline _tool_call_key, _is_duplicate_call, _record_tool_call
  since each was only a few lines long and the duplicate check and
  the history append each had a single call site
- Remove unused hashlib import
---
 studio/backend/core/inference/llama_cpp.py | 23 ++++------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index c1f87ff936..4dffd67f24 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -10,7 +10,6 @@
 
 import atexit
 import contextlib
-import hashlib
 import json
 import re
 import struct
@@ -2181,22 +2180,6 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str:
         # identical call succeeded).
         _tool_call_history: list[tuple[str, bool]] = []  # (key, failed)
 
-        def _tool_call_key(name: str, args: dict) -> str:
-            raw = json.dumps({"t": name, "a": args}, sort_keys = True)
-            return hashlib.md5(raw.encode()).hexdigest()
-
-        def _is_duplicate_call(name: str, args: dict) -> bool:
-            """Block if the immediately previous call was identical and succeeded."""
-            if not _tool_call_history:
-                return False
-            key = _tool_call_key(name, args)
-            last_key, last_failed = _tool_call_history[-1]
-            return last_key == key and not last_failed
-
-        def _record_tool_call(name: str, args: dict, failed: bool) -> None:
-            key = _tool_call_key(name, args)
-            _tool_call_history.append((key, failed))
-
         for iteration in range(max_tool_iterations):
             if cancel_event is not None and cancel_event.is_set():
                 return
@@ -2692,7 +2675,9 @@ def _record_tool_call(name: str, args: dict, failed: bool) -> None:
                     }
 
                     # ── Duplicate call detection ──────────────
-                    if _is_duplicate_call(tool_name, arguments):
+                    _tc_key = tool_name + str(arguments)
+                    _prev = _tool_call_history[-1] if _tool_call_history else None
+                    if _prev and _prev[0] == _tc_key and not _prev[1]:
                         result = (
                             "You already made this exact call. "
                             "Do not repeat the same tool call. "
@@ -2734,7 +2719,7 @@ def _record_tool_call(name: str, args: dict, failed: bool) -> None:
                     _is_error = isinstance(result, str) and result.lstrip().startswith(
                         _error_prefixes
                     )
-                    _record_tool_call(tool_name, arguments, failed = _is_error)
+                    _tool_call_history.append((_tc_key, _is_error))
                     _result_content = result
                     if _is_error:
                         _result_content = (

From cc8fbbd559e8b595ca6e7d0af41fdd999855c373 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 10:35:54 +0000
Subject: [PATCH 02/14] Remove tool_calling_benchmark_results.md from repo

---
 .../tests/tool_calling_benchmark_results.md   | 62 -------------------
 1 file changed, 62 deletions(-)
 delete mode 100644 studio/backend/tests/tool_calling_benchmark_results.md

diff --git a/studio/backend/tests/tool_calling_benchmark_results.md b/studio/backend/tests/tool_calling_benchmark_results.md
deleted file mode 100644
index c2b0687895..0000000000
--- a/studio/backend/tests/tool_calling_benchmark_results.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# GGUF Tool Calling Benchmark Results
-
-Prompt: "List and categorize all the songs that charted #3 on the Billboard Hot 100 in 2015."
-10 runs per configuration, web search + code execution + thinking enabled.
-GPU: NVIDIA B200, CUDA_VISIBLE_DEVICES=2.
-
-Ground truth: 4 songs peaked at #3 in 2015 -- "Love Me like You Do" (Ellie Goulding), "Earned It" (The Weeknd), "Watch Me" (Silento), "Drag Me Down" (One Direction).
-
-## Cartesian Grid: Model x Quant x KV Cache
-
-| Model | Quant | KV Cache | OK/10 | Avg Time | Avg Tools | XML Leaks | URL Fetch | Peak3 Avg | All 4/4 | Best Songs |
-|-------|-------|----------|-------|----------|-----------|-----------|-----------|-----------|---------|------------|
-| 4B | UD-Q4_K_XL | f16 | 10/10 | 9.8s | 3.5 | 0/10 | 4/10 | 0.8/4 | 2/10 | 9 |
-| 4B | UD-Q4_K_XL | bf16 | 10/10 | 10.6s | 4.5 | 0/10 | 4/10 | 0.4/4 | 1/10 | 5 |
-| 4B | Q8_0 | f16 | 10/10 | 4.9s | 2.4 | 0/10 | 8/10 | 0.4/4 | 1/10 | 5 |
-| 4B | Q8_0 | bf16 | 10/10 | 8.0s | 3.0 | 0/10 | 5/10 | 0.0/4 | 0/10 | 0 |
-| 9B | UD-Q4_K_XL | f16 | 10/10 | 6.7s | 2.0 | 0/10 | 5/10 | 0.0/4 | 0/10 | 3 |
-| 9B | UD-Q4_K_XL | bf16 | 9/10 | 49.5s | 2.4 | 0/10 | 5/10 | 0.0/4 | 0/10 | 1 |
-| 9B | Q8_0 | f16 | 10/10 | 7.4s | 2.5 | 0/10 | 5/10 | 0.0/4 | 0/10 | 2 |
-| 9B | Q8_0 | bf16 | 10/10 | 10.4s | 2.7 | 0/10 | 6/10 | 1.0/4 | 2/10 | 15 |
-| **27B** | **UD-Q4_K_XL** | **bf16** | **9/10** | **131.1s** | **13.8** | **0/10** | **7/10** | **2.7/4** | **6/10** | **27** |
-| 27B | UD-Q4_K_XL | f16 | 7/10 | 201.6s | 14.1 | 0/10 | 8/10 | 2.0/4 | 5/10 | 26 |
-| 27B | Q8_0 | f16 | 4/10 | 312.5s | 16.0 | 1/10 | 10/10 | 2.4/4 | 6/10 | 28 |
-| 27B | Q8_0 | bf16 | 5/10 | 258.4s | 16.5 | 2/10 | 10/10 | 0.9/4 | 1/10 | 27 |
-| 35B-A3B | UD-Q4_K_XL | f16 | 3/10 | 353.6s | 14.7 | 1/10 | 6/10 | 1.2/4 | 3/10 | 27 |
-| 35B-A3B | UD-Q4_K_XL | bf16 | 3/10 | 356.2s | 17.2 | 1/10 | 8/10 | 1.6/4 | 4/10 | 27 |
-| 35B-A3B | Q8_0 | f16 | 2/10 | 372.1s | 17.6 | 1/10 | 7/10 | 1.2/4 | 3/10 | 26 |
-| 35B-A3B | Q8_0 | bf16 | 6/10 | 267.7s | 17.5 | 1/10 | 8/10 | 2.4/4 | 6/10 | 27 |
-
-**Column definitions:**
-- **Peak3 Avg**: Average number of correct peak-#3 songs found per run (out of 4)
-- **All 4/4**: Runs where all 4 correct songs were identified
-- **Best Songs**: Maximum number of Billboard 2015 songs mentioned in any single run (out of 31 tracked)
-- **URL Fetch**: Runs where the model used web_search with `url` parameter to fetch full page content
-
-## Key Findings
-
-1. **27B UD-Q4_K_XL + bf16 KV is the sweet spot.** 6/10 runs found all 4 correct songs, 0 XML leaks, 131s average. Best balance of accuracy, speed, and reliability.
-
-2. **Larger models use tools more effectively.** 27B and 35B-A3B models used 13-17 tool calls per query (vs 2-4 for 4B/9B), performing multiple searches and URL fetches to find the answer.
-
-3. **27B Q8_0 had the highest raw accuracy (6/10 all-4/4) but lower reliability** -- only 4/10 OK runs due to timeouts on long agentic chains. The UD-Q4_K_XL quant is more practical.
-
-4. **4B models were fastest (5-10s) but least accurate.** They occasionally found all 4 songs (2/10 best case) when they happened to fetch the right Wikipedia page.
-
-5. **9B was surprisingly weaker than 4B on this task.** It used fewer tool calls and rarely extracted song data from fetched pages. The 9B model may need higher temperature or different prompting for this specific task type.
-
-6. **35B-A3B had reliability issues.** Most runs timed out or errored due to slow per-token generation with many tool iterations. When it completed (2-6/10 OK), accuracy was comparable to 27B.
-
-7. **bf16 KV cache had mixed effects.** For 27B it improved both speed (131s vs 202s) and accuracy (6/10 vs 5/10 all-4/4). For smaller models it had no consistent benefit.
-
-8. **XML leaks are nearly eliminated.** 0/10 for all 4B and 9B configs, and only 1-2/10 for the largest models (which generate much more text in complex agentic loops).
-
-## Before vs After (4B UD-Q4_K_XL, f16 KV)
-
-| Metric | Before Changes | After Changes |
-|--------|---------------|---------------|
-| XML leaks | 10/10 | 0/10 |
-| URL fetches | 0/10 | 4/10 |
-| Peak3 accuracy | 0.0/4 | 0.8/4 |
-| Runs with all 4 songs | 0/10 | 2/10 |
-| Avg time | 12.3s | 9.8s |

From dff2fd3c3fb88c7d2799499e4ee7fa4d1c3e7b44 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 10:55:53 +0000
Subject: [PATCH 03/14] Replace html2text with builtin HTML-to-Markdown
 converter

Drop the external html2text (GPL-3.0) dependency and its regex
fallback. Add _html_to_md.py (276 lines, stdlib only), an
html.parser.HTMLParser subclass that handles headings, links,
bold/italic, lists, tables, blockquotes, code blocks, and entity
decoding. Content inside script/style/head/noscript/svg/math tags is
stripped entirely.
---
 studio/backend/core/inference/_html_to_md.py | 276 +++++++++++++++++++
 studio/backend/core/inference/tools.py       |  27 +-
 2 files changed, 280 insertions(+), 23 deletions(-)
 create mode 100644 studio/backend/core/inference/_html_to_md.py

diff --git a/studio/backend/core/inference/_html_to_md.py b/studio/backend/core/inference/_html_to_md.py
new file mode 100644
index 0000000000..4489e46610
--- /dev/null
+++ b/studio/backend/core/inference/_html_to_md.py
@@ -0,0 +1,276 @@
+# SPDX-License-Identifier: AGPL-3.0-only
+# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. See /studio/LICENSE.AGPL-3.0
+
+"""
+Minimal HTML-to-Markdown converter using only the standard library.
+
+Replaces the external ``html2text`` (GPL-3.0) dependency with a ~180-line
+``html.parser.HTMLParser`` subclass.  Covers headings, links, bold/italic,
+lists, tables, blockquotes, code blocks, and entity decoding.
+"""
+
+from __future__ import annotations
+
+import html
+import re
+from html.parser import HTMLParser
+
+__all__ = ["html_to_markdown"]
+
+_SKIP_TAGS = frozenset({"script", "style", "head", "noscript", "svg", "math"})
+_BLOCK_TAGS = frozenset({
+    "p", "div", "section", "article", "header", "footer", "main", "aside",
+    "nav", "figure", "figcaption", "details", "summary", "hr",
+})
+_HEADING_TAGS = frozenset({"h1", "h2", "h3", "h4", "h5", "h6"})
+_INLINE_EMPHASIS = {"strong": "**", "b": "**", "em": "*", "i": "*"}
+
+
+class _MarkdownRenderer(HTMLParser):
+    """HTMLParser subclass that emits Markdown tokens into a list."""
+
+    def __init__(self):
+        super().__init__(convert_charrefs=False)
+        self._out: list[str] = []
+        self._skip_depth: int = 0
+
+        # Link state
+        self._link_href: str | None = None
+        self._link_text_parts: list[str] = []
+        self._in_link: bool = False
+
+        # List state
+        self._list_stack: list[str] = []  # "ul" or "ol"
+        self._ol_counter: list[int] = []
+
+        # Table state
+        self._in_table: bool = False
+        self._current_row: list[str] = []
+        self._cell_parts: list[str] = []
+        self._in_cell: bool = False
+        self._header_row_done: bool = False
+        self._is_header_cell: bool = False
+
+        # Pre/code state
+        self._in_pre: bool = False
+        self._pre_parts: list[str] = []
+
+        # Blockquote depth
+        self._bq_depth: int = 0
+
+    # ------------------------------------------------------------------
+    def _emit(self, text: str) -> None:
+        if self._in_link:
+            self._link_text_parts.append(text)
+        elif self._in_cell:
+            self._cell_parts.append(text)
+        elif self._in_pre:
+            self._pre_parts.append(text)
+        else:
+            self._out.append(text)
+
+    # ------------------------------------------------------------------
+    # Tag handlers
+    # ------------------------------------------------------------------
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+        tag = tag.lower()
+
+        if tag in _SKIP_TAGS:
+            self._skip_depth += 1
+            return
+        if self._skip_depth:
+            return
+
+        attr_dict = dict(attrs)
+
+        if tag in _HEADING_TAGS:
+            level = int(tag[1])
+            self._emit("\n\n" + "#" * level + " ")
+
+        elif tag == "a":
+            self._link_href = attr_dict.get("href")
+            self._link_text_parts = []
+            self._in_link = True
+
+        elif tag in _INLINE_EMPHASIS:
+            self._emit(_INLINE_EMPHASIS[tag])
+
+        elif tag == "br":
+            self._emit("\n")
+
+        elif tag in _BLOCK_TAGS:
+            self._emit("\n\n")
+
+        elif tag == "hr":
+            self._emit("\n\n---\n\n")
+
+        elif tag == "blockquote":
+            self._bq_depth += 1
+            self._emit("\n\n" + "> " * self._bq_depth)
+
+        elif tag == "ul":
+            self._list_stack.append("ul")
+            self._emit("\n")
+
+        elif tag == "ol":
+            self._list_stack.append("ol")
+            self._ol_counter.append(0)
+            self._emit("\n")
+
+        elif tag == "li":
+            indent = "  " * max(0, len(self._list_stack) - 1)
+            if self._list_stack and self._list_stack[-1] == "ol":
+                self._ol_counter[-1] += 1
+                self._emit(f"\n{indent}{self._ol_counter[-1]}. ")
+            else:
+                self._emit(f"\n{indent}* ")
+
+        elif tag == "pre":
+            self._in_pre = True
+            self._pre_parts = []
+            self._emit("\n\n```\n")
+
+        elif tag == "code" and not self._in_pre:
+            self._emit("`")
+
+        elif tag == "table":
+            self._in_table = True
+            self._header_row_done = False
+            self._emit("\n\n")
+
+        elif tag == "tr":
+            self._current_row = []
+
+        elif tag in ("th", "td"):
+            self._cell_parts = []
+            self._in_cell = True
+            self._is_header_cell = tag == "th"
+
+        elif tag == "img":
+            alt = attr_dict.get("alt", "")
+            if alt:
+                self._emit(alt)
+
+    def handle_endtag(self, tag: str) -> None:
+        tag = tag.lower()
+
+        if tag in _SKIP_TAGS:
+            self._skip_depth = max(0, self._skip_depth - 1)
+            return
+        if self._skip_depth:
+            return
+
+        if tag in _HEADING_TAGS:
+            self._emit("\n\n")
+
+        elif tag == "a":
+            text = "".join(self._link_text_parts).strip()
+            href = self._link_href or ""
+            self._in_link = False
+            if href and text:
+                self._emit(f"[{text}]({href})")
+            elif text:
+                self._emit(text)
+
+        elif tag in _INLINE_EMPHASIS:
+            self._emit(_INLINE_EMPHASIS[tag])
+
+        elif tag in _BLOCK_TAGS:
+            self._emit("\n\n")
+
+        elif tag == "blockquote":
+            self._bq_depth = max(0, self._bq_depth - 1)
+            self._emit("\n\n")
+
+        elif tag == "ul":
+            if self._list_stack and self._list_stack[-1] == "ul":
+                self._list_stack.pop()
+            self._emit("\n")
+
+        elif tag == "ol":
+            if self._list_stack and self._list_stack[-1] == "ol":
+                self._list_stack.pop()
+                if self._ol_counter:
+                    self._ol_counter.pop()
+            self._emit("\n")
+
+        elif tag == "pre":
+            raw = "".join(self._pre_parts)
+            self._out.append(raw)
+            self._in_pre = False
+            self._emit("\n```\n\n")
+
+        elif tag == "code" and not self._in_pre:
+            self._emit("`")
+
+        elif tag in ("th", "td"):
+            self._in_cell = False
+            cell_text = "".join(self._cell_parts).strip()
+            self._current_row.append(cell_text)
+
+        elif tag == "tr":
+            if self._current_row:
+                line = "| " + " | ".join(self._current_row) + " |"
+                self._emit(line + "\n")
+                if self._is_header_cell and not self._header_row_done:
+                    sep = "| " + " | ".join("---" for _ in self._current_row) + " |"
+                    self._emit(sep + "\n")
+                    self._header_row_done = True
+            self._current_row = []
+            self._is_header_cell = False
+
+        elif tag == "table":
+            self._in_table = False
+            self._emit("\n")
+
+    # ------------------------------------------------------------------
+    # Text / entity handlers
+    # ------------------------------------------------------------------
+    def handle_data(self, data: str) -> None:
+        if self._skip_depth:
+            return
+        if self._in_pre:
+            self._pre_parts.append(data)
+            return
+        # Collapse whitespace for non-pre content
+        text = re.sub(r"[ \t]+", " ", data)
+        self._emit(text)
+
+    def handle_entityref(self, name: str) -> None:
+        if self._skip_depth:
+            return
+        self._emit(html.unescape(f"&{name};"))
+
+    def handle_charref(self, name: str) -> None:
+        if self._skip_depth:
+            return
+        self._emit(html.unescape(f"&#{name};"))
+
+
+# ------------------------------------------------------------------
+# Post-processing
+# ------------------------------------------------------------------
+def _cleanup(text: str) -> str:
+    """Normalize whitespace and blank lines in the final output."""
+    # Collapse runs of 3+ newlines into 2
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    # Remove trailing spaces on each line
+    text = re.sub(r" +$", "", text, flags=re.MULTILINE)
+    return text.strip()
+
+
+# ------------------------------------------------------------------
+# Public API
+# ------------------------------------------------------------------
+def html_to_markdown(source_html: str) -> str:
+    """Convert an HTML string to Markdown.
+
+    Handles headings, links, bold/italic, lists (ordered and unordered),
+    tables, blockquotes, code blocks, and HTML entities.  ``<script>``,
+    ``<style>``, and ``<head>`` sections are stripped entirely.
+    """
+    renderer = _MarkdownRenderer()
+    renderer.feed(source_html)
+    renderer.close()
+    raw = "".join(renderer._out)
+    return _cleanup(raw)
diff --git a/studio/backend/core/inference/tools.py b/studio/backend/core/inference/tools.py
index 65302fe2f3..2ac8f76322 100644
--- a/studio/backend/core/inference/tools.py
+++ b/studio/backend/core/inference/tools.py
@@ -201,7 +201,6 @@ def _fetch_page_text(
     Blocks private/loopback/link-local targets (SSRF protection) and caps
     the download size to avoid unbounded memory usage.
     """
-    import re as _re
     from urllib.parse import urlparse
 
     parsed = urlparse(url)
@@ -282,28 +281,10 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
     except Exception as e:
         return f"Failed to fetch URL: {e}"
 
-    # Convert HTML to text -- prefer html2text for clean markdown output
-    try:
-        import html2text as _h2t
-
-        converter = _h2t.HTML2Text()
-        converter.ignore_links = False
-        converter.ignore_images = True
-        converter.body_width = 0  # no wrapping
-        text = converter.handle(raw_html).strip()
-    except ImportError:
-        # Fallback: regex-based stripping
-        text = _re.sub(
-            r"<script[^>]*>.*?</script[^>]*>",
-            "",
-            raw_html,
-            flags = _re.DOTALL | _re.IGNORECASE,
-        )
-        text = _re.sub(
-            r"<style[^>]*>.*?</style[^>]*>", "", text, flags = _re.DOTALL | _re.IGNORECASE
-        )
-        text = _re.sub(r"<[^>]+>", " ", text)
-        text = _re.sub(r"\s+", " ", text).strip()
+    # Convert HTML to Markdown using the builtin converter (no external deps)
+    from ._html_to_md import html_to_markdown
+
+    text = html_to_markdown(raw_html)
 
     if not text:
         return "(page returned no readable text)"

From 40cd78307b0aafc296b0d3a2d75122bbe3f457dd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 31 Mar 2026 10:58:00 +0000
Subject: [PATCH 04/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 studio/backend/core/inference/_html_to_md.py | 26 +++++++++++++++-----
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/studio/backend/core/inference/_html_to_md.py b/studio/backend/core/inference/_html_to_md.py
index 4489e46610..5545faa7c2 100644
--- a/studio/backend/core/inference/_html_to_md.py
+++ b/studio/backend/core/inference/_html_to_md.py
@@ -18,10 +18,24 @@
 __all__ = ["html_to_markdown"]
 
 _SKIP_TAGS = frozenset({"script", "style", "head", "noscript", "svg", "math"})
-_BLOCK_TAGS = frozenset({
-    "p", "div", "section", "article", "header", "footer", "main", "aside",
-    "nav", "figure", "figcaption", "details", "summary", "hr",
-})
+_BLOCK_TAGS = frozenset(
+    {
+        "p",
+        "div",
+        "section",
+        "article",
+        "header",
+        "footer",
+        "main",
+        "aside",
+        "nav",
+        "figure",
+        "figcaption",
+        "details",
+        "summary",
+        "hr",
+    }
+)
 _HEADING_TAGS = frozenset({"h1", "h2", "h3", "h4", "h5", "h6"})
 _INLINE_EMPHASIS = {"strong": "**", "b": "**", "em": "*", "i": "*"}
 
@@ -30,7 +44,7 @@ class _MarkdownRenderer(HTMLParser):
     """HTMLParser subclass that emits Markdown tokens into a list."""
 
     def __init__(self):
-        super().__init__(convert_charrefs=False)
+        super().__init__(convert_charrefs = False)
         self._out: list[str] = []
         self._skip_depth: int = 0
 
@@ -255,7 +269,7 @@ def _cleanup(text: str) -> str:
     # Collapse runs of 3+ newlines into 2
     text = re.sub(r"\n{3,}", "\n\n", text)
     # Remove trailing spaces on each line
-    text = re.sub(r" +$", "", text, flags=re.MULTILINE)
+    text = re.sub(r" +$", "", text, flags = re.MULTILINE)
     return text.strip()
 
 

From b304264118a6ac3642cc3c2c5b695b31d39ce77c Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 10:59:42 +0000
Subject: [PATCH 05/14] Use json.dumps(sort_keys=True) for tool-call dedup key

str(dict) is sensitive to insertion order, so semantically identical
calls with different key ordering would bypass duplicate detection.
Switch to json.dumps with sort_keys=True for a canonical representation.
---
 studio/backend/core/inference/llama_cpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 4dffd67f24..41f2ae9565 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -2675,7 +2675,7 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str:
                     }
 
                     # ── Duplicate call detection ──────────────
-                    _tc_key = tool_name + str(arguments)
+                    _tc_key = tool_name + json.dumps(arguments, sort_keys=True)
                     _prev = _tool_call_history[-1] if _tool_call_history else None
                     if _prev and _prev[0] == _tc_key and not _prev[1]:
                         result = (

From 70341be1b341d8f797bedaa424e53588971da1e0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 31 Mar 2026 11:00:53 +0000
Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 studio/backend/core/inference/llama_cpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 41f2ae9565..306206e610 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -2675,7 +2675,7 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str:
                     }
 
                     # ── Duplicate call detection ──────────────
-                    _tc_key = tool_name + json.dumps(arguments, sort_keys=True)
+                    _tc_key = tool_name + json.dumps(arguments, sort_keys = True)
                     _prev = _tool_call_history[-1] if _tool_call_history else None
                     if _prev and _prev[0] == _tc_key and not _prev[1]:
                         result = (

From 2046c5c6e0e45d5b18d2acb647773662ee6c8479 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 11:04:32 +0000
Subject: [PATCH 07/14] Revert dedup key to str(arguments)

json.dumps(sort_keys=True) is unnecessary here -- the arguments dict
always comes from the same JSON parser within a single request, so
key insertion order is deterministic (Python 3.7+).  str() is faster
and sufficient for consecutive-call dedup.
---
 studio/backend/core/inference/llama_cpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 306206e610..4dffd67f24 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -2675,7 +2675,7 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str:
                     }
 
                     # ── Duplicate call detection ──────────────
-                    _tc_key = tool_name + json.dumps(arguments, sort_keys = True)
+                    _tc_key = tool_name + str(arguments)
                     _prev = _tool_call_history[-1] if _tool_call_history else None
                     if _prev and _prev[0] == _tc_key and not _prev[1]:
                         result = (

From c99f828372eb8dba52e2d87c68fec6ae3fe0b320 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 11:09:17 +0000
Subject: [PATCH 08/14] Address review comments on _html_to_md.py

- Remove "hr" from _BLOCK_TAGS so the dedicated hr handler is reachable
- Prefix all newlines with ">" inside blockquotes (multi-line support)
- Emit full ![alt](url) for images instead of alt text only
- Replace newlines with spaces inside table cells
- Track header cells per-row (_row_has_th) instead of last-cell-only
- Strip trailing tabs in addition to spaces in cleanup regex
---
 studio/backend/core/inference/_html_to_md.py | 23 ++++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/studio/backend/core/inference/_html_to_md.py b/studio/backend/core/inference/_html_to_md.py
index 5545faa7c2..756337e56d 100644
--- a/studio/backend/core/inference/_html_to_md.py
+++ b/studio/backend/core/inference/_html_to_md.py
@@ -33,7 +33,6 @@
         "figcaption",
         "details",
         "summary",
-        "hr",
     }
 )
 _HEADING_TAGS = frozenset({"h1", "h2", "h3", "h4", "h5", "h6"})
@@ -63,7 +62,7 @@ def __init__(self):
         self._cell_parts: list[str] = []
         self._in_cell: bool = False
         self._header_row_done: bool = False
-        self._is_header_cell: bool = False
+        self._row_has_th: bool = False
 
         # Pre/code state
         self._in_pre: bool = False
@@ -81,6 +80,10 @@ def _emit(self, text: str) -> None:
         elif self._in_pre:
             self._pre_parts.append(text)
         else:
+            # Prefix newlines with blockquote markers when inside <blockquote>
+            if self._bq_depth and "\n" in text:
+                prefix = "> " * self._bq_depth
+                text = text.replace("\n", "\n" + prefix)
             self._out.append(text)
 
     # ------------------------------------------------------------------
@@ -158,12 +161,14 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
         elif tag in ("th", "td"):
             self._cell_parts = []
             self._in_cell = True
-            self._is_header_cell = tag == "th"
+            if tag == "th":
+                self._row_has_th = True
 
         elif tag == "img":
             alt = attr_dict.get("alt", "")
-            if alt:
-                self._emit(alt)
+            src = attr_dict.get("src", "")
+            if alt or src:
+                self._emit(f"![{alt}]({src})")
 
     def handle_endtag(self, tag: str) -> None:
         tag = tag.lower()
@@ -219,19 +224,19 @@ def handle_endtag(self, tag: str) -> None:
 
         elif tag in ("th", "td"):
             self._in_cell = False
-            cell_text = "".join(self._cell_parts).strip()
+            cell_text = "".join(self._cell_parts).strip().replace("\n", " ")
             self._current_row.append(cell_text)
 
         elif tag == "tr":
             if self._current_row:
                 line = "| " + " | ".join(self._current_row) + " |"
                 self._emit(line + "\n")
-                if self._is_header_cell and not self._header_row_done:
+                if self._row_has_th and not self._header_row_done:
                     sep = "| " + " | ".join("---" for _ in self._current_row) + " |"
                     self._emit(sep + "\n")
                     self._header_row_done = True
             self._current_row = []
-            self._is_header_cell = False
+            self._row_has_th = False
 
         elif tag == "table":
             self._in_table = False
@@ -269,7 +274,7 @@ def _cleanup(text: str) -> str:
     # Collapse runs of 3+ newlines into 2
     text = re.sub(r"\n{3,}", "\n\n", text)
     # Remove trailing spaces on each line
-    text = re.sub(r" +$", "", text, flags = re.MULTILINE)
+    text = re.sub(r"[ \t]+$", "", text, flags = re.MULTILINE)
     return text.strip()
 
 

From 2c27165924223357a5879a5194b65cdbcc5ccaa5 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 11:38:07 +0000
Subject: [PATCH 09/14] Fix blockquote rendering, truncated-HTML buffer flush,
 and dedup key canonicalization

_html_to_md.py:
- Rewrite blockquote handling with stack-based buffer approach so nested
  blockquotes, pre blocks inside blockquotes, and multi-paragraph quotes
  all render correctly with proper "> " prefix on every line.
- Add flush_pending() to recover content from truncated HTML where closing
  tags are missing (common when _fetch_page_text caps the download size).
  Flushes open <a>, <td>, <pre>, and blockquote buffers.
- Skip <img> tags to match prior html2text ignore_images=True behavior
  and avoid data-URI amplification consuming the output budget.
- Collapse all whitespace (including newlines) in non-pre content per
  standard HTML whitespace rules: \s+ -> single space.
- Escape pipe characters in table cell content to prevent column breakage.
- Emit separator row after the first row for tables without <th> headers.
- Guard against IndexError on _ol_counter for orphan <li> elements.
- Normalize CRLF line endings before parsing.

llama_cpp.py:
- Restore canonical dedup key with json.dumps(sort_keys=True) so that
  semantically identical tool calls with different JSON key order are
  correctly detected as duplicates.
---
 studio/backend/core/inference/_html_to_md.py | 121 +++++++++++++++----
 studio/backend/core/inference/llama_cpp.py   |   4 +-
 2 files changed, 103 insertions(+), 22 deletions(-)

diff --git a/studio/backend/core/inference/_html_to_md.py b/studio/backend/core/inference/_html_to_md.py
index 756337e56d..f7237b64f5 100644
--- a/studio/backend/core/inference/_html_to_md.py
+++ b/studio/backend/core/inference/_html_to_md.py
@@ -4,7 +4,7 @@
 """
 Minimal HTML-to-Markdown converter using only the standard library.
 
-Replaces the external ``html2text`` (GPL-3.0) dependency with a ~180-line
+Replaces the external ``html2text`` (GPL-3.0) dependency with a ~220-line
 ``html.parser.HTMLParser`` subclass.  Covers headings, links, bold/italic,
 lists, tables, blockquotes, code blocks, and entity decoding.
 """
@@ -63,13 +63,17 @@ def __init__(self):
         self._in_cell: bool = False
         self._header_row_done: bool = False
         self._row_has_th: bool = False
+        self._is_first_row: bool = False
 
         # Pre/code state
         self._in_pre: bool = False
         self._pre_parts: list[str] = []
 
-        # Blockquote depth
+        # Blockquote state -- stack of output buffers so nested
+        # blockquotes each collect their own content and get prefixed
+        # with the correct number of ">" markers on close.
         self._bq_depth: int = 0
+        self._bq_stack: list[list[str]] = []
 
     # ------------------------------------------------------------------
     def _emit(self, text: str) -> None:
@@ -79,13 +83,24 @@ def _emit(self, text: str) -> None:
             self._cell_parts.append(text)
         elif self._in_pre:
             self._pre_parts.append(text)
+        elif self._bq_stack:
+            self._bq_stack[-1].append(text)
         else:
-            # Prefix newlines with blockquote markers when inside <blockquote>
-            if self._bq_depth and "\n" in text:
-                prefix = "> " * self._bq_depth
-                text = text.replace("\n", "\n" + prefix)
             self._out.append(text)
 
+    # ------------------------------------------------------------------
+    def _prefix_blockquote(self, content: str) -> str:
+        """Prefix every line of *content* with ``> ``."""
+        content = re.sub(r"\n{3,}", "\n\n", content).strip()
+        lines = content.split("\n")
+        prefixed: list[str] = []
+        for line in lines:
+            if line.strip():
+                prefixed.append("> " + line)
+            else:
+                prefixed.append(">")
+        return "\n".join(prefixed)
+
     # ------------------------------------------------------------------
     # Tag handlers
     # ------------------------------------------------------------------
@@ -122,8 +137,9 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
             self._emit("\n\n---\n\n")
 
         elif tag == "blockquote":
+            self._emit("\n\n")
             self._bq_depth += 1
-            self._emit("\n\n" + "> " * self._bq_depth)
+            self._bq_stack.append([])
 
         elif tag == "ul":
             self._list_stack.append("ul")
@@ -137,15 +153,17 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
         elif tag == "li":
             indent = "  " * max(0, len(self._list_stack) - 1)
             if self._list_stack and self._list_stack[-1] == "ol":
-                self._ol_counter[-1] += 1
-                self._emit(f"\n{indent}{self._ol_counter[-1]}. ")
+                if self._ol_counter:
+                    self._ol_counter[-1] += 1
+                    self._emit(f"\n{indent}{self._ol_counter[-1]}. ")
+                else:
+                    self._emit(f"\n{indent}1. ")
             else:
                 self._emit(f"\n{indent}* ")
 
         elif tag == "pre":
-            self._in_pre = True
             self._pre_parts = []
-            self._emit("\n\n```\n")
+            self._in_pre = True
 
         elif tag == "code" and not self._in_pre:
             self._emit("`")
@@ -153,6 +171,7 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
         elif tag == "table":
             self._in_table = True
             self._header_row_done = False
+            self._is_first_row = True
             self._emit("\n\n")
 
         elif tag == "tr":
@@ -165,10 +184,9 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
                 self._row_has_th = True
 
         elif tag == "img":
-            alt = attr_dict.get("alt", "")
-            src = attr_dict.get("src", "")
-            if alt or src:
-                self._emit(f"![{alt}]({src})")
+            # Skip images -- keeps fetched page text focused on readable
+            # content and avoids data-URI amplification.
+            return
 
     def handle_endtag(self, tag: str) -> None:
         tag = tag.lower()
@@ -199,7 +217,10 @@ def handle_endtag(self, tag: str) -> None:
 
         elif tag == "blockquote":
             self._bq_depth = max(0, self._bq_depth - 1)
-            self._emit("\n\n")
+            if self._bq_stack:
+                content = "".join(self._bq_stack.pop())
+                prefixed = self._prefix_blockquote(content)
+                self._emit("\n\n" + prefixed + "\n\n")
 
         elif tag == "ul":
             if self._list_stack and self._list_stack[-1] == "ul":
@@ -215,9 +236,9 @@ def handle_endtag(self, tag: str) -> None:
 
         elif tag == "pre":
             raw = "".join(self._pre_parts)
-            self._out.append(raw)
             self._in_pre = False
-            self._emit("\n```\n\n")
+            block = "```\n" + raw + "\n```"
+            self._emit("\n\n" + block + "\n\n")
 
         elif tag == "code" and not self._in_pre:
             self._emit("`")
@@ -225,6 +246,8 @@ def handle_endtag(self, tag: str) -> None:
         elif tag in ("th", "td"):
             self._in_cell = False
             cell_text = "".join(self._cell_parts).strip().replace("\n", " ")
+            # Escape pipe characters so they do not break table columns
+            cell_text = cell_text.replace("|", "\\|")
             self._current_row.append(cell_text)
 
         elif tag == "tr":
@@ -235,6 +258,12 @@ def handle_endtag(self, tag: str) -> None:
                     sep = "| " + " | ".join("---" for _ in self._current_row) + " |"
                     self._emit(sep + "\n")
                     self._header_row_done = True
+                elif self._is_first_row and not self._header_row_done:
+                    # Tables without <th> still need a separator for valid Markdown
+                    sep = "| " + " | ".join("---" for _ in self._current_row) + " |"
+                    self._emit(sep + "\n")
+                    self._header_row_done = True
+                self._is_first_row = False
             self._current_row = []
             self._row_has_th = False
 
@@ -251,8 +280,8 @@ def handle_data(self, data: str) -> None:
         if self._in_pre:
             self._pre_parts.append(data)
             return
-        # Collapse whitespace for non-pre content
-        text = re.sub(r"[ \t]+", " ", data)
+        # Collapse all whitespace (including newlines) per HTML rules
+        text = re.sub(r"\s+", " ", data)
         self._emit(text)
 
     def handle_entityref(self, name: str) -> None:
@@ -265,6 +294,53 @@ def handle_charref(self, name: str) -> None:
             return
         self._emit(html.unescape(f"&#{name};"))
 
+    # ------------------------------------------------------------------
+    # Flush pending buffers (handles truncated HTML from capped fetches)
+    # ------------------------------------------------------------------
+    def flush_pending(self) -> None:
+        """Flush any open side-buffers into ``_out``.
+
+        Called after ``close()`` to recover content from truncated HTML
+        where closing tags were never seen (common when ``_fetch_page_text``
+        caps the download by byte count).
+        """
+        # Flush innermost buffers first so their content propagates outward.
+
+        if self._in_link:
+            text = "".join(self._link_text_parts).strip()
+            href = self._link_href or ""
+            self._in_link = False
+            if href and text:
+                self._emit(f"[{text}]({href})")
+            elif text:
+                self._emit(text)
+
+        if self._in_cell:
+            self._in_cell = False
+            cell_text = "".join(self._cell_parts).strip().replace("\n", " ")
+            cell_text = cell_text.replace("|", "\\|")
+            self._current_row.append(cell_text)
+
+        if self._current_row:
+            line = "| " + " | ".join(self._current_row) + " |"
+            self._emit(line + "\n")
+            self._current_row = []
+
+        if self._in_pre:
+            raw = "".join(self._pre_parts)
+            self._in_pre = False
+            block = "```\n" + raw + "\n```"
+            self._emit("\n\n" + block + "\n\n")
+
+        # Flatten any open blockquote buffers (innermost first)
+        while self._bq_stack:
+            content = "".join(self._bq_stack.pop())
+            prefixed = self._prefix_blockquote(content)
+            if self._bq_stack:
+                self._bq_stack[-1].append("\n\n" + prefixed + "\n\n")
+            else:
+                self._out.append("\n\n" + prefixed + "\n\n")
+
 
 # ------------------------------------------------------------------
 # Post-processing
@@ -273,7 +349,7 @@ def _cleanup(text: str) -> str:
     """Normalize whitespace and blank lines in the final output."""
     # Collapse runs of 3+ newlines into 2
     text = re.sub(r"\n{3,}", "\n\n", text)
-    # Remove trailing spaces on each line
+    # Remove trailing spaces/tabs on each line
     text = re.sub(r"[ \t]+$", "", text, flags = re.MULTILINE)
     return text.strip()
 
@@ -288,8 +364,11 @@ def html_to_markdown(source_html: str) -> str:
     tables, blockquotes, code blocks, and HTML entities.  ``<script>``,
     ``<style>``, and ``<head>`` sections are stripped entirely.
     """
+    # Normalize line endings before parsing
+    source_html = source_html.replace("\r\n", "\n").replace("\r", "\n")
     renderer = _MarkdownRenderer()
     renderer.feed(source_html)
     renderer.close()
+    renderer.flush_pending()
     raw = "".join(renderer._out)
     return _cleanup(raw)
diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 4dffd67f24..50b9aa4bf6 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -2675,7 +2675,9 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str:
                     }
 
                     # ── Duplicate call detection ──────────────
-                    _tc_key = tool_name + str(arguments)
+                    _tc_key = tool_name + json.dumps(
+                        arguments, sort_keys = True, separators = (",", ":"),
+                    )
                     _prev = _tool_call_history[-1] if _tool_call_history else None
                     if _prev and _prev[0] == _tc_key and not _prev[1]:
                         result = (

From f77909161aa02ddde85e414aca910038089aa6b9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 31 Mar 2026 11:38:20 +0000
Subject: [PATCH 10/14] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 studio/backend/core/inference/llama_cpp.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 50b9aa4bf6..8de8afa663 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -2676,7 +2676,9 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str:
 
                     # ── Duplicate call detection ──────────────
                     _tc_key = tool_name + json.dumps(
-                        arguments, sort_keys = True, separators = (",", ":"),
+                        arguments,
+                        sort_keys = True,
+                        separators = (",", ":"),
                     )
                     _prev = _tool_call_history[-1] if _tool_call_history else None
                     if _prev and _prev[0] == _tc_key and not _prev[1]:

From ad189024eb85a203ff19f386c8d11e55c4525279 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 11:57:30 +0000
Subject: [PATCH 11/14] Fix table optional end tags, inline code whitespace,
 and link text normalization

_html_to_md.py:
- Extract _finish_cell() and _finish_row() helpers to handle HTML tables
  that omit optional </td>, </th>, or </tr> end tags. This is valid HTML
  and common on real web pages -- previously the parser would silently
  drop earlier cells and entire rows.
- Call _finish_cell()/_finish_row() from handle_starttag for <tr>/<td>/<th>,
  handle_endtag for </tr>/</td>/</th>/</table>, and flush_pending() so all
  three paths (normal close, implicit close, truncated HTML) use the same
  row-finalization logic including header separator emission.
- Add _in_inline_code flag so handle_data() preserves literal whitespace
  inside <code> spans instead of collapsing it. Source like
  <code>pip  install   unsloth</code> now correctly renders as
  `pip  install   unsloth` rather than `pip install unsloth`.
- Extract _finish_link() helper that normalizes accumulated link text with
  \s+ -> single space before building the Markdown link. Prevents block-
  level content inside <a> tags (e.g. <a><div>one</div><div>two</div></a>)
  from producing multiline [one\n\ntwo](href) link labels.
- Empty blockquotes now produce no output instead of a stray ">".
- Remove unused _bq_depth field (all routing uses _bq_stack).
- Flush open cells and rows in handle_endtag("table") for robustness.
---
 studio/backend/core/inference/_html_to_md.py | 121 +++++++++++--------
 1 file changed, 70 insertions(+), 51 deletions(-)

diff --git a/studio/backend/core/inference/_html_to_md.py b/studio/backend/core/inference/_html_to_md.py
index f7237b64f5..5b96efa942 100644
--- a/studio/backend/core/inference/_html_to_md.py
+++ b/studio/backend/core/inference/_html_to_md.py
@@ -4,7 +4,7 @@
 """
 Minimal HTML-to-Markdown converter using only the standard library.
 
-Replaces the external ``html2text`` (GPL-3.0) dependency with a ~220-line
+Replaces the external ``html2text`` (GPL-3.0) dependency with a ~250-line
 ``html.parser.HTMLParser`` subclass.  Covers headings, links, bold/italic,
 lists, tables, blockquotes, code blocks, and entity decoding.
 """
@@ -68,11 +68,11 @@ def __init__(self):
         # Pre/code state
         self._in_pre: bool = False
         self._pre_parts: list[str] = []
+        self._in_inline_code: bool = False
 
         # Blockquote state -- stack of output buffers so nested
         # blockquotes each collect their own content and get prefixed
         # with the correct number of ">" markers on close.
-        self._bq_depth: int = 0
         self._bq_stack: list[list[str]] = []
 
     # ------------------------------------------------------------------
@@ -92,6 +92,8 @@ def _emit(self, text: str) -> None:
     def _prefix_blockquote(self, content: str) -> str:
         """Prefix every line of *content* with ``> ``."""
         content = re.sub(r"\n{3,}", "\n\n", content).strip()
+        if not content:
+            return ""
         lines = content.split("\n")
         prefixed: list[str] = []
         for line in lines:
@@ -101,6 +103,45 @@ def _prefix_blockquote(self, content: str) -> str:
                 prefixed.append(">")
         return "\n".join(prefixed)
 
+    # ------------------------------------------------------------------
+    # Table helpers -- flush open cells and rows so that HTML with
+    # omitted optional end tags (</td>, </tr>) does not lose data.
+    # ------------------------------------------------------------------
+    def _finish_cell(self) -> None:
+        if not self._in_cell:
+            return
+        self._in_cell = False
+        cell_text = "".join(self._cell_parts).strip().replace("\n", " ")
+        cell_text = cell_text.replace("|", "\\|")
+        self._current_row.append(cell_text)
+        self._cell_parts = []
+
+    def _finish_row(self) -> None:
+        if not self._current_row:
+            return
+        line = "| " + " | ".join(self._current_row) + " |"
+        self._emit(line + "\n")
+        if not self._header_row_done and (self._row_has_th or self._is_first_row):
+            sep = "| " + " | ".join("---" for _ in self._current_row) + " |"
+            self._emit(sep + "\n")
+            self._header_row_done = True
+        self._is_first_row = False
+        self._current_row = []
+        self._row_has_th = False
+
+    # ------------------------------------------------------------------
+    # Link text helper -- normalize whitespace so block-level content
+    # inside an <a> does not produce multiline Markdown link labels.
+    # ------------------------------------------------------------------
+    def _finish_link(self) -> None:
+        text = re.sub(r"\s+", " ", "".join(self._link_text_parts)).strip()
+        href = self._link_href or ""
+        self._in_link = False
+        if href and text:
+            self._emit(f"[{text}]({href})")
+        elif text:
+            self._emit(text)
+
     # ------------------------------------------------------------------
     # Tag handlers
     # ------------------------------------------------------------------
@@ -138,7 +179,6 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
 
         elif tag == "blockquote":
             self._emit("\n\n")
-            self._bq_depth += 1
             self._bq_stack.append([])
 
         elif tag == "ul":
@@ -166,6 +206,7 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
             self._in_pre = True
 
         elif tag == "code" and not self._in_pre:
+            self._in_inline_code = True
             self._emit("`")
 
         elif tag == "table":
@@ -175,9 +216,14 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
             self._emit("\n\n")
 
         elif tag == "tr":
-            self._current_row = []
+            # Flush any open cell/row from a previous row that may
+            # have omitted its optional </td> or </tr> end tags.
+            self._finish_cell()
+            self._finish_row()
 
         elif tag in ("th", "td"):
+            # Flush any open cell (handles omitted </td>/<th>)
+            self._finish_cell()
             self._cell_parts = []
             self._in_cell = True
             if tag == "th":
@@ -201,13 +247,7 @@ def handle_endtag(self, tag: str) -> None:
             self._emit("\n\n")
 
         elif tag == "a":
-            text = "".join(self._link_text_parts).strip()
-            href = self._link_href or ""
-            self._in_link = False
-            if href and text:
-                self._emit(f"[{text}]({href})")
-            elif text:
-                self._emit(text)
+            self._finish_link()
 
         elif tag in _INLINE_EMPHASIS:
             self._emit(_INLINE_EMPHASIS[tag])
@@ -216,11 +256,11 @@ def handle_endtag(self, tag: str) -> None:
             self._emit("\n\n")
 
         elif tag == "blockquote":
-            self._bq_depth = max(0, self._bq_depth - 1)
             if self._bq_stack:
                 content = "".join(self._bq_stack.pop())
                 prefixed = self._prefix_blockquote(content)
-                self._emit("\n\n" + prefixed + "\n\n")
+                if prefixed:
+                    self._emit("\n\n" + prefixed + "\n\n")
 
         elif tag == "ul":
             if self._list_stack and self._list_stack[-1] == "ul":
@@ -241,33 +281,20 @@ def handle_endtag(self, tag: str) -> None:
             self._emit("\n\n" + block + "\n\n")
 
         elif tag == "code" and not self._in_pre:
+            self._in_inline_code = False
             self._emit("`")
 
         elif tag in ("th", "td"):
-            self._in_cell = False
-            cell_text = "".join(self._cell_parts).strip().replace("\n", " ")
-            # Escape pipe characters so they do not break table columns
-            cell_text = cell_text.replace("|", "\\|")
-            self._current_row.append(cell_text)
+            self._finish_cell()
 
         elif tag == "tr":
-            if self._current_row:
-                line = "| " + " | ".join(self._current_row) + " |"
-                self._emit(line + "\n")
-                if self._row_has_th and not self._header_row_done:
-                    sep = "| " + " | ".join("---" for _ in self._current_row) + " |"
-                    self._emit(sep + "\n")
-                    self._header_row_done = True
-                elif self._is_first_row and not self._header_row_done:
-                    # Tables without <th> still need a separator for valid Markdown
-                    sep = "| " + " | ".join("---" for _ in self._current_row) + " |"
-                    self._emit(sep + "\n")
-                    self._header_row_done = True
-                self._is_first_row = False
-            self._current_row = []
-            self._row_has_th = False
+            self._finish_cell()
+            self._finish_row()
 
         elif tag == "table":
+            # Flush any remaining row (handles omitted </tr>)
+            self._finish_cell()
+            self._finish_row()
             self._in_table = False
             self._emit("\n")
 
@@ -280,6 +307,10 @@ def handle_data(self, data: str) -> None:
         if self._in_pre:
             self._pre_parts.append(data)
             return
+        # Preserve literal whitespace inside inline <code> spans
+        if self._in_inline_code:
+            self._emit(data)
+            return
         # Collapse all whitespace (including newlines) per HTML rules
         text = re.sub(r"\s+", " ", data)
         self._emit(text)
@@ -307,24 +338,10 @@ def flush_pending(self) -> None:
         # Flush innermost buffers first so their content propagates outward.
 
         if self._in_link:
-            text = "".join(self._link_text_parts).strip()
-            href = self._link_href or ""
-            self._in_link = False
-            if href and text:
-                self._emit(f"[{text}]({href})")
-            elif text:
-                self._emit(text)
-
-        if self._in_cell:
-            self._in_cell = False
-            cell_text = "".join(self._cell_parts).strip().replace("\n", " ")
-            cell_text = cell_text.replace("|", "\\|")
-            self._current_row.append(cell_text)
-
-        if self._current_row:
-            line = "| " + " | ".join(self._current_row) + " |"
-            self._emit(line + "\n")
-            self._current_row = []
+            self._finish_link()
+
+        self._finish_cell()
+        self._finish_row()
 
         if self._in_pre:
             raw = "".join(self._pre_parts)
@@ -336,6 +353,8 @@ def flush_pending(self) -> None:
         while self._bq_stack:
             content = "".join(self._bq_stack.pop())
             prefixed = self._prefix_blockquote(content)
+            if not prefixed:
+                continue
             if self._bq_stack:
                 self._bq_stack[-1].append("\n\n" + prefixed + "\n\n")
             else:

From b07afb5856b46080dcb4dce8b24406235afcaafd Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 12:15:40 +0000
Subject: [PATCH 12/14] Support <ol start=N>, <dl>/<dt>/<dd>, and preserve code
 block whitespace

_html_to_md.py:
- Honor <ol start="N"> attribute so ordered lists preserve their original
  numbering instead of always restarting from 1. Important for docs/tutorials
  that continue numbering across sections.
- Add dl, dt, dd to _BLOCK_TAGS so definition lists (common on MDN, Python
  docs, Django docs) produce separated text instead of concatenated blobs.
- Rewrite _cleanup() to be fence-aware: content inside fenced code blocks
  is now preserved verbatim (intentional blank lines in <pre> content are
  no longer collapsed). Outside code blocks, runs of blank lines collapse
  to a single blank line and trailing whitespace is stripped.
- Fix _prefix_blockquote() to strip trailing whitespace before collapsing
  blank lines, preventing the "\n\n \n\n" pattern from sneaking through.
---
 studio/backend/core/inference/_html_to_md.py | 55 +++++++++++++++++---
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/studio/backend/core/inference/_html_to_md.py b/studio/backend/core/inference/_html_to_md.py
index 5b96efa942..d1862a3e22 100644
--- a/studio/backend/core/inference/_html_to_md.py
+++ b/studio/backend/core/inference/_html_to_md.py
@@ -33,6 +33,9 @@
         "figcaption",
         "details",
         "summary",
+        "dl",
+        "dt",
+        "dd",
     }
 )
 _HEADING_TAGS = frozenset({"h1", "h2", "h3", "h4", "h5", "h6"})
@@ -91,6 +94,8 @@ def _emit(self, text: str) -> None:
     # ------------------------------------------------------------------
     def _prefix_blockquote(self, content: str) -> str:
         """Prefix every line of *content* with ``> ``."""
+        # Strip trailing whitespace first, then collapse blank lines
+        content = re.sub(r"[ \t]+$", "", content, flags = re.MULTILINE)
         content = re.sub(r"\n{3,}", "\n\n", content).strip()
         if not content:
             return ""
@@ -187,7 +192,12 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
 
         elif tag == "ol":
             self._list_stack.append("ol")
-            self._ol_counter.append(0)
+            start_attr = attr_dict.get("start")
+            try:
+                start = int(start_attr) if start_attr is not None else 1
+            except (ValueError, TypeError):
+                start = 1
+            self._ol_counter.append(start - 1)
             self._emit("\n")
 
         elif tag == "li":
@@ -340,6 +350,10 @@ def flush_pending(self) -> None:
         if self._in_link:
             self._finish_link()
 
+        if self._in_inline_code:
+            self._in_inline_code = False
+            self._emit("`")
+
         self._finish_cell()
         self._finish_row()
 
@@ -365,12 +379,39 @@ def flush_pending(self) -> None:
 # Post-processing
 # ------------------------------------------------------------------
 def _cleanup(text: str) -> str:
-    """Normalize whitespace and blank lines in the final output."""
-    # Collapse runs of 3+ newlines into 2
-    text = re.sub(r"\n{3,}", "\n\n", text)
-    # Remove trailing spaces/tabs on each line
-    text = re.sub(r"[ \t]+$", "", text, flags = re.MULTILINE)
-    return text.strip()
+    """Normalize whitespace and blank lines in the final output.
+
+    Preserves content inside fenced code blocks verbatim so that
+    intentional blank lines in ``<pre>`` content are not collapsed.
+    """
+    lines = text.split("\n")
+    out: list[str] = []
+    in_fence = False
+    blank_run = 0
+
+    for line in lines:
+        stripped = line.rstrip(" \t")
+        if stripped.startswith("```"):
+            in_fence = not in_fence
+            blank_run = 0
+            out.append(stripped)
+            continue
+
+        if in_fence:
+            # Preserve code block content exactly as-is
+            out.append(line)
+            continue
+
+        if not stripped:
+            blank_run += 1
+            if blank_run <= 1:
+                out.append("")
+            continue
+
+        blank_run = 0
+        out.append(stripped)
+
+    return "\n".join(out).strip()
 
 
 # ------------------------------------------------------------------

From d398489fb3a3ae99040d2621272467e85b240316 Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 12:53:52 +0000
Subject: [PATCH 13/14] Suppress whitespace-only text nodes between table
 structural elements

Indented HTML tables (nearly all real-world pages) produce whitespace
text nodes between <table>, <tr>, </tr> etc. that land in the output
as leading spaces before table rows, breaking Markdown table alignment.

Skip whitespace-only text nodes when inside a table but not inside a
cell, so indentation from source HTML does not leak into the output.
---
 studio/backend/core/inference/_html_to_md.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/studio/backend/core/inference/_html_to_md.py b/studio/backend/core/inference/_html_to_md.py
index d1862a3e22..d96b8168e2 100644
--- a/studio/backend/core/inference/_html_to_md.py
+++ b/studio/backend/core/inference/_html_to_md.py
@@ -323,6 +323,11 @@ def handle_data(self, data: str) -> None:
             return
         # Collapse all whitespace (including newlines) per HTML rules
         text = re.sub(r"\s+", " ", data)
+        # Suppress whitespace-only text nodes between table structural
+        # elements (indentation from source HTML) to prevent leading
+        # spaces from breaking Markdown table row alignment.
+        if self._in_table and not self._in_cell and not text.strip():
+            return
         self._emit(text)
 
     def handle_entityref(self, name: str) -> None:

From 24b857f6a05de28889d4805349bb57608ac66b5f Mon Sep 17 00:00:00 2001
From: Daniel Han <danielhanchen@gmail.com>
Date: Tue, 31 Mar 2026 13:07:51 +0000
Subject: [PATCH 14/14] Revert dedup key to str(arguments) with explanatory
 comment

json.dumps(sort_keys=True) is unnecessary overhead here: `arguments`
always comes from json.loads on model output within a single request,
so dict insertion order follows the emitted key order (Python 3.7+).
A repeated call from the model is expected to produce the same JSON,
which parses to the same dict repr; a call re-emitted with reordered
keys is simply treated as a new call rather than a duplicate. str()
avoids re-serialization on every tool call.
---
 studio/backend/core/inference/llama_cpp.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/studio/backend/core/inference/llama_cpp.py b/studio/backend/core/inference/llama_cpp.py
index 8de8afa663..894dd25cf7 100644
--- a/studio/backend/core/inference/llama_cpp.py
+++ b/studio/backend/core/inference/llama_cpp.py
@@ -2675,11 +2675,10 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str:
                     }
 
                     # ── Duplicate call detection ──────────────
-                    _tc_key = tool_name + json.dumps(
-                        arguments,
-                        sort_keys = True,
-                        separators = (",", ":"),
-                    )
+                    # str(dict) is stable here: arguments always comes from
+                    # json.loads on the same model output within one request,
+                    # so insertion order is deterministic (Python 3.7+).
+                    _tc_key = tool_name + str(arguments)
                     _prev = _tool_call_history[-1] if _tool_call_history else None
                     if _prev and _prev[0] == _tc_key and not _prev[1]:
                         result = (