Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
1e0def3
Add elapsed timer to tool status pill in Studio
danielhanchen Mar 18, 2026
d024103
Fix tool call parsing, add tool outputs panel and reasoning copy button
danielhanchen Mar 18, 2026
f1f1901
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 18, 2026
9975db6
Add interactive HTML preview with fullscreen toggle for code blocks
danielhanchen Mar 18, 2026
b4eed53
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
5bef595
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
ef5fa93
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
24c1e4a
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
2c194b4
Add tool call settings: auto-heal toggle, max iterations, timeout
danielhanchen Mar 18, 2026
bfd5da1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 18, 2026
9d525df
Fix tool call timeout: respect no-limit and apply to web search
danielhanchen Mar 18, 2026
24c4a07
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 18, 2026
82ad666
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
fd7ea1b
Add input validation bounds and per-thread sandbox isolation
danielhanchen Mar 18, 2026
278d948
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 18, 2026
7e370cc
Fix non-monotonic streaming and Python temp script path
danielhanchen Mar 18, 2026
ad4aa5c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 18, 2026
93088a5
Sanitize session_id to prevent path traversal in sandbox
danielhanchen Mar 18, 2026
df93c21
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
00de9c3
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
f3def91
feat(chat): proper assistant-ui tool call UIs with sources
wasimysaid Mar 18, 2026
e43cb7c
fix(inference): respect empty enabled_tools allowlist
wasimysaid Mar 18, 2026
f585caa
Merge branch 'main' into studio-tool-elapsed-timer
danielhanchen Mar 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1154,8 +1154,8 @@ rocm711-torch2100 = [
]

[project.urls]
homepage = "http://www.unsloth.ai"
documentation = "https://github.com/unslothai/unsloth"
homepage = "https://unsloth.ai"
documentation = "https://unsloth.ai/docs"
repository = "https://github.com/unslothai/unsloth"

[tool.ruff]
Expand Down
243 changes: 198 additions & 45 deletions studio/backend/core/inference/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,50 +1173,113 @@ def _parse_tool_calls_from_text(content: str) -> list[dict]:
Handles formats like:
<tool_call>{"name":"web_search","arguments":{"query":"..."}}</tool_call>
<tool_call><function=web_search><parameter=query>...</parameter></function></tool_call>
Closing </tool_call> tag is optional (models sometimes omit it).
Closing tags (</tool_call>, </function>, </parameter>) are all optional
since models frequently omit them.
"""
import re

tool_calls = []
# Pattern 1: JSON inside <tool_call> tags (closing tag optional)
for match in re.finditer(
r"<tool_call>\s*(\{.*?\})\s*(?:</tool_call>)?", content, re.DOTALL
):
try:
obj = json.loads(match.group(1))
tc = {
"id": f"call_{len(tool_calls)}",
"type": "function",
"function": {
"name": obj.get("name", ""),
"arguments": obj.get("arguments", {}),
},
}
if isinstance(tc["function"]["arguments"], dict):
tc["function"]["arguments"] = json.dumps(
tc["function"]["arguments"]
)
tool_calls.append(tc)
except (json.JSONDecodeError, ValueError):
pass

# Pattern 1: JSON inside <tool_call> tags.
# Use balanced-brace extraction that skips braces inside JSON strings.
for m in re.finditer(r"<tool_call>\s*\{", content):
brace_start = m.end() - 1 # position of the opening {
depth, i = 0, brace_start
in_string = False
while i < len(content):
ch = content[i]
if in_string:
if ch == "\\" and i + 1 < len(content):
i += 2 # skip escaped character
continue
if ch == '"':
in_string = False
elif ch == '"':
in_string = True
elif ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
break
i += 1
Comment on lines +1185 to +1205

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The manual character-by-character iteration for balanced-brace JSON extraction is robust but makes the _parse_tool_calls_from_text function quite long and complex. Consider extracting this logic into a dedicated helper function to improve readability and maintainability. This would make the main function's flow easier to follow.

if depth == 0:
json_str = content[brace_start : i + 1]
try:
obj = json.loads(json_str)
tc = {
"id": f"call_{len(tool_calls)}",
"type": "function",
"function": {
"name": obj.get("name", ""),
"arguments": obj.get("arguments", {}),
},
}
if isinstance(tc["function"]["arguments"], dict):
tc["function"]["arguments"] = json.dumps(
tc["function"]["arguments"]
)
tool_calls.append(tc)
except (json.JSONDecodeError, ValueError):
pass

# Pattern 2: XML-style <function=name><parameter=key>value</parameter></function>
# Closing </tool_call> optional
# All closing tags optional -- models frequently omit </parameter>,
# </function>, and/or </tool_call>.
if not tool_calls:
for match in re.finditer(
r"<tool_call>\s*<function=(\w+)>(.*?)</function>\s*(?:</tool_call>)?",
content,
re.DOTALL,
):
func_name = match.group(1)
params_text = match.group(2)
# Step 1: Find all <function=name> positions and extract their bodies.
# Body boundary: use only </tool_call> or next <function= as hard
# boundaries. We avoid using </function> as a boundary because
# code parameter values can contain that literal string.
# After extracting, we trim a trailing </function> if present.
func_starts = list(re.finditer(r"<function=(\w+)>\s*", content))
for idx, fm in enumerate(func_starts):
func_name = fm.group(1)
body_start = fm.end()
# Hard boundaries: next <function= tag or </tool_call>
next_func = (
func_starts[idx + 1].start()
if idx + 1 < len(func_starts)
else len(content)
)
end_tag = re.search(r"</tool_call>", content[body_start:])
if end_tag:
body_end = body_start + end_tag.start()
else:
body_end = len(content)
body_end = min(body_end, next_func)
Comment on lines +1240 to +1250

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic for determining the body_end in the XML parsing (next_func, end_tag, min) is quite intricate. While it appears correct, it could benefit from additional comments explaining the priority or flow of these boundary conditions, or perhaps a small helper function to encapsulate this boundary calculation.

body = content[body_start:body_end]
# Trim trailing </function> if present (it's the real closing tag)
body = re.sub(r"\s*</function>\s*$", "", body)

# Step 2: Extract parameters from body.
# For single-parameter functions (the common case: code, command,
# query), use body end as the only boundary to avoid false matches
# on </parameter> inside code strings.
arguments = {}
for param_match in re.finditer(
r"<parameter=(\w+)>\s*(.*?)\s*</parameter>",
params_text,
re.DOTALL,
):
arguments[param_match.group(1)] = param_match.group(2)
param_starts = list(re.finditer(r"<parameter=(\w+)>\s*", body))
if len(param_starts) == 1:
# Single parameter: value is everything from after the tag
# to end of body, trimming any trailing </parameter>.
pm = param_starts[0]
val = body[pm.end() :]
val = re.sub(r"\s*</parameter>\s*$", "", val)
arguments[pm.group(1)] = val.strip()
Comment on lines +1265 to +1267

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Stop bare-function parameter capture at closing function tag

In the bare <function=...> fallback path, single-parameter parsing takes everything from <parameter=...> to the end of body, and </function> is only removed if it is the final suffix. If the model emits a valid closing </function> followed by normal text, that closing tag and trailing prose are included in the tool argument, which produces malformed query/code/command values and incorrect tool execution.

Useful? React with 👍 / 👎.

else:
for pidx, pm in enumerate(param_starts):
param_name = pm.group(1)
val_start = pm.end()
# Value ends at next <parameter= or end of body
next_param = (
param_starts[pidx + 1].start()
if pidx + 1 < len(param_starts)
else len(body)
)
val = body[val_start:next_param]
# Trim trailing </parameter> if present
val = re.sub(r"\s*</parameter>\s*$", "", val)
arguments[param_name] = val.strip()

tc = {
"id": f"call_{len(tool_calls)}",
"type": "function",
Expand Down Expand Up @@ -1531,7 +1594,10 @@ def generate_chat_completion_with_tools(
stop: Optional[list[str]] = None,
cancel_event: Optional[threading.Event] = None,
enable_thinking: Optional[bool] = None,
max_tool_iterations: int = 5,
max_tool_iterations: int = 10,
auto_heal_tool_calls: bool = True,
tool_call_timeout: int = 300,
session_id: Optional[str] = None,
) -> Generator[dict, None, None]:
"""
Agentic loop: let the model call tools, execute them, and continue.
Expand Down Expand Up @@ -1596,16 +1662,45 @@ def generate_chat_completion_with_tools(
tool_calls = message.get("tool_calls")

# Fallback: detect tool calls embedded as XML/text in content
# Some models output <tool_call> XML instead of structured tool_calls
# Some models output <tool_call> XML instead of structured tool_calls,
# or bare <function=...> tags without <tool_call> wrapper.
content_text = message.get("content", "") or ""
if not tool_calls and "<tool_call>" in content_text:
if (
auto_heal_tool_calls
and not tool_calls
and ("<tool_call>" in content_text or "<function=" in content_text)
):
tool_calls = self._parse_tool_calls_from_text(content_text)
if tool_calls:
# Strip the tool call markup from content
# Strip the tool call markup from content.
# Closed <tool_call> blocks are matched non-greedily (up to the first
# closing tag); unterminated markup is stripped to end-of-string below.
import re

# Strip closed <tool_call>...</tool_call> blocks (non-greedy: stops at the first closing tag)
content_text = re.sub(
r"<tool_call>.*?(?:</tool_call>|$)",
r"<tool_call>.*?</tool_call>",
"",
content_text,
flags = re.DOTALL,
)
# Strip unterminated <tool_call>... to end
content_text = re.sub(
Comment on lines +1685 to +1688

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using re.sub with re.DOTALL to remove potentially large blocks of text could be inefficient. Consider a more targeted approach if performance becomes an issue, perhaps by finding the start and end indices and slicing the string.

                    content_text = re.sub(
                        r"<tool_call>.*$",
                        "",
                        content_text,
                        flags = re.DOTALL,
                    ).strip()

r"<tool_call>.*$",
"",
content_text,
flags = re.DOTALL,
)
# Strip bare <function=...>...</function> blocks
content_text = re.sub(
r"<function=\w+>.*?</function>",
"",
content_text,
flags = re.DOTALL,
)
# Strip unterminated bare <function=...> to end
content_text = re.sub(
r"<function=\w+>.*$",
"",
content_text,
flags = re.DOTALL,
Expand All @@ -1632,7 +1727,10 @@ def generate_chat_completion_with_tools(
try:
arguments = json.loads(raw_args)
except (json.JSONDecodeError, ValueError):
arguments = {"query": raw_args}
if auto_heal_tool_calls:
arguments = {"query": raw_args}
else:
arguments = {"raw": raw_args}
else:
arguments = raw_args

Expand All @@ -1659,11 +1757,34 @@ def generate_chat_completion_with_tools(
status_text = f"Calling: {tool_name}"
yield {"type": "status", "text": status_text}

# Emit tool_start so the frontend can record inputs
yield {
"type": "tool_start",
"tool_name": tool_name,
"tool_call_id": tc.get("id", ""),
"arguments": arguments,
}

# Execute the tool
_effective_timeout = (
None if tool_call_timeout >= 9999 else tool_call_timeout
)
result = execute_tool(
tool_name, arguments, cancel_event = cancel_event
tool_name,
arguments,
cancel_event = cancel_event,
timeout = _effective_timeout,
session_id = session_id,
)

# Emit tool_end so the frontend can record outputs
yield {
"type": "tool_end",
"tool_name": tool_name,
"tool_call_id": tc.get("id", ""),
"result": result,
}

# Append tool result to conversation
tool_msg = {
"role": "tool",
Expand Down Expand Up @@ -1714,7 +1835,30 @@ def generate_chat_completion_with_tools(
if stop:
stream_payload["stop"] = stop

import re as _re_final

# Closed blocks only -- safe to strip mid-stream without shrinking later.
_TOOL_CLOSED_PATTERNS = [
_re_final.compile(r"<tool_call>.*?</tool_call>", _re_final.DOTALL),
_re_final.compile(r"<function=\w+>.*?</function>", _re_final.DOTALL),
]
# Open-ended patterns strip from an opening tag to end-of-string.
# Only applied on the final flush to avoid non-monotonic shrinking.
_TOOL_ALL_PATTERNS = _TOOL_CLOSED_PATTERNS + [
_re_final.compile(r"<tool_call>.*$", _re_final.DOTALL),
_re_final.compile(r"<function=\w+>.*$", _re_final.DOTALL),

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Limit tool-markup stripping to confirmed tool call blocks

The final streaming cleanup applies r"<function=\w+>.*$" (and the analogous <tool_call> pattern) to every assistant response after tools run, so any legitimate text that contains these literals is truncated from that point onward. In practice, answers that explain XML/examples like <function=foo> lose the remainder of the message. This should only strip markup when the segment is an actual tool-call payload, not any raw substring match.

Useful? React with 👍 / 👎.

]

def _strip_tool_markup(text: str, *, final: bool = False) -> str:
    """Remove tool-call markup from ``text``.

    When ``auto_heal_tool_calls`` (from the enclosing scope) is falsy the
    text is returned untouched. Otherwise every pattern in the selected
    list is substituted with the empty string, in list order.

    Args:
        text: Accumulated text to clean.
        final: When false, only the closed-block patterns are applied.
            When true, the full pattern list (closed blocks plus the
            open-ended "tag to end-of-string" patterns) is applied and
            surrounding whitespace is stripped from the result.

    Returns:
        The cleaned text.
    """
    if auto_heal_tool_calls:
        # Open-ended patterns are reserved for the final pass; applying
        # them mid-stream could make already-emitted text shrink.
        active = _TOOL_CLOSED_PATTERNS if not final else _TOOL_ALL_PATTERNS
        for regex in active:
            text = regex.sub("", text)
        if final:
            text = text.strip()
    return text

cumulative = ""
_last_emitted = ""
in_thinking = False
has_content_tokens = False
reasoning_text = ""
Expand Down Expand Up @@ -1746,7 +1890,12 @@ def generate_chat_completion_with_tools(
if in_thinking:
if has_content_tokens:
cumulative += "</think>"
yield {"type": "content", "text": cumulative}
yield {
"type": "content",
Comment on lines 1890 to +1894

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Run final tool-markup cleanup for non-thinking streams

The end-of-stream path only applies _strip_tool_markup(..., final=True) inside if in_thinking; when in_thinking is false, the function returns without a final cleanup pass. Since incremental stripping only removes closed blocks, any unterminated <tool_call> or <function=...> markup that appears at the end of a normal response can leak into user-visible output.

Useful? React with 👍 / 👎.

"text": _strip_tool_markup(
cumulative, final = True
),
}
else:
cumulative = reasoning_text
yield {"type": "content", "text": cumulative}
Expand Down Expand Up @@ -1776,7 +1925,11 @@ def generate_chat_completion_with_tools(
cumulative += "</think>"
in_thinking = False
cumulative += token
yield {"type": "content", "text": cumulative}
cleaned = _strip_tool_markup(cumulative)
# Only emit when cleaned text grows (monotonic).
if len(cleaned) > len(_last_emitted):
_last_emitted = cleaned
yield {"type": "content", "text": cleaned}
except json.JSONDecodeError:
logger.debug(
f"Skipping malformed SSE line: {line[:100]}"
Expand Down
Loading
Loading