-
-
Notifications
You must be signed in to change notification settings - Fork 5.9k
studio: auto-continue when model stops mid-plan #5549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3a118e5
af31c54
8503fb3
25acc20
fd53dab
66d59e0
7dbe18a
0cc69de
bb33346
a1d79b7
5afd6c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ | |
|
|
||
| import atexit | ||
| import contextlib | ||
| import itertools as _itertools | ||
| import json | ||
| import os | ||
| import re | ||
|
|
@@ -56,6 +57,51 @@ | |
| ) | ||
| _MAX_REPROMPTS = 3 | ||
|
|
||
| # Mid-plan EOS detectors. Three shapes: trailing intent, list under a | ||
| # "Let me ...:" header, bare trailing colon. "let me know" is a closer, | ||
| # not a plan signal -- excluded via negative lookahead. | ||
| _TRAILING_PLAN_INTENT = re.compile( | ||
| r"(?i)(" | ||
| r"(?:now\s+)?let me(?!\s+know\b)|i['’]ll now|next,?\s*i['’]ll|" | ||
| r"i['’]m going to|i will now|let['’]s now" | ||
| r")[^.!?\n]*[.!?]?\s*$" | ||
| ) | ||
| _TRAILING_PLAN_LIST = re.compile( | ||
| # `\Z` (not `$`) so the regex only fires when the list is the last | ||
| # thing in the buffer, not when a closing paragraph follows. | ||
| # `[ \t]*` before each marker keeps it line-anchored so a mid-prose | ||
| # "42." cannot pose as a phantom list item. | ||
| r"(?i)" | ||
| r"(?:let me|i['’]ll|i will|i['’]m going to|i am going to|" | ||
| r"here['’]?s (?:my |the |a )?(?:plan|approach|steps?)|" | ||
| r"as follows|the (?:plan|steps?) (?:is|are))" | ||
| r"[^:\n]{0,160}:\s*\n" | ||
| r"(?:[ \t]*(?:[-*•]|\d+\.)[ \t]+[^\n]+(?:\n|\Z))+" | ||
| r"\s*\Z" | ||
| ) | ||
| _TRAILING_PLAN_COLON = re.compile( | ||
| r"(?i)(?:let me|i['’]ll|i will|i['’]m going to|i am going to|" | ||
| r"now i['’]ll|now i will)" | ||
| r"[^\n:]{0,200}:\s*$" | ||
| ) | ||
| _TRAILING_PLAN_WINDOW = 600 | ||
| _MAX_CONTINUES = 3 | ||
|
|
||
|
|
||
| def _trailing_plan_hit(stripped: str) -> bool: | ||
| """True if the last `_TRAILING_PLAN_WINDOW` chars look mid-plan.""" | ||
| if not stripped: | ||
| return False | ||
| tail = stripped[-_TRAILING_PLAN_WINDOW:] | ||
| if _TRAILING_PLAN_INTENT.search(tail) is not None: | ||
| return True | ||
| if _TRAILING_PLAN_LIST.search(tail) is not None: | ||
| return True | ||
| if _TRAILING_PLAN_COLON.search(tail) is not None: | ||
| return True | ||
| return False | ||
|
|
||
|
|
||
| # Without max_tokens, llama-server defaults to n_predict = n_ctx (up to | ||
| # 262144 for Qwen3.5), producing many-minute zombie decodes when cancel | ||
| # fails. t_max_predict_ms is a wall-clock backstop applied unconditionally, | ||
|
|
@@ -4015,12 +4061,16 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str: | |
| # direct answer like "4" or "Hello!" will not match. | ||
| # Pattern is compiled once at module level (_INTENT_SIGNAL). | ||
| _reprompt_count = 0 | ||
|
|
||
| # Reserve extra iterations for re-prompts so they don't | ||
| # consume the caller's tool-call budget. Only add the | ||
| # extra slot when tool iterations are actually allowed. | ||
| _extra = _MAX_REPROMPTS if max_tool_iterations > 0 else 0 | ||
| for iteration in range(max_tool_iterations + _extra): | ||
| # Separate counter so auto-continue doesn't steal reprompt budget. | ||
| _continue_count = 0 | ||
|
|
||
| # Dynamic cap: caller's max_tool_iterations is honored exactly | ||
| # until a reprompt/continue actually fires; each consumed event | ||
| # earns its own slot back. itertools.count preserves loop-body | ||
| # `continue` semantics. | ||
| for iteration in _itertools.count(): | ||
| if iteration >= (max_tool_iterations + _reprompt_count + _continue_count): | ||
| break | ||
| if cancel_event is not None and cancel_event.is_set(): | ||
| return | ||
|
|
||
|
|
@@ -4353,18 +4403,41 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str: | |
| _stripped = content_accum.strip() | ||
| if not _stripped: | ||
| _stripped = reasoning_accum.strip() | ||
| if ( | ||
|
|
||
| # Tool-coercive reprompt: intent text without a tool call. | ||
| _tool_intent_hit = ( | ||
| tools | ||
| and _reprompt_count < _MAX_REPROMPTS | ||
| and 0 < len(_stripped) < _REPROMPT_MAX_CHARS | ||
| and _INTENT_SIGNAL.search(_stripped) | ||
| ): | ||
| _reprompt_count += 1 | ||
| logger.info( | ||
| f"Re-prompt {_reprompt_count}/{_MAX_REPROMPTS}: " | ||
| f"model responded without calling tools " | ||
| f"({len(_stripped)} chars)" | ||
| ) | ||
| and _INTENT_SIGNAL.search(_stripped) is not None | ||
| ) | ||
| # Neutral auto-continue on mid-plan EOS. Works without tools. | ||
| _trailing_hit = ( | ||
| _continue_count < _MAX_CONTINUES | ||
| and _trailing_plan_hit(_stripped) | ||
| ) | ||
|
|
||
| if _tool_intent_hit or _trailing_hit: | ||
| if _tool_intent_hit: | ||
| _reprompt_count += 1 | ||
| logger.info( | ||
| f"Re-prompt {_reprompt_count}/{_MAX_REPROMPTS}: " | ||
| f"model responded without calling tools " | ||
| f"({len(_stripped)} chars)" | ||
| ) | ||
| _nudge = ( | ||
| "STOP. Do NOT write code or explain. " | ||
| "You MUST call a tool NOW. " | ||
| "Call web_search or python immediately." | ||
| ) | ||
| else: | ||
| _continue_count += 1 | ||
| logger.info( | ||
| f"Auto-continue {_continue_count}/{_MAX_CONTINUES}: " | ||
| f"model ended turn mid-plan " | ||
| f"({len(_stripped)} chars)" | ||
| ) | ||
| _nudge = "Continue." | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
For Anthropic Messages requests with tools enabled, this new Useful? React with 👍 / 👎. |
||
| conversation.append( | ||
| { | ||
| "role": "assistant", | ||
|
|
@@ -4374,11 +4447,7 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str: | |
| conversation.append( | ||
| { | ||
| "role": "user", | ||
| "content": ( | ||
| "STOP. Do NOT write code or explain. " | ||
| "You MUST call a tool NOW. " | ||
| "Call web_search or python immediately." | ||
| ), | ||
| "content": _nudge, | ||
| } | ||
| ) | ||
| # Accumulate tokens and timing from this iteration | ||
|
|
@@ -4389,7 +4458,9 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str: | |
| _it_r = _iter_timings or {} | ||
| _accumulated_predicted_ms += _it_r.get("predicted_ms", 0) | ||
| _accumulated_predicted_n += _it_r.get("predicted_n", 0) | ||
| yield {"type": "status", "text": ""} | ||
| # boundary=True: next iter starts a fresh | ||
| # turn, so adapters must reset their cursor. | ||
| yield {"type": "status", "text": "", "boundary": True} | ||
| continue | ||
|
|
||
| # Content was already streamed. Yield metadata. | ||
|
|
@@ -4657,9 +4728,10 @@ def _strip_tool_markup(text: str, *, final: bool = False) -> str: | |
| tool_msg["tool_call_id"] = tool_call_id | ||
| conversation.append(tool_msg) | ||
|
|
||
| # Clear tool status badge before next generation iteration | ||
| # UI badge clear. NOT a boundary: tool_end already | ||
| # reset adapter cursors (would emit a spurious empty | ||
| # block if we flagged it). | ||
| yield {"type": "status", "text": ""} | ||
| # Continue the loop to let model respond with context | ||
| continue | ||
|
|
||
| except httpx.ConnectError: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For improved conciseness and readability, you can use any() with a generator expression to check against the multiple regex patterns. This avoids the series of if statements and combines checks into a single iteration, aligning with repository efficiency guidelines regarding redundant data iterations.
References