-
Notifications
You must be signed in to change notification settings - Fork 3.4k
support for the DeepSeek model by enabling streaming response parsing #5592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -491,6 +491,7 @@ def __init__(self): | |
| self.eot_token = "<|tool▁calls▁end|>" | ||
| self.func_call_regex = r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>" | ||
| self.func_detail_regex = r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```<|tool▁call▁end|>" | ||
| self._last_arguments = "" | ||
|
|
||
| def has_tool_call(self, text: str) -> bool: | ||
| """Check if the text contains a deepseek format tool call.""" | ||
|
|
@@ -528,13 +529,84 @@ def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult | |
|
|
||
| def structure_info(self) -> _GetInfoFunc: | ||
| return lambda name: StructureInfo( | ||
| begin="<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>" | ||
| + name | ||
| + "\n```json\n", | ||
| end="\n```<|tool▁call▁end|><|tool▁calls▁end|>", | ||
| trigger="<|tool▁calls▁begin|>", | ||
| begin=">" + name + "\n```json\n", | ||
| end="\n```<", | ||
| trigger=">" + name + "\n```json\n", | ||
| ) | ||
|
|
||
| def parse_streaming_increment( | ||
| self, new_text: str, tools: List[Tool] | ||
| ) -> StreamingParseResult: | ||
| """ | ||
| Streaming incremental parsing tool calls for DeepSeekV3 format. | ||
| """ | ||
| self._buffer += new_text | ||
| current_text = self._buffer | ||
|
|
||
| if self.bot_token not in current_text: | ||
| self._buffer = "" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add some comments? I understand that this means that we will ignore these tokens |
||
| for e_token in [self.eot_token, "```", "<|tool▁call▁end|>"]: | ||
| if e_token in new_text: | ||
| new_text = new_text.replace(e_token, "") | ||
| return StreamingParseResult(normal_text=new_text) | ||
|
|
||
| if not hasattr(self, "_tool_indices"): | ||
| self._tool_indices = { | ||
| tool.function.name: i | ||
| for i, tool in enumerate(tools) | ||
| if tool.function and tool.function.name | ||
| } | ||
|
|
||
| calls: list[ToolCallItem] = [] | ||
This comment was marked as resolved.
Sorry, something went wrong.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for comment.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for your reply, I think the current code is good to me |
||
| try: | ||
| partial_match = re.search( | ||
| pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)", | ||
| string=current_text, | ||
| flags=re.DOTALL, | ||
| ) | ||
| if partial_match: | ||
| func_name = partial_match.group(2).strip() | ||
| func_args_raw = partial_match.group(3).strip() | ||
|
|
||
| if not self.current_tool_name_sent: | ||
| calls.append( | ||
| ToolCallItem( | ||
| tool_index=self._tool_indices.get(func_name, 0), | ||
| name=func_name, | ||
| parameters="", | ||
| ) | ||
| ) | ||
| self.current_tool_name_sent = True | ||
| else: | ||
| argument_diff = ( | ||
| func_args_raw[len(self._last_arguments) :] | ||
| if func_args_raw.startswith(self._last_arguments) | ||
| else func_args_raw | ||
| ) | ||
|
|
||
| if argument_diff: | ||
| calls.append( | ||
| ToolCallItem( | ||
| tool_index=self._tool_indices.get(func_name, 0), | ||
| name=None, | ||
| parameters=argument_diff, | ||
| ) | ||
| ) | ||
| self._last_arguments += argument_diff | ||
|
|
||
| if _is_complete_json(func_args_raw): | ||
| result = StreamingParseResult(normal_text="", calls=calls) | ||
| self._buffer = "" | ||
| self._last_arguments = "" | ||
| self.current_tool_name_sent = False | ||
| return result | ||
|
|
||
| return StreamingParseResult(normal_text="", calls=calls) | ||
|
|
||
| except Exception as e: | ||
| logger.error(f"Error in parse_streaming_increment: {e}") | ||
| return StreamingParseResult(normal_text=current_text) | ||
|
|
||
|
|
||
| class MultiFormatParser: | ||
| def __init__(self, detectors: List[BaseFormatDetector]): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is clearer. Do you like it?