Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions docs/references/deepseek.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,31 @@ Expected Response
{"id": "62af80528930423a82c806651ec66e7c", "object": "chat.completion", "created": 1744431333, "model": "deepseek-ai/DeepSeek-V3-0324", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "reasoning_content": null, "tool_calls": [{"id": "0", "type": "function", "function": {"name": "query_weather", "arguments": "{\\"city\\": \\"Guangzhou\\"}"}}]}, "logprobs": null, "finish_reason": "tool_calls", "matched_stop": null}], "usage": {"prompt_tokens": 118, "total_tokens": 140, "completion_tokens": 22, "prompt_tokens_details": null}}

```

Sample Streaming Request:
```
curl "http://127.0.0.1:30000/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"temperature": 0, "max_tokens": 100, "model": "deepseek-ai/DeepSeek-V3-0324","stream":true,"tools": [{"type": "function", "function": {"name": "query_weather", "description": "Get weather of an city, the user should supply a city first", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g. Beijing"}}, "required": ["city"]}}}], "messages": [{"role": "user", "content": "Hows the weather like in Qingdao today"}]}'
```
Expected Streamed Chunks (simplified for clarity):
```
data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"{\""}}]}}]}
data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"city"}}]}}]}
data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\":\""}}]}}]}
data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"Q"}}]}}]}
data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"ing"}}]}}]}
data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"dao"}}]}}]}
data: {"choices":[{"delta":{"tool_calls":[{"function":{"arguments":"\"}"}}]}}]}
data: {"choices":[{"delta":{"tool_calls":null},"finish_reason":"tool_calls"}]}
data: [DONE]
```
The client needs to concatenate the `arguments` fragments from all chunks, in order, to reconstruct the complete tool call arguments:
```
{"city": "Qingdao"}
```
Important Notes:
1. Use a lower `"temperature"` value for better results.
2. Currently, the function calling implementation for deepseek is incompatible with streaming requests.



## FAQ
Expand Down
82 changes: 77 additions & 5 deletions python/sglang/srt/function_call_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ def __init__(self):
self.eot_token = "<|tool▁calls▁end|>"
self.func_call_regex = r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>"
self.func_detail_regex = r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```<|tool▁call▁end|>"
self._last_arguments = ""

def has_tool_call(self, text: str) -> bool:
"""Check if the text contains a deepseek format tool call."""
Expand Down Expand Up @@ -528,13 +529,84 @@ def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult

def structure_info(self) -> _GetInfoFunc:
return lambda name: StructureInfo(
begin="<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>"
+ name
+ "\n```json\n",
end="\n```<|tool▁call▁end|><|tool▁calls▁end|>",
trigger="<|tool▁calls▁begin|>",
begin=">" + name + "\n```json\n",
end="\n```<",
trigger=">" + name + "\n```json\n",
Comment on lines +532 to +534
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
begin=">" + name + "\n```json\n",
end="\n```<",
trigger=">" + name + "\n```json\n",
begin=f"> {name} \n```json\n",
end="\n```<",
trigger=f"> {name} \n```json\n",

It is clearer. Do you like it?

)

def parse_streaming_increment(
self, new_text: str, tools: List[Tool]
) -> StreamingParseResult:
"""
Streaming incremental parsing tool calls for DeepSeekV3 format.
"""
self._buffer += new_text
current_text = self._buffer

if self.bot_token not in current_text:
self._buffer = ""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add some comments here? My understanding is that we ignore the tokens self.eot_token, "```", and "<|tool▁call▁end|>" for now because they are not complete.

for e_token in [self.eot_token, "```", "<|tool▁call▁end|>"]:
if e_token in new_text:
new_text = new_text.replace(e_token, "")
return StreamingParseResult(normal_text=new_text)

if not hasattr(self, "_tool_indices"):
self._tool_indices = {
tool.function.name: i
for i, tool in enumerate(tools)
if tool.function and tool.function.name
}

calls: list[ToolCallItem] = []

This comment was marked as resolved.

Copy link
Contributor Author

@Frank-Jie Frank-Jie Apr 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the comment.
First, I believe both list[ToolCallItem] and List[ToolCallItem] are valid from Python 3.9 onward.
However, according to PEP 585 (https://peps.python.org/pep-0585),
which states:
"This PEP proposes to enable support for the generics syntax in all standard collections currently available in the typing module."
list[ToolCallItem] is the recommended form from Python 3.9 onward,
and Python 3.9 is the version used in the current workflow YAML files, for example:
https://github.com/sgl-project/sglang/blob/main/.github/workflows/release-whl-kernel.yml#L25

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your reply. The current code looks good to me.

try:
partial_match = re.search(
pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)",
string=current_text,
flags=re.DOTALL,
)
if partial_match:
func_name = partial_match.group(2).strip()
func_args_raw = partial_match.group(3).strip()

if not self.current_tool_name_sent:
calls.append(
ToolCallItem(
tool_index=self._tool_indices.get(func_name, 0),
name=func_name,
parameters="",
)
)
self.current_tool_name_sent = True
else:
argument_diff = (
func_args_raw[len(self._last_arguments) :]
if func_args_raw.startswith(self._last_arguments)
else func_args_raw
)

if argument_diff:
calls.append(
ToolCallItem(
tool_index=self._tool_indices.get(func_name, 0),
name=None,
parameters=argument_diff,
)
)
self._last_arguments += argument_diff

if _is_complete_json(func_args_raw):
result = StreamingParseResult(normal_text="", calls=calls)
self._buffer = ""
self._last_arguments = ""
self.current_tool_name_sent = False
return result

return StreamingParseResult(normal_text="", calls=calls)

except Exception as e:
logger.error(f"Error in parse_streaming_increment: {e}")
return StreamingParseResult(normal_text=current_text)


class MultiFormatParser:
def __init__(self, detectors: List[BaseFormatDetector]):
Expand Down
2 changes: 0 additions & 2 deletions python/sglang/srt/openai_api/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,8 +964,6 @@ def v1_chat_generate_request(
),
}
)
# TODO fix the compatible issues with xgrammar
strict_tag = None

for message in request.messages:
if isinstance(message.content, str):
Expand Down
Loading