29 changes: 29 additions & 0 deletions docs/references/deepseek.md
@@ -163,6 +163,35 @@ When using FlashInfer MLA wrapper (`--attention-backend flashinfer`) with specul

See [Separate Reasoning](https://docs.sglang.ai/backend/separate_reasoning.html).


### Function Calling for DeepSeek Models

Add the argument `--tool-call-parser deepseekv3` to enable this feature. For example (running on a single H20 node):

```
python3 -m sglang.launch_server --model deepseek-ai/DeepSeek-V3-0324 --tp 8 --port 30000 --host 0.0.0.0 --mem-fraction-static 0.9 --disable-cuda-graph --tool-call-parser deepseekv3
```

Sample Request:

```
curl "http://127.0.0.1:30000/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"temperature": 0, "max_tokens": 100, "model": "deepseek-ai/DeepSeek-V3-0324", "tools": [{"type": "function", "function": {"name": "query_weather", "description": "Get weather of an city, the user should supply a city first", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g. Beijing"}}, "required": ["city"]}}}], "messages": [{"role": "user", "content": "Hows the weather like in Qingdao today"}]}'
```

Expected Response:

```
{"id": "62af80528930423a82c806651ec66e7c", "object": "chat.completion", "created": 1744431333, "model": "deepseek-ai/DeepSeek-V3-0324", "choices": [{"index": 0, "message": {"role": "assistant", "content": null, "reasoning_content": null, "tool_calls": [{"id": "0", "type": "function", "function": {"name": "query_weather", "arguments": "{\\"city\\": \\"Guangzhou\\"}"}}]}, "logprobs": null, "finish_reason": "tool_calls", "matched_stop": null}], "usage": {"prompt_tokens": 118, "total_tokens": 140, "completion_tokens": 22, "prompt_tokens_details": null}}

```

Important Notes:
1. Use a lower `"temperature"` value for better results.
2. Currently, the function calling implementation for DeepSeek models is incompatible with streaming requests, so use non-streaming calls as in the sketch below.
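
The same request can also be issued programmatically. Below is a minimal sketch using the `openai` Python client against the OpenAI-compatible endpoint, assuming the server launched above is listening on `localhost:30000`; the tool schema mirrors the curl example.

```
from openai import OpenAI

# Point the OpenAI client at the local SGLang server (the API key is unused).
client = OpenAI(base_url="http://127.0.0.1:30000/v1", api_key="None")

tools = [
    {
        "type": "function",
        "function": {
            "name": "query_weather",
            "description": "Get weather of a city, the user should supply a city first",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "The city, e.g. Beijing"}
                },
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-V3-0324",
    temperature=0,  # a lower temperature gives more reliable tool calls
    max_tokens=100,
    tools=tools,
    # Note: stream=True is not yet supported with the deepseekv3 parser.
    messages=[{"role": "user", "content": "How is the weather in Qingdao today?"}],
)

tool_call = response.choices[0].message.tool_calls[0]
print(tool_call.function.name)       # query_weather
print(tool_call.function.arguments)  # {"city": "Qingdao"}
```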


## FAQ

1. **Question**: What should I do if model loading takes too long and NCCL timeout occurs?
60 changes: 60 additions & 0 deletions python/sglang/srt/function_call_parser.py
@@ -25,6 +25,7 @@
"<tool_call>",
"<|python_tag|>",
"[TOOL_CALLS]",
"<|tool▁calls▁begin|>",
]


@@ -477,6 +478,64 @@ def structure_info(self) -> _GetInfoFunc:
)


class DeepSeekV3Detector(BaseFormatDetector):
"""
Detector for DeepSeek models.
Assumes function call format:
'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>get_current_weather\n```json\n{"location": "Tokyo"}\n```<｜tool▁call▁end｜>\n<｜tool▁call▁begin｜>function<｜tool▁sep｜>get_current_weather\n```json\n{"location": "Paris"}\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜><｜end▁of▁sentence｜>'
"""

def __init__(self):
super().__init__()
self.bot_token = "<｜tool▁calls▁begin｜>"
self.eot_token = "<｜tool▁calls▁end｜>"
self.func_call_regex = r"<｜tool▁call▁begin｜>.*?<｜tool▁call▁end｜>"
self.func_detail_regex = r"<｜tool▁call▁begin｜>(.*)<｜tool▁sep｜>(.*)\n```json\n(.*)\n```<｜tool▁call▁end｜>"

def has_tool_call(self, text: str) -> bool:
"""Check if the text contains a deepseek format tool call."""
return self.bot_token in text

def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
"""
One-time parsing: Detects and parses tool calls in the provided text.

:param text: The complete text to parse.
:param tools: List of available tools.
:return: ParseResult indicating success or failure, consumed text, leftover text, and parsed calls.
"""
idx = text.find(self.bot_token)
if idx == -1:
    # No tool-call section; return the whole text as normal output.
    return StreamingParseResult(normal_text=text, calls=[])
normal_text = text[:idx].strip()
match_result_list = re.findall(self.func_call_regex, text, re.DOTALL)
calls = []
try:
for match_result in match_result_list:
# Get function name
func_detail = re.search(self.func_detail_regex, match_result, re.DOTALL)
func_name = func_detail.group(2)
func_args = func_detail.group(3)
func_args = json.loads(func_args)
# construct match_result for parse_base_json
match_result = {"name": func_name, "parameters": func_args}
calls.extend(self.parse_base_json(match_result, tools))
return StreamingParseResult(normal_text=normal_text, calls=calls)
except Exception as e:
logger.error(f"Error in detect_and_parse: {e}")
# return the normal text if parsing fails
return StreamingParseResult(normal_text=text)

def structure_info(self) -> _GetInfoFunc:
return lambda name: StructureInfo(
begin="<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>"
+ name
+ "\n```json\n",
end="\n```<|tool▁call▁end|><|tool▁calls▁end|>",
trigger="<|tool▁calls▁begin|>",
)


class MultiFormatParser:
def __init__(self, detectors: List[BaseFormatDetector]):
"""
@@ -543,6 +602,7 @@ class FunctionCallParser:
"llama3": Llama32Detector,
"qwen25": Qwen25Detector,
"mistral": MistralDetector,
"deepseekv3": DeepSeekV3Detector,
}

def __init__(self, tools: List[Tool], tool_call_parser: str):
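
To make the token format handled by `DeepSeekV3Detector` concrete, here is a standalone sketch of the extraction that `func_detail_regex` performs on a single tool-call block. The completion string is a hypothetical example, and the snippet re-implements the match outside sglang purely for illustration:

```
import json
import re

# Hypothetical raw completion containing one DeepSeek-V3 tool call.
text = (
    "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>query_weather"
    "\n```json\n"
    '{"city": "Qingdao"}'
    "\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>"
)

# Same pattern shape as func_detail_regex above; the fullwidth ｜ delimiters
# are ordinary characters to the regex engine, so no escaping is needed.
detail = re.search(
    r"<｜tool▁call▁begin｜>(.*)<｜tool▁sep｜>(.*)\n```json\n(.*)\n```<｜tool▁call▁end｜>",
    text,
    re.DOTALL,
)
call_type, func_name, func_args = detail.groups()
print(call_type)              # function
print(func_name)              # query_weather
print(json.loads(func_args))  # {'city': 'Qingdao'}
```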
29 changes: 29 additions & 0 deletions python/sglang/srt/openai_api/adapter.py
@@ -938,6 +938,35 @@ def v1_chat_generate_request(

if chat_template_name is None:
openai_compatible_messages = []
if (
tools
and tokenizer_manager.server_args.tool_call_parser == "deepseekv3"
Review thread (on the `deepseekv3` condition above):

Collaborator: Why not add a chat template for this? This kind of prompt engineering is better placed in a chat template.

Contributor: Agreed, a new built-in template should be added.

Author: Got it, we can create a new PR using a custom chat template.
):
# add function call prompt to deepseekv3
openai_compatible_messages.append(
{
"role": "system",
"content": """You are a helpful Assistant.
## Tools
### Function
You have the following functions available:
"""
+ "".join(
[
f"""
- `{tool['name']}`:
```json
{json.dumps(tool)}
```
"""
for tool in tools
]
),
}
)
# TODO: fix compatibility issues with xgrammar
strict_tag = None

for message in request.messages:
if isinstance(message.content, str):
openai_compatible_messages.append(
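
To see what this injects, here is a standalone sketch that re-creates the system prompt string for the `query_weather` tool from the docs example above (a hedged re-implementation for illustration, not the adapter code path itself):

````
import json

# Tool dict in the shape used above (the function fields of a tool definition).
tools = [
    {
        "name": "query_weather",
        "description": "Get weather of a city, the user should supply a city first",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "The city, e.g. Beijing"}
            },
            "required": ["city"],
        },
    }
]

# Same string construction as in the adapter snippet above.
content = """You are a helpful Assistant.
## Tools
### Function
You have the following functions available:
""" + "".join(
    f"""
- `{tool['name']}`:
```json
{json.dumps(tool)}
```
"""
    for tool in tools
)
print(content)
````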
2 changes: 1 addition & 1 deletion python/sglang/srt/server_args.py
@@ -1080,7 +1080,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
parser.add_argument(
"--tool-call-parser",
type=str,
choices=["qwen25", "mistral", "llama3"],
choices=["qwen25", "mistral", "llama3", "deepseekv3"],
default=ServerArgs.tool_call_parser,
help="Specify the parser for handling tool-call interactions. Options include: 'qwen25', 'mistral', and 'llama3'.",
)