Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 55 additions & 42 deletions examples/online_serving/openai_chat_completion_client_with_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@
"required": ["city", "state", "unit"],
},
},
},
{
"type": "function",
"function": {
"name": "get_weather_forecast",
"description": "Get the weather forecast for a given location",
"parameters": {
"type": "object",
"properties": properties,
"required": ["city", "state", "unit"],
},
},
}
]

Expand All @@ -65,7 +77,8 @@
{
"role": "user",
"content": (
"Can you tell me what the temperate will be in Dallas, in fahrenheit?"
"Can you tell me what the temperate will be in Dallas, in fahrenheit? "
"Also, can you provide a weather forecast for the next few days?"
),
},
]
Expand Down Expand Up @@ -141,54 +154,54 @@ def main():

print("-" * 70)
print("Chat completion results:")
print(chat_completion)
print(chat_completion.choices[0].message.content)
print("-" * 70)

# Stream tool calls
chunks = handle_tool_calls_stream(client, messages, model, tools)
print("-" * 70)
# # Stream tool calls
# chunks = handle_tool_calls_stream(client, messages, model, tools)
# print("-" * 70)

# Handle arguments from streamed tool calls
arguments = handle_tool_calls_arguments(chunks)
# # Handle arguments from streamed tool calls
# arguments = handle_tool_calls_arguments(chunks)

if len(arguments):
print(f"streamed tool call arguments: {arguments[-1]}\n")
# if len(arguments):
# print(f"streamed tool call arguments: {arguments[-1]}\n")

print("-" * 70)
# print("-" * 70)

# Add tool call results to the conversation
messages.append(
{
"role": "assistant",
"tool_calls": chat_completion.choices[0].message.tool_calls,
"reasoning": chat_completion.choices[0].message.reasoning,
}
)

# Now, simulate a tool call
available_tools = {"get_current_weather": get_current_weather}

completion_tool_calls = chat_completion.choices[0].message.tool_calls
for call in completion_tool_calls:
tool_to_call = available_tools[call.function.name]
args = json.loads(call.function.arguments)
result = tool_to_call(**args)
print("tool_to_call result: ", result)
messages.append(
{
"role": "tool",
"content": result,
"tool_call_id": call.id,
"name": call.function.name,
}
)

chat_completion_2 = client.chat.completions.create(
messages=messages, model=model, tools=tools, stream=False
)
print("Chat completion2 results:")
print(chat_completion_2)
print("-" * 70)
# messages.append(
# {
# "role": "assistant",
# "tool_calls": chat_completion.choices[0].message.tool_calls,
# "reasoning": chat_completion.choices[0].message.reasoning,
# }
# )

# # Now, simulate a tool call
# available_tools = {"get_current_weather": get_current_weather}

# completion_tool_calls = chat_completion.choices[0].message.tool_calls
# for call in completion_tool_calls:
# tool_to_call = available_tools[call.function.name]
# args = json.loads(call.function.arguments)
# result = tool_to_call(**args)
# print("tool_to_call result: ", result)
# messages.append(
# {
# "role": "tool",
# "content": result,
# "tool_call_id": call.id,
# "name": call.function.name,
# }
# )

# chat_completion_2 = client.chat.completions.create(
# messages=messages, model=model, tools=tools, stream=False
# )
# print("Chat completion2 results:")
# print(chat_completion_2)
# print("-" * 70)


if __name__ == "__main__":
Expand Down
3 changes: 2 additions & 1 deletion vllm/entrypoints/openai/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,8 @@ def to_sampling_params(
)
s_tag_obj = structural_tag.model_dump(by_alias=True)
self.structured_outputs.structural_tag = json.dumps(s_tag_obj)

print("Structured outputs params:")
print(self.structured_outputs)
Comment on lines +816 to +817
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

These appear to be debug print statements. They should be removed before merging to avoid polluting production logs.

extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
if self.kv_transfer_params:
# Pass in kv_transfer_params via extra_args
Expand Down
12 changes: 12 additions & 0 deletions vllm/tool_parsers/abstract_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,24 @@ def vocab(self) -> dict[str, int]:
# whereas all tokenizers have .get_vocab()
return self.model_tokenizer.get_vocab()

def prepare_structured_tags(
    self, request: ChatCompletionRequest
) -> ChatCompletionRequest | None:
    """Hook for parser subclasses to install structured-tag constraints.

    The base implementation does no preparation and returns ``None``,
    which tells ``adjust_request`` to fall back to the default
    JSON-schema-based tool handling.
    """
    return None

def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
"""
Static method that used to adjust the request parameters.
"""
if not request.tools:
return request
if req := self.prepare_structured_tags(request):
return req
json_schema_from_tool = get_json_schema_from_tools(
tool_choice=request.tool_choice, tools=request.tools
)
Expand Down
80 changes: 80 additions & 0 deletions vllm/tool_parsers/minimax_m2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DeltaToolCall,
ExtractedToolCallInformation,
FunctionCall,
StructuralTagResponseFormat,
ToolCall,
)
from vllm.logger import init_logger
Expand All @@ -24,6 +25,33 @@
)

logger = init_logger(__name__)
# Structural-tag grammar template for MiniMax M2 tool calling.  Seeing the
# <minimax:tool_call> trigger switches decoding into constrained mode; the
# inner "tags" list is deliberately empty here and is filled in per request
# (one <invoke ...> tag per declared tool) by
# MinimaxM2ToolParser.prepare_structured_tags.
# NOTE: this is a mutable nested dict — consumers must deep-copy it before
# modifying it, or the per-request tool tags would leak into this shared
# module-level template.
MINIMAX_M2_TOOL_CALLING_SCHEMA = {
    "type": "structural_tag",
    "format": {
        "type": "triggered_tags",
        "triggers": ["<minimax:tool_call>"],
        "tags": [
            {
                "begin": "<minimax:tool_call>",
                "content": {
                    "type": "tags_with_separator",
                    "separator": "\n",
                    # Populated per request with one tag per declared tool.
                    "tags": [],
                    "at_least_one": True,
                    "stop_after_first": False,
                },
                "end": "</minimax:tool_call>",
            }
        ],
    },
}


class MinimaxM2ToolParser(ToolParser):
Expand Down Expand Up @@ -359,6 +387,9 @@ def extract_tool_calls(
request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
"""Extract tool calls from complete model output (non-streaming)."""
return ExtractedToolCallInformation(
tools_called=False, tool_calls=[], content=model_output
)
Comment on lines +390 to +392
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This early return effectively disables the entire extract_tool_calls method, causing non-streaming tool call extraction to fail. The method will always return an empty list of tool calls, regardless of the model output. This appears to be a critical issue that breaks existing functionality.

# Quick check
if self.tool_call_start_token not in model_output:
return ExtractedToolCallInformation(
Expand Down Expand Up @@ -774,3 +805,52 @@ def extract_tool_calls_streaming(
)

return None

def prepare_structured_tags(
    self, request: ChatCompletionRequest
) -> ChatCompletionRequest | None:
    """Install a structural-tag response format for MiniMax M2 tool calls.

    Builds one ``<invoke name="...">`` tag per declared tool, with one
    ``<parameter name="...">`` sub-tag per JSON-schema property (parameter
    values are unconstrained text), and sets the resulting grammar on
    ``request.response_format`` so decoding is constrained to well-formed
    tool-call markup.

    Returns the mutated request, or ``None`` when the request declares no
    tools (signalling the caller to use the default handling).
    """
    if not request.tools:
        return None

    # Deep-copy the module-level template: dict.copy() would be a shallow
    # copy that shares the nested "tags" lists, so the appends below would
    # mutate the global schema and leak tool tags across requests.
    import copy

    structured_tags = copy.deepcopy(MINIMAX_M2_TOOL_CALLING_SCHEMA)
    tool_tags = structured_tags["format"]["tags"][0]["content"]["tags"]

    for tool in request.tools:
        # Skip entries that do not look like function tools.
        if not (hasattr(tool, "function") and hasattr(tool.function, "name")):
            continue
        func_tag = {
            "type": "tag",
            "begin": f'<invoke name="{tool.function.name}">',
            "end": "</invoke>",
            "content": {
                "type": "tags_with_separator",
                "separator": "\n",
                "tags": [],
                "at_least_one": False,
                "stop_after_first": False,
            },
        }
        # Add one <parameter> tag per declared JSON-schema property.
        params = getattr(tool.function, "parameters", None)
        if isinstance(params, dict) and "properties" in params:
            for param_name in params["properties"]:
                func_tag["content"]["tags"].append(
                    {
                        "type": "tag",
                        "begin": f'<parameter name="{param_name}">',
                        "end": "</parameter>",
                        "content": {"type": "any_text"},
                    }
                )
        tool_tags.append(func_tag)

    request.response_format = StructuralTagResponseFormat(
        type="structural_tag", format=structured_tags["format"]
    )
    return request
Loading