From 79c21f009222b831f1882fe0d9dbb7282222791e Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 13 Jan 2026 11:58:58 +0800
Subject: [PATCH 1/3] [Frontend]: minimax_m2 supports structural_tag

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 ...penai_chat_completion_client_with_tools.py | 97 +++++++++++--------
 vllm/entrypoints/openai/protocol.py           |  3 +-
 vllm/tool_parsers/abstract_tool_parser.py     | 12 +++
 vllm/tool_parsers/minimax_m2_tool_parser.py   | 80 +++++++++++++++
 vllm/v1/structured_output/backend_xgrammar.py |  5 +-
 5 files changed, 153 insertions(+), 44 deletions(-)

diff --git a/examples/online_serving/openai_chat_completion_client_with_tools.py b/examples/online_serving/openai_chat_completion_client_with_tools.py
index 0bd1d05322f8..20a51fb39fb1 100644
--- a/examples/online_serving/openai_chat_completion_client_with_tools.py
+++ b/examples/online_serving/openai_chat_completion_client_with_tools.py
@@ -56,6 +56,18 @@
                 "required": ["city", "state", "unit"],
             },
         },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather_forecast",
+            "description": "Get the weather forecast for a given location",
+            "parameters": {
+                "type": "object",
+                "properties": properties,
+                "required": ["city", "state", "unit"],
+            },
+        },
     }
 ]
 
@@ -65,7 +77,8 @@
     {
         "role": "user",
         "content": (
-            "Can you tell me what the temperate will be in Dallas, in fahrenheit?"
+            "Can you tell me what the temperate will be in Dallas, in fahrenheit? "
+            "Also, can you provide a weather forecast for the next few days?"
         ),
     },
 ]
@@ -141,54 +154,54 @@ def main():
 
     print("-" * 70)
     print("Chat completion results:")
-    print(chat_completion)
+    print(chat_completion.choices[0].message.content)
     print("-" * 70)
 
-    # Stream tool calls
-    chunks = handle_tool_calls_stream(client, messages, model, tools)
-    print("-" * 70)
+    # # Stream tool calls
+    # chunks = handle_tool_calls_stream(client, messages, model, tools)
+    # print("-" * 70)
 
-    # Handle arguments from streamed tool calls
-    arguments = handle_tool_calls_arguments(chunks)
+    # # Handle arguments from streamed tool calls
+    # arguments = handle_tool_calls_arguments(chunks)
 
-    if len(arguments):
-        print(f"streamed tool call arguments: {arguments[-1]}\n")
+    # if len(arguments):
+    #     print(f"streamed tool call arguments: {arguments[-1]}\n")
 
-    print("-" * 70)
+    # print("-" * 70)
 
     # Add tool call results to the conversation
-    messages.append(
-        {
-            "role": "assistant",
-            "tool_calls": chat_completion.choices[0].message.tool_calls,
-            "reasoning": chat_completion.choices[0].message.reasoning,
-        }
-    )
-
-    # Now, simulate a tool call
-    available_tools = {"get_current_weather": get_current_weather}
-
-    completion_tool_calls = chat_completion.choices[0].message.tool_calls
-    for call in completion_tool_calls:
-        tool_to_call = available_tools[call.function.name]
-        args = json.loads(call.function.arguments)
-        result = tool_to_call(**args)
-        print("tool_to_call result: ", result)
-        messages.append(
-            {
-                "role": "tool",
-                "content": result,
-                "tool_call_id": call.id,
-                "name": call.function.name,
-            }
-        )
-
-    chat_completion_2 = client.chat.completions.create(
-        messages=messages, model=model, tools=tools, stream=False
-    )
-    print("Chat completion2 results:")
-    print(chat_completion_2)
-    print("-" * 70)
+    # messages.append(
+    #     {
+    #         "role": "assistant",
+    #         "tool_calls": chat_completion.choices[0].message.tool_calls,
+    #         "reasoning": chat_completion.choices[0].message.reasoning,
+    #     }
+    # )
+
+    # # Now, simulate a tool call
+    # available_tools = {"get_current_weather": get_current_weather}
+
+    # completion_tool_calls = chat_completion.choices[0].message.tool_calls
+    # for call in completion_tool_calls:
+    #     tool_to_call = available_tools[call.function.name]
+    #     args = json.loads(call.function.arguments)
+    #     result = tool_to_call(**args)
+    #     print("tool_to_call result: ", result)
+    #     messages.append(
+    #         {
+    #             "role": "tool",
+    #             "content": result,
+    #             "tool_call_id": call.id,
+    #             "name": call.function.name,
+    #         }
+    #     )
+
+    # chat_completion_2 = client.chat.completions.create(
+    #     messages=messages, model=model, tools=tools, stream=False
+    # )
+    # print("Chat completion2 results:")
+    # print(chat_completion_2)
+    # print("-" * 70)
 
 
 if __name__ == "__main__":
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 845dae7c1bf1..9a0b6e4d1557 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -813,7 +813,8 @@ def to_sampling_params(
                 )
                 s_tag_obj = structural_tag.model_dump(by_alias=True)
                 self.structured_outputs.structural_tag = json.dumps(s_tag_obj)
-
+        print("Structured outputs params:")
+        print(self.structured_outputs)
         extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
         if self.kv_transfer_params:
             # Pass in kv_transfer_params via extra_args
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index e2ccb1dad990..b4594763d110 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -51,12 +51,24 @@ def vocab(self) -> dict[str, int]:
         # whereas all tokenizers have .get_vocab()
         return self.model_tokenizer.get_vocab()
 
+    def prepare_structured_tags(
+        self,
+        request: ChatCompletionRequest,
+    ) -> ChatCompletionRequest | None:
+        """
+        Hook for parsers that constrain tool calls with structural tags.
+        adjust_request returns a truthy result as-is; None (default) skips.
+        """
+        return None
+
     def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
         """
         Static method that used to adjust the request parameters.
         """
         if not request.tools:
             return request
+        if req := self.prepare_structured_tags(request):
+            return req
         json_schema_from_tool = get_json_schema_from_tools(
             tool_choice=request.tool_choice, tools=request.tools
         )
diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py
index 67bd0e61620d..6d83d0656e2e 100644
--- a/vllm/tool_parsers/minimax_m2_tool_parser.py
+++ b/vllm/tool_parsers/minimax_m2_tool_parser.py
@@ -15,6 +15,7 @@
     DeltaToolCall,
     ExtractedToolCallInformation,
     FunctionCall,
+    StructuralTagResponseFormat,
     ToolCall,
 )
 from vllm.logger import init_logger
@@ -24,6 +25,33 @@
 )
 
 logger = init_logger(__name__)
+MINIMAX_M2_TOOL_CALLING_SCHEMA = {
+    "type": "structural_tag",
+    "format": {
+        "type": "triggered_tags",
+        "triggers": ["<minimax:tool_call>"],
+        "tags": [
+            {
+                "begin": "<minimax:tool_call>",
+                "content": {
+                    "type": "tags_with_separator",
+                    "separator": "\n",
+                    "tags": [
+                        # Intentionally empty in the template. Each entry that
+                        # prepare_structured_tags adds has the shape:
+                        #   {"type": "tag",
+                        #    "begin": '<invoke name="TOOL_NAME">',
+                        #    "end": "</invoke>",
+                        #    "content": {...}}
+                    ],
+                    "at_least_one": True,
+                    "stop_after_first": False,
+                },
+                "end": "</minimax:tool_call>",
+            }
+        ],
+    },
+}
 
 
 class MinimaxM2ToolParser(ToolParser):
@@ -359,6 +387,9 @@ def extract_tool_calls(
         request: ChatCompletionRequest,
     ) -> ExtractedToolCallInformation:
         """Extract tool calls from complete model output (non-streaming)."""
+        return ExtractedToolCallInformation(
+            tools_called=False, tool_calls=[], content=model_output
+        )
         # Quick check
         if self.tool_call_start_token not in model_output:
             return ExtractedToolCallInformation(
@@ -774,3 +805,52 @@ def extract_tool_calls_streaming(
                         )
 
         return None
+
+    def prepare_structured_tags(
+        self, request: ChatCompletionRequest
+    ) -> ChatCompletionRequest | None:
+        """Constrain MiniMax M2 tool-call output with a structural tag.
+
+        Adds one ``<invoke>`` tag per tool (one ``<parameter>`` tag per declared
+        property) to ``request.response_format``; returns ``None`` without tools.
+        """
+        if not request.tools:
+            return None
+        # Build fresh containers per request: ``dict.copy()`` is shallow, so
+        # appending via a copy would grow the shared module-level schema.
+        invoke_tags = []
+        for tool in request.tools:
+            if not (hasattr(tool, "function") and hasattr(tool.function, "name")):
+                continue
+            params = getattr(tool.function, "parameters", None)
+            props = params.get("properties", ()) if isinstance(params, dict) else ()
+            param_tags = [
+                {
+                    "type": "tag",
+                    "begin": f'<parameter name="{param_name}">',
+                    "end": "</parameter>",
+                    "content": {"type": "any_text"},
+                }
+                for param_name in props
+            ]
+            invoke_tags.append(
+                {
+                    "type": "tag",
+                    "begin": f'<invoke name="{tool.function.name}">',
+                    "end": "</invoke>",
+                    "content": {
+                        "type": "tags_with_separator",
+                        "separator": "\n",
+                        "tags": param_tags,
+                        "at_least_one": False,
+                        "stop_after_first": False,
+                    },
+                }
+            )
+        schema = MINIMAX_M2_TOOL_CALLING_SCHEMA["format"]
+        content = {**schema["tags"][0]["content"], "tags": invoke_tags}
+        tool_call_tag = {**schema["tags"][0], "content": content}
+        request.response_format = StructuralTagResponseFormat(
+            type="structural_tag", format={**schema, "tags": [tool_call_tag]}
+        )
+        return request
diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py
index 9dd506880389..b7087f4f08e1 100644
--- a/vllm/v1/structured_output/backend_xgrammar.py
+++ b/vllm/v1/structured_output/backend_xgrammar.py
@@ -113,6 +113,8 @@ def compile_grammar(
             ctx = self.compiler.compile_regex(grammar_spec)
         elif request_type == StructuredOutputOptions.STRUCTURAL_TAG:
             s_tag = json.loads(grammar_spec)
+            print("1*1" * 20)
+            print(s_tag)
             if "structures" in s_tag:
                 # Falling back to deprecated method of compiling structural tag
                 tags = [
@@ -360,7 +362,8 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None:
     if so_params.structural_tag:
         try:
             s_tag = json.loads(so_params.structural_tag)
-
+            print("*" * 20)
+            print(s_tag)
             # Using the deprecated method of compiling structural tag
             if "structures" in s_tag:
                 tags = [

From ab77bfa9c368fb5d5dc140ca1a79f190d3613141 Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 13 Jan 2026 12:00:53 +0800
Subject: [PATCH 2/3] [Frontend]: minimax_m2 supports structural_tag

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/v1/structured_output/backend_xgrammar.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py
index b7087f4f08e1..cf3e99597d93 100644
--- a/vllm/v1/structured_output/backend_xgrammar.py
+++ b/vllm/v1/structured_output/backend_xgrammar.py
@@ -113,8 +113,6 @@ def compile_grammar(
             ctx = self.compiler.compile_regex(grammar_spec)
         elif request_type == StructuredOutputOptions.STRUCTURAL_TAG:
             s_tag = json.loads(grammar_spec)
-            print("1*1" * 20)
-            print(s_tag)
             if "structures" in s_tag:
                 # Falling back to deprecated method of compiling structural tag
                 tags = [
@@ -362,8 +360,6 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None:
     if so_params.structural_tag:
         try:
             s_tag = json.loads(so_params.structural_tag)
-            print("*" * 20)
-            print(s_tag)
             # Using the deprecated method of compiling structural tag
             if "structures" in s_tag:
                 tags = [

From 54cd09ea37975708935b2802194c859e8890363e Mon Sep 17 00:00:00 2001
From: chaunceyjiang <chaunceyjiang@gmail.com>
Date: Tue, 13 Jan 2026 12:01:55 +0800
Subject: [PATCH 3/3] [Frontend]: minimax_m2 supports structural_tag

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
---
 vllm/v1/structured_output/backend_xgrammar.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py
index cf3e99597d93..9dd506880389 100644
--- a/vllm/v1/structured_output/backend_xgrammar.py
+++ b/vllm/v1/structured_output/backend_xgrammar.py
@@ -360,6 +360,7 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None:
     if so_params.structural_tag:
         try:
             s_tag = json.loads(so_params.structural_tag)
+
             # Using the deprecated method of compiling structural tag
             if "structures" in s_tag:
                 tags = [