From 79c21f009222b831f1882fe0d9dbb7282222791e Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 13 Jan 2026 11:58:58 +0800 Subject: [PATCH 1/3] [Frontend]: minimax_m2 supports structural_tag Signed-off-by: chaunceyjiang --- ...penai_chat_completion_client_with_tools.py | 97 +++++++++++-------- vllm/entrypoints/openai/protocol.py | 3 +- vllm/tool_parsers/abstract_tool_parser.py | 12 +++ vllm/tool_parsers/minimax_m2_tool_parser.py | 80 +++++++++++++++ vllm/v1/structured_output/backend_xgrammar.py | 5 +- 5 files changed, 153 insertions(+), 44 deletions(-) diff --git a/examples/online_serving/openai_chat_completion_client_with_tools.py b/examples/online_serving/openai_chat_completion_client_with_tools.py index 0bd1d05322f8..20a51fb39fb1 100644 --- a/examples/online_serving/openai_chat_completion_client_with_tools.py +++ b/examples/online_serving/openai_chat_completion_client_with_tools.py @@ -56,6 +56,18 @@ "required": ["city", "state", "unit"], }, }, + }, + { + "type": "function", + "function": { + "name": "get_weather_forecast", + "description": "Get the weather forecast for a given location", + "parameters": { + "type": "object", + "properties": properties, + "required": ["city", "state", "unit"], + }, + }, } ] @@ -65,7 +77,8 @@ { "role": "user", "content": ( - "Can you tell me what the temperate will be in Dallas, in fahrenheit?" + "Can you tell me what the temperate will be in Dallas, in fahrenheit? " + "Also, can you provide a weather forecast for the next few days?" ), }, ] @@ -141,54 +154,54 @@ def main(): print("-" * 70) print("Chat completion results:") - print(chat_completion) + print(chat_completion.choices[0].message.content) print("-" * 70) - # Stream tool calls - chunks = handle_tool_calls_stream(client, messages, model, tools) - print("-" * 70) + # # Stream tool calls + # chunks = handle_tool_calls_stream(client, messages, model, tools) + # print("-" * 70) - # Handle arguments from streamed tool calls - arguments = handle_tool_calls_arguments(chunks) + # # Handle arguments from streamed tool calls + # arguments = handle_tool_calls_arguments(chunks) - if len(arguments): - print(f"streamed tool call arguments: {arguments[-1]}\n") + # if len(arguments): + # print(f"streamed tool call arguments: {arguments[-1]}\n") - print("-" * 70) + # print("-" * 70) # Add tool call results to the conversation - messages.append( - { - "role": "assistant", - "tool_calls": chat_completion.choices[0].message.tool_calls, - "reasoning": chat_completion.choices[0].message.reasoning, - } - ) - - # Now, simulate a tool call - available_tools = {"get_current_weather": get_current_weather} - - completion_tool_calls = chat_completion.choices[0].message.tool_calls - for call in completion_tool_calls: - tool_to_call = available_tools[call.function.name] - args = json.loads(call.function.arguments) - result = tool_to_call(**args) - print("tool_to_call result: ", result) - messages.append( - { - "role": "tool", - "content": result, - "tool_call_id": call.id, - "name": call.function.name, - } - ) - - chat_completion_2 = client.chat.completions.create( - messages=messages, model=model, tools=tools, stream=False - ) - print("Chat completion2 results:") - print(chat_completion_2) - print("-" * 70) + # messages.append( + # { + # "role": "assistant", + # "tool_calls": chat_completion.choices[0].message.tool_calls, + # "reasoning": chat_completion.choices[0].message.reasoning, + # } + # ) + + # # Now, simulate a tool call + # available_tools = {"get_current_weather": get_current_weather} + + # completion_tool_calls = chat_completion.choices[0].message.tool_calls + # for call in completion_tool_calls: + # tool_to_call = available_tools[call.function.name] + # args = json.loads(call.function.arguments) + # result = tool_to_call(**args) + # print("tool_to_call result: ", result) + # messages.append( + # { + # "role": "tool", + # "content": result, + # "tool_call_id": call.id, + # "name": call.function.name, + # } + # ) + + # chat_completion_2 = client.chat.completions.create( + # messages=messages, model=model, tools=tools, stream=False + # ) + # print("Chat completion2 results:") + # print(chat_completion_2) + # print("-" * 70) if __name__ == "__main__": diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 845dae7c1bf1..9a0b6e4d1557 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -813,7 +813,8 @@ def to_sampling_params( ) s_tag_obj = structural_tag.model_dump(by_alias=True) self.structured_outputs.structural_tag = json.dumps(s_tag_obj) - + print("Structured outputs params:") + print(self.structured_outputs) extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {} if self.kv_transfer_params: # Pass in kv_transfer_params via extra_args diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py index e2ccb1dad990..b4594763d110 100644 --- a/vllm/tool_parsers/abstract_tool_parser.py +++ b/vllm/tool_parsers/abstract_tool_parser.py @@ -51,12 +51,24 @@ def vocab(self) -> dict[str, int]: # whereas all tokenizers have .get_vocab() return self.model_tokenizer.get_vocab() + def prepare_structured_tags( + self, + request: ChatCompletionRequest, + ) -> ChatCompletionRequest | None: + """ + Instance method that can be used to prepare any structured tags + needed for tool parsing. + """ + return None + def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest: """ Static method that used to adjust the request parameters. """ if not request.tools: return request + if req := self.prepare_structured_tags(request): + return req json_schema_from_tool = get_json_schema_from_tools( tool_choice=request.tool_choice, tools=request.tools ) diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py index 67bd0e61620d..6d83d0656e2e 100644 --- a/vllm/tool_parsers/minimax_m2_tool_parser.py +++ b/vllm/tool_parsers/minimax_m2_tool_parser.py @@ -15,6 +15,7 @@ DeltaToolCall, ExtractedToolCallInformation, FunctionCall, + StructuralTagResponseFormat, ToolCall, ) from vllm.logger import init_logger @@ -24,6 +25,33 @@ ) logger = init_logger(__name__) +MINIMAX_M2_TOOL_CALLING_SCHEMA = { + "type": "structural_tag", + "format": { + "type": "triggered_tags", + "triggers": [""], + "tags": [ + { + "begin": "", + "content": { + "type": "tags_with_separator", + "separator": "\n", + "tags": [ + # { + # "type": "tag", + # "begin": '', + # "end": "", + # "content": {"type": "any_text"}, + # }, + ], + "at_least_one": True, + "stop_after_first": False, + }, + "end": "", + } + ], + }, +} class MinimaxM2ToolParser(ToolParser): @@ -359,6 +387,9 @@ def extract_tool_calls( request: ChatCompletionRequest, ) -> ExtractedToolCallInformation: """Extract tool calls from complete model output (non-streaming).""" + return ExtractedToolCallInformation( + tools_called=False, tool_calls=[], content=model_output + ) # Quick check if self.tool_call_start_token not in model_output: return ExtractedToolCallInformation( @@ -774,3 +805,52 @@ def extract_tool_calls_streaming( ) return None + + def prepare_structured_tags( + self, request: ChatCompletionRequest + ) -> ChatCompletionRequest | None: + """Prepare structured tags for MiniMax M2 tool calls.""" + if not request.tools or len(request.tools) == 0: + return None + + # Set the structured tags for tool calls + structured_tags = MINIMAX_M2_TOOL_CALLING_SCHEMA.copy() + for tool in request.tools: + if hasattr(tool, "function") and hasattr(tool.function, "name"): + func_tag = { + "type": "tag", + "begin": f'', + "end": "", + "content": { + "type": "tags_with_separator", + "separator": "\n", + "tags": [], + "at_least_one": False, + "stop_after_first": False, + }, + } + # Add parameters + if ( + hasattr(tool.function, "parameters") + and isinstance(tool.function.parameters, dict) + and "properties" in tool.function.parameters + ): + for param_name in tool.function.parameters["properties"]: + param_tag = { + "type": "tag", + "begin": f'', + "end": "", + # "content": { + # "type": "const_string", + # "value": "...", + # }, # debug + "content": {"type": "any_text"}, + } + func_tag["content"]["tags"].append(param_tag) + + structured_tags["format"]["tags"][0]["content"]["tags"].append(func_tag) + request.response_format = StructuralTagResponseFormat( + type="structural_tag", format=structured_tags["format"] + ) + print(structured_tags) + return request diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index 9dd506880389..b7087f4f08e1 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -113,6 +113,8 @@ def compile_grammar( ctx = self.compiler.compile_regex(grammar_spec) elif request_type == StructuredOutputOptions.STRUCTURAL_TAG: s_tag = json.loads(grammar_spec) + print("1*1" * 20) + print(s_tag) if "structures" in s_tag: # Falling back to deprecated method of compiling structural tag tags = [ @@ -360,7 +362,8 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: if so_params.structural_tag: try: s_tag = json.loads(so_params.structural_tag) - + print("*" * 20) + print(s_tag) # Using the deprecated method of compiling structural tag if "structures" in s_tag: tags = [ From ab77bfa9c368fb5d5dc140ca1a79f190d3613141 Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 13 Jan 2026 12:00:53 +0800 Subject: [PATCH 2/3] [Frontend]: minimax_m2 supports structural_tag Signed-off-by: chaunceyjiang --- vllm/v1/structured_output/backend_xgrammar.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index b7087f4f08e1..cf3e99597d93 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -113,8 +113,6 @@ def compile_grammar( ctx = self.compiler.compile_regex(grammar_spec) elif request_type == StructuredOutputOptions.STRUCTURAL_TAG: s_tag = json.loads(grammar_spec) - print("1*1" * 20) - print(s_tag) if "structures" in s_tag: # Falling back to deprecated method of compiling structural tag tags = [ @@ -362,8 +360,6 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: if so_params.structural_tag: try: s_tag = json.loads(so_params.structural_tag) - print("*" * 20) - print(s_tag) # Using the deprecated method of compiling structural tag if "structures" in s_tag: tags = [ From 54cd09ea37975708935b2802194c859e8890363e Mon Sep 17 00:00:00 2001 From: chaunceyjiang Date: Tue, 13 Jan 2026 12:01:55 +0800 Subject: [PATCH 3/3] [Frontend]: minimax_m2 supports structural_tag Signed-off-by: chaunceyjiang --- vllm/v1/structured_output/backend_xgrammar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index cf3e99597d93..9dd506880389 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -360,6 +360,7 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: if so_params.structural_tag: try: s_tag = json.loads(so_params.structural_tag) + # Using the deprecated method of compiling structural tag if "structures" in s_tag: tags = [