Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 55 additions & 42 deletions examples/online_serving/openai_chat_completion_client_with_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@
"required": ["city", "state", "unit"],
},
},
},
{
"type": "function",
"function": {
"name": "get_weather_forecast",
"description": "Get the weather forecast for a given location",
"parameters": {
"type": "object",
"properties": properties,
"required": ["city", "state", "unit"],
},
},
}
]

Expand All @@ -65,7 +77,8 @@
{
"role": "user",
"content": (
"Can you tell me what the temperate will be in Dallas, in fahrenheit?"
"Can you tell me what the temperate will be in Dallas, in fahrenheit? "
"Also, can you provide a weather forecast for the next few days?"
),
},
]
Expand Down Expand Up @@ -141,54 +154,54 @@ def main():

print("-" * 70)
print("Chat completion results:")
print(chat_completion)
print(chat_completion.choices[0].message.content)
print("-" * 70)

# Stream tool calls
chunks = handle_tool_calls_stream(client, messages, model, tools)
print("-" * 70)
# # Stream tool calls
# chunks = handle_tool_calls_stream(client, messages, model, tools)
# print("-" * 70)

# Handle arguments from streamed tool calls
arguments = handle_tool_calls_arguments(chunks)
# # Handle arguments from streamed tool calls
# arguments = handle_tool_calls_arguments(chunks)

if len(arguments):
print(f"streamed tool call arguments: {arguments[-1]}\n")
# if len(arguments):
# print(f"streamed tool call arguments: {arguments[-1]}\n")

print("-" * 70)
# print("-" * 70)

# Add tool call results to the conversation
messages.append(
{
"role": "assistant",
"tool_calls": chat_completion.choices[0].message.tool_calls,
"reasoning": chat_completion.choices[0].message.reasoning,
}
)

# Now, simulate a tool call
available_tools = {"get_current_weather": get_current_weather}

completion_tool_calls = chat_completion.choices[0].message.tool_calls
for call in completion_tool_calls:
tool_to_call = available_tools[call.function.name]
args = json.loads(call.function.arguments)
result = tool_to_call(**args)
print("tool_to_call result: ", result)
messages.append(
{
"role": "tool",
"content": result,
"tool_call_id": call.id,
"name": call.function.name,
}
)

chat_completion_2 = client.chat.completions.create(
messages=messages, model=model, tools=tools, stream=False
)
print("Chat completion2 results:")
print(chat_completion_2)
print("-" * 70)
# messages.append(
# {
# "role": "assistant",
# "tool_calls": chat_completion.choices[0].message.tool_calls,
# "reasoning": chat_completion.choices[0].message.reasoning,
# }
# )

# # Now, simulate a tool call
# available_tools = {"get_current_weather": get_current_weather}

# completion_tool_calls = chat_completion.choices[0].message.tool_calls
# for call in completion_tool_calls:
# tool_to_call = available_tools[call.function.name]
# args = json.loads(call.function.arguments)
# result = tool_to_call(**args)
# print("tool_to_call result: ", result)
# messages.append(
# {
# "role": "tool",
# "content": result,
# "tool_call_id": call.id,
# "name": call.function.name,
# }
# )

# chat_completion_2 = client.chat.completions.create(
# messages=messages, model=model, tools=tools, stream=False
# )
# print("Chat completion2 results:")
# print(chat_completion_2)
# print("-" * 70)


if __name__ == "__main__":
Expand Down
3 changes: 2 additions & 1 deletion vllm/entrypoints/openai/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,8 @@ def to_sampling_params(
)
s_tag_obj = structural_tag.model_dump(by_alias=True)
self.structured_outputs.structural_tag = json.dumps(s_tag_obj)

print("Structured outputs params:")
print(self.structured_outputs)
Comment on lines +816 to +817
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

These appear to be debug print statements. They should be removed before merging to avoid polluting production logs.

extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
if self.kv_transfer_params:
# Pass in kv_transfer_params via extra_args
Expand Down
12 changes: 12 additions & 0 deletions vllm/tool_parsers/abstract_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,24 @@ def vocab(self) -> dict[str, int]:
# whereas all tokenizers have .get_vocab()
return self.model_tokenizer.get_vocab()

def prepare_structured_tags(
    self, request: ChatCompletionRequest
) -> ChatCompletionRequest | None:
    """Hook for parser subclasses to install structured-tag constraints.

    The base implementation does no preparation and returns ``None``,
    which tells ``adjust_request`` to fall back to the default
    JSON-schema-based tool handling.
    """
    return None

def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
"""
Static method that used to adjust the request parameters.
"""
if not request.tools:
return request
if req := self.prepare_structured_tags(request):
return req
json_schema_from_tool = get_json_schema_from_tools(
tool_choice=request.tool_choice, tools=request.tools
)
Expand Down
80 changes: 80 additions & 0 deletions vllm/tool_parsers/minimax_m2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DeltaToolCall,
ExtractedToolCallInformation,
FunctionCall,
StructuralTagResponseFormat,
ToolCall,
)
from vllm.logger import init_logger
Expand All @@ -24,6 +25,33 @@
)

logger = init_logger(__name__)
# Structural-tag grammar template for MiniMax M2 tool calling.  Seeing the
# <minimax:tool_call> trigger switches decoding into constrained mode; the
# inner "tags" list is deliberately empty here and is filled in per request
# (one <invoke ...> tag per declared tool) by
# MinimaxM2ToolParser.prepare_structured_tags.
# NOTE: this is a mutable nested dict — consumers must deep-copy it before
# modifying it, or the per-request tool tags would leak into this shared
# module-level template.
MINIMAX_M2_TOOL_CALLING_SCHEMA = {
    "type": "structural_tag",
    "format": {
        "type": "triggered_tags",
        "triggers": ["<minimax:tool_call>"],
        "tags": [
            {
                "begin": "<minimax:tool_call>",
                "content": {
                    "type": "tags_with_separator",
                    "separator": "\n",
                    # Populated per request with one tag per declared tool.
                    "tags": [],
                    "at_least_one": True,
                    "stop_after_first": False,
                },
                "end": "</minimax:tool_call>",
            }
        ],
    },
}


class MinimaxM2ToolParser(ToolParser):
Expand Down Expand Up @@ -359,6 +387,9 @@ def extract_tool_calls(
request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
"""Extract tool calls from complete model output (non-streaming)."""
return ExtractedToolCallInformation(
tools_called=False, tool_calls=[], content=model_output
)
Comment on lines +390 to +392
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This early return effectively disables the entire extract_tool_calls method, causing non-streaming tool call extraction to fail. The method will always return an empty list of tool calls, regardless of the model output. This appears to be a critical issue that breaks existing functionality.

# Quick check
if self.tool_call_start_token not in model_output:
return ExtractedToolCallInformation(
Expand Down Expand Up @@ -774,3 +805,52 @@ def extract_tool_calls_streaming(
)

return None

def prepare_structured_tags(
    self, request: ChatCompletionRequest
) -> ChatCompletionRequest | None:
    """Install a structural-tag response format for MiniMax M2 tool calls.

    Builds one ``<invoke name="...">`` tag per declared tool, with one
    ``<parameter name="...">`` sub-tag per JSON-schema property (parameter
    values are unconstrained text), and sets the resulting grammar on
    ``request.response_format`` so decoding is constrained to well-formed
    tool-call markup.

    Returns the mutated request, or ``None`` when the request declares no
    tools (signalling the caller to use the default handling).
    """
    if not request.tools:
        return None

    # Deep-copy the module-level template: dict.copy() would be a shallow
    # copy that shares the nested "tags" lists, so the appends below would
    # mutate the global schema and leak tool tags across requests.
    import copy

    structured_tags = copy.deepcopy(MINIMAX_M2_TOOL_CALLING_SCHEMA)
    tool_tags = structured_tags["format"]["tags"][0]["content"]["tags"]

    for tool in request.tools:
        # Skip entries that do not look like function tools.
        if not (hasattr(tool, "function") and hasattr(tool.function, "name")):
            continue
        func_tag = {
            "type": "tag",
            "begin": f'<invoke name="{tool.function.name}">',
            "end": "</invoke>",
            "content": {
                "type": "tags_with_separator",
                "separator": "\n",
                "tags": [],
                "at_least_one": False,
                "stop_after_first": False,
            },
        }
        # Add one <parameter> tag per declared JSON-schema property.
        params = getattr(tool.function, "parameters", None)
        if isinstance(params, dict) and "properties" in params:
            for param_name in params["properties"]:
                func_tag["content"]["tags"].append(
                    {
                        "type": "tag",
                        "begin": f'<parameter name="{param_name}">',
                        "end": "</parameter>",
                        "content": {"type": "any_text"},
                    }
                )
        tool_tags.append(func_tag)

    request.response_format = StructuralTagResponseFormat(
        type="structural_tag", format=structured_tags["format"]
    )
    return request
Loading