From 0cf29d5ca595e6ab0433a8a9dc9836d8b6aa2ac6 Mon Sep 17 00:00:00 2001 From: ShaikAbdulHafeez03 Date: Sat, 22 Nov 2025 16:29:29 +0530 Subject: [PATCH 1/4] Fix: enhance OpenAIToolParser and Harmony parser tool-calling robustness Signed-off-by: ShaikAbdulHafeez03 --- vllm/entrypoints/harmony_utils.py | 611 +++--------------- .../openai/tool_parsers/openai_tool_parser.py | 63 +- 2 files changed, 135 insertions(+), 539 deletions(-) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 47a252348c10..045ae7e95693 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -1,535 +1,114 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import datetime import json -from collections.abc import Iterable, Sequence -from typing import Literal - -from openai.types.responses import ( - ResponseFunctionToolCall, - ResponseOutputItem, - ResponseOutputMessage, - ResponseOutputText, - ResponseReasoningItem, -) -from openai.types.responses.response_function_web_search import ( - ActionFind, - ActionOpenPage, - ActionSearch, - ResponseFunctionWebSearch, -) -from openai.types.responses.response_reasoning_item import ( - Content as ResponseReasoningTextContent, -) -from openai.types.responses.tool import Tool -from openai_harmony import ( - Author, - ChannelConfig, - Conversation, - DeveloperContent, - HarmonyEncodingName, - Message, - ReasoningEffort, - Role, - StreamableParser, - SystemContent, - TextContent, - ToolDescription, - load_harmony_encoding, -) -from openai_harmony import Message as OpenAIHarmonyMessage -from openai_harmony import Role as OpenAIHarmonyRole +from collections.abc import Sequence +from typing import TYPE_CHECKING -from vllm import envs +from vllm.entrypoints.harmony_utils import parse_output_into_messages from vllm.entrypoints.openai.protocol import ( - ChatCompletionToolsParam, - ResponseInputOutputItem, - ResponsesRequest, + ChatCompletionRequest, + DeltaMessage, + ExtractedToolCallInformation, + FunctionCall, + ToolCall, ) -from vllm.utils import random_uuid - -REASONING_EFFORT = { - "high": ReasoningEffort.HIGH, - "medium": ReasoningEffort.MEDIUM, - "low": ReasoningEffort.LOW, -} - -_harmony_encoding = None - -# Builtin tools that should be included in the system message when -# they are available and requested by the user. -# Tool args are provided by MCP tool descriptions. Output -# of the tools are stringified. -MCP_BUILTIN_TOOLS: set[str] = { - "web_search_preview", - "code_interpreter", - "container", -} - - -def has_custom_tools(tool_types: set[str]) -> bool: - """ - Checks if the given tool types are custom tools - (i.e. any tool other than MCP buildin tools) - """ - return not tool_types.issubset(MCP_BUILTIN_TOOLS) - - -def get_encoding(): - global _harmony_encoding - if _harmony_encoding is None: - _harmony_encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) - return _harmony_encoding - - -def get_system_message( - model_identity: str | None = None, - reasoning_effort: Literal["high", "medium", "low"] | None = None, - start_date: str | None = None, - browser_description: str | None = None, - python_description: str | None = None, - container_description: str | None = None, - instructions: str | None = None, - with_custom_tools: bool = False, -) -> Message: - sys_msg_content = SystemContent.new() - if model_identity is not None: - sys_msg_content = sys_msg_content.with_model_identity(model_identity) - if instructions is not None and envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: - current_identity = sys_msg_content.model_identity - new_identity = ( - f"{current_identity}\n{instructions}" if current_identity else instructions - ) - sys_msg_content = sys_msg_content.with_model_identity(new_identity) - if reasoning_effort is not None: - sys_msg_content = sys_msg_content.with_reasoning_effort( - REASONING_EFFORT[reasoning_effort] - ) - if start_date is None: - # NOTE(woosuk): This brings non-determinism in vLLM. Be careful. - start_date = datetime.datetime.now().strftime("%Y-%m-%d") - sys_msg_content = sys_msg_content.with_conversation_start_date(start_date) - if browser_description is not None: - sys_msg_content = sys_msg_content.with_tools(browser_description) - if python_description is not None: - sys_msg_content = sys_msg_content.with_tools(python_description) - if container_description is not None: - sys_msg_content = sys_msg_content.with_tools(container_description) - if not with_custom_tools: - channel_config = sys_msg_content.channel_config - invalid_channel = "commentary" - new_config = ChannelConfig.require_channels( - [c for c in channel_config.valid_channels if c != invalid_channel] - ) - sys_msg_content = sys_msg_content.with_channel_config(new_config) - sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content) - return sys_msg +from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( + ToolParser, +) +from vllm.logger import init_logger +if TYPE_CHECKING: + from vllm.transformers_utils.tokenizer import AnyTokenizer +else: + AnyTokenizer = object -def create_tool_definition(tool: ChatCompletionToolsParam | Tool): - if isinstance(tool, ChatCompletionToolsParam): - return ToolDescription.new( - name=tool.function.name, - description=tool.function.description, - parameters=tool.function.parameters, - ) - return ToolDescription.new( - name=tool.name, - description=tool.description, - parameters=tool.parameters, - ) +logger = init_logger(__name__) -def get_developer_message( - instructions: str | None = None, - tools: list[Tool | ChatCompletionToolsParam] | None = None, -) -> Message: - dev_msg_content = DeveloperContent.new() - if instructions is not None and not envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: - dev_msg_content = dev_msg_content.with_instructions(instructions) - if tools is not None: - function_tools: list[Tool | ChatCompletionToolsParam] = [] - for tool in tools: - if tool.type in ( - "web_search_preview", - "code_interpreter", - "container", - "mcp", - ): - # These are built-in tools that are added to the system message. - # Adding in MCP for now until we support MCP tools executed - # server side - pass +class OpenAIToolParser(ToolParser): + def __init__(self, tokenizer: "AnyTokenizer"): + super().__init__(tokenizer) - elif tool.type == "function": - function_tools.append(tool) - else: - raise ValueError(f"tool type {tool.type} not supported") - if function_tools: - function_tool_descriptions = [ - create_tool_definition(tool) for tool in function_tools - ] - dev_msg_content = dev_msg_content.with_function_tools( - function_tool_descriptions + def extract_tool_calls( + self, + model_output: str, + request: ChatCompletionRequest, + token_ids: Sequence[int] | None = None, + ) -> ExtractedToolCallInformation: + if token_ids is None: + raise NotImplementedError( + "OpenAIToolParser requires token IDs and does not support text-based extraction." # noqa: E501 ) - dev_msg = Message.from_role_and_content(Role.DEVELOPER, dev_msg_content) - return dev_msg - - -def get_user_message(content: str) -> Message: - return Message.from_role_and_content(Role.USER, content) + parser = parse_output_into_messages(token_ids) + tool_calls = [] + final_content = None -def parse_response_input( - response_msg: ResponseInputOutputItem, - prev_responses: list[ResponseOutputItem | ResponseReasoningItem], -) -> Message: - if not isinstance(response_msg, dict): - response_msg = response_msg.model_dump() - if "type" not in response_msg or response_msg["type"] == "message": - role = response_msg["role"] - content = response_msg["content"] - if role == "system": - # User is trying to set a system message. Change it to: - # <|start|>developer<|message|># Instructions - # {instructions}<|end|> - role = "developer" - text_prefix = "Instructions:\n" - else: - text_prefix = "" - if isinstance(content, str): - msg = Message.from_role_and_content(role, text_prefix + content) - else: - contents = [TextContent(text=text_prefix + c["text"]) for c in content] - msg = Message.from_role_and_contents(role, contents) - if role == "assistant": - msg = msg.with_channel("final") - elif response_msg["type"] == "function_call_output": - call_id = response_msg["call_id"] - call_response: ResponseFunctionToolCall | None = None - for prev_response in reversed(prev_responses): - if ( - isinstance(prev_response, ResponseFunctionToolCall) - and prev_response.call_id == call_id - ): - call_response = prev_response - break - if call_response is None: - raise ValueError(f"No call message found for {call_id}") - msg = Message.from_author_and_content( - Author.new(Role.TOOL, f"functions.{call_response.name}"), - response_msg["output"], - ) - elif response_msg["type"] == "reasoning": - content = response_msg["content"] - assert len(content) == 1 - msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"]) - elif response_msg["type"] == "function_call": - msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"]) - msg = msg.with_channel("commentary") - msg = msg.with_recipient(f"functions.{response_msg['name']}") - msg = msg.with_content_type("json") - else: - raise ValueError(f"Unknown input type: {response_msg['type']}") - return msg - - -def parse_input_to_harmony_message(chat_msg) -> list[Message]: - if not isinstance(chat_msg, dict): - # Handle Pydantic models - chat_msg = chat_msg.model_dump(exclude_none=True) - - role = chat_msg.get("role") - - # Assistant message with tool calls - tool_calls = chat_msg.get("tool_calls") - if role == "assistant" and tool_calls: - msgs: list[Message] = [] - for call in tool_calls: - func = call.get("function", {}) - name = func.get("name", "") - arguments = func.get("arguments", "") or "" - msg = Message.from_role_and_content(Role.ASSISTANT, arguments) - msg = msg.with_channel("commentary") - msg = msg.with_recipient(f"functions.{name}") - msg = msg.with_content_type("json") - msgs.append(msg) - return msgs - - # Tool role message (tool output) - if role == "tool": - name = chat_msg.get("name", "") - content = chat_msg.get("content", "") or "" - if isinstance(content, list): - # Handle array format for tool message content - # by concatenating all text parts. - content = "".join( - item.get("text", "") - for item in content - if isinstance(item, dict) and item.get("type") == "text" + def _create_tool_call(function_name: str, arguments: str) -> ToolCall: + # Sanitize the function name to remove leaked tags (e.g. <|channel|>) + clean_name = function_name.split("<")[0].strip() + + try: + clean_args = json.dumps(json.loads(arguments)) + except json.JSONDecodeError: + logger.debug("Partial or invalid JSON tool call detected.") + clean_args = arguments + + return ToolCall( + type="function", + function=FunctionCall( + name=clean_name, + arguments=clean_args, + ), ) - msg = Message.from_author_and_content( - Author.new(Role.TOOL, f"functions.{name}"), content - ).with_channel("commentary") - return [msg] - - # Default: user/assistant/system messages with content - content = chat_msg.get("content", "") - if isinstance(content, str): - contents = [TextContent(text=content)] - else: - # TODO: Support refusal. - contents = [TextContent(text=c.get("text", "")) for c in content] - msg = Message.from_role_and_contents(role, contents) - return [msg] - - -def construct_harmony_previous_input_messages( - request: ResponsesRequest, -) -> list[OpenAIHarmonyMessage]: - messages: list[OpenAIHarmonyMessage] = [] - if request.previous_input_messages: - for message in request.previous_input_messages: - # Handle both OpenAIHarmonyMessage objects and dictionary inputs - if isinstance(message, OpenAIHarmonyMessage): - message_role = message.author.role - # To match OpenAI, instructions, reasoning and tools are - # always taken from the most recent Responses API request - # not carried over from previous requests - if ( - message_role == OpenAIHarmonyRole.SYSTEM - or message_role == OpenAIHarmonyRole.DEVELOPER - ): + if len(parser.messages) > 0: + for msg in parser.messages: + if len(msg.content) < 1: continue - messages.append(message) - else: - harmony_messages = parse_input_to_harmony_message(message) - for harmony_msg in harmony_messages: - message_role = harmony_msg.author.role - # To match OpenAI, instructions, reasoning and tools are - # always taken from the most recent Responses API request - # not carried over from previous requests - if ( - message_role == OpenAIHarmonyRole.SYSTEM - or message_role == OpenAIHarmonyRole.DEVELOPER - ): - continue - messages.append(harmony_msg) - return messages - - -def render_for_completion(messages: list[Message]) -> list[int]: - conversation = Conversation.from_messages(messages) - token_ids = get_encoding().render_conversation_for_completion( - conversation, Role.ASSISTANT - ) - return token_ids - - -def parse_output_message(message: Message) -> list[ResponseOutputItem]: - """ - Parse a Harmony message into a list of output response items. - """ - if message.author.role != "assistant": - # This is a message from a tool to the assistant (e.g., search result). - # Don't include it in the final output for now. This aligns with - # OpenAI's behavior on models like o4-mini. - return [] - - output_items: list[ResponseOutputItem] = [] - recipient = message.recipient - if recipient is not None and recipient.startswith("browser."): - if len(message.content) != 1: - raise ValueError("Invalid number of contents in browser message") - content = message.content[0] - # We do not need to check the VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY - # env variable since if it is not set, we are certain the json is valid - # The use of Actions for web search will be removed entirely in - # the future, so this is only necessary temporarily - try: - browser_call = json.loads(content.text) - except json.JSONDecodeError: - # If the content is not valid JSON, then it was - # caught and retried by vLLM, which means we - # need to make note of that so the user is aware - json_retry_output_message = ( - f"Invalid JSON args, caught and retried: {content.text}" - ) - browser_call = { - "query": json_retry_output_message, - "url": json_retry_output_message, - "pattern": json_retry_output_message, - } - # TODO: translate to url properly! - if recipient == "browser.search": - action = ActionSearch( - query=f"cursor:{browser_call.get('query', '')}", type="search" - ) - elif recipient == "browser.open": - action = ActionOpenPage( - url=f"cursor:{browser_call.get('url', '')}", type="open_page" - ) - elif recipient == "browser.find": - action = ActionFind( - pattern=browser_call["pattern"], - url=f"cursor:{browser_call.get('url', '')}", - type="find", - ) - else: - raise ValueError(f"Unknown browser action: {recipient}") - web_search_item = ResponseFunctionWebSearch( - id=f"ws_{random_uuid()}", - action=action, - status="completed", - type="web_search_call", - ) - output_items.append(web_search_item) - elif message.channel == "analysis": - for content in message.content: - reasoning_item = ResponseReasoningItem( - id=f"rs_{random_uuid()}", - summary=[], - type="reasoning", - content=[ - ResponseReasoningTextContent( - text=content.text, type="reasoning_text" - ) - ], - status=None, - ) - output_items.append(reasoning_item) - elif message.channel == "commentary": - if recipient is not None and recipient.startswith("functions."): - function_name = recipient.split(".")[-1] - for content in message.content: - random_id = random_uuid() - response_item = ResponseFunctionToolCall( - arguments=content.text, - call_id=f"call_{random_id}", - type="function_call", - name=function_name, - id=f"fc_{random_id}", - ) - output_items.append(response_item) - elif recipient is not None and ( - recipient.startswith("python") - or recipient.startswith("browser") - or recipient.startswith("container") - ): - for content in message.content: - reasoning_item = ResponseReasoningItem( - id=f"rs_{random_uuid()}", - summary=[], - type="reasoning", - content=[ - ResponseReasoningTextContent( - text=content.text, type="reasoning_text" - ) - ], - status=None, - ) - output_items.append(reasoning_item) - else: - raise ValueError(f"Unknown recipient: {recipient}") - elif message.channel == "final": - contents = [] - for content in message.content: - output_text = ResponseOutputText( - text=content.text, - annotations=[], # TODO - type="output_text", - logprobs=None, # TODO - ) - contents.append(output_text) - text_item = ResponseOutputMessage( - id=f"msg_{random_uuid()}", - content=contents, - role=message.author.role, - status="completed", - type="message", + msg_text = msg.content[0].text + + if msg.recipient and msg.recipient.startswith("functions."): + if not msg.content_type or "json" in msg.content_type: + func_name = msg.recipient.split("functions.")[1] + tool_calls.append(_create_tool_call(func_name, msg_text)) + elif msg.channel == "final": + final_content = msg_text + + if parser.current_content: + curr_text = parser.current_content + curr_channel = parser.current_channel + curr_recipient = parser.current_recipient + + if (curr_channel == "commentary" + and curr_recipient + and curr_recipient.startswith("functions.")): + + func_name = curr_recipient.split("functions.")[1] + tool_calls.append(_create_tool_call(func_name, curr_text)) + + elif curr_channel == "final": + if final_content: + final_content += curr_text + else: + final_content = curr_text + + return ExtractedToolCallInformation( + tools_called=len(tool_calls) > 0, + tool_calls=tool_calls, + content=final_content, ) - output_items.append(text_item) - else: - raise ValueError(f"Unknown channel: {message.channel}") - return output_items - -def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]: - if not parser.current_content: - return [] - if parser.current_role != Role.ASSISTANT: - return [] - current_recipient = parser.current_recipient - if current_recipient is not None and current_recipient.startswith("browser."): - return [] - - if parser.current_channel == "analysis": - reasoning_item = ResponseReasoningItem( - id=f"rs_{random_uuid()}", - summary=[], - type="reasoning", - content=[ - ResponseReasoningTextContent( - text=parser.current_content, type="reasoning_text" - ) - ], - status=None, - ) - return [reasoning_item] - elif parser.current_channel == "final": - output_text = ResponseOutputText( - text=parser.current_content, - annotations=[], # TODO - type="output_text", - logprobs=None, # TODO - ) - text_item = ResponseOutputMessage( - id=f"msg_{random_uuid()}", - content=[output_text], - role="assistant", - # if the parser still has messages (ie if the generator got cut - # abruptly), this should be incomplete - status="incomplete", - type="message", - ) - return [text_item] - return [] - - -def get_stop_tokens_for_assistant_actions() -> list[int]: - return get_encoding().stop_tokens_for_assistant_actions() - - -def get_streamable_parser_for_assistant() -> StreamableParser: - return StreamableParser(get_encoding(), role=Role.ASSISTANT) - - -def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser: - parser = get_streamable_parser_for_assistant() - for token_id in token_ids: - parser.process(token_id) - return parser - - -def parse_chat_output( - token_ids: Sequence[int], -) -> tuple[str | None, str | None, bool]: - parser = parse_output_into_messages(token_ids) - output_msgs = parser.messages - is_tool_call = False # TODO: update this when tool call is supported - if len(output_msgs) == 0: - # The generation has stopped during reasoning. - reasoning = parser.current_content - final_content = None - elif len(output_msgs) == 1: - # The generation has stopped during final message. - reasoning = output_msgs[0].content[0].text - final_content = parser.current_content - else: - reasoning_msg = output_msgs[:-1] - final_msg = output_msgs[-1] - reasoning = "\n".join([msg.content[0].text for msg in reasoning_msg]) - final_content = final_msg.content[0].text - return reasoning, final_content, is_tool_call + def extract_tool_calls_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + request: ChatCompletionRequest, + ) -> DeltaMessage | None: + raise NotImplementedError( + "Not being used, manual parsing in serving_chat.py" # noqa: E501 + ) \ No newline at end of file diff --git a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py index d1b36a297e0b..045ae7e95693 100644 --- a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py @@ -44,38 +44,55 @@ def extract_tool_calls( tool_calls = [] final_content = None + def _create_tool_call(function_name: str, arguments: str) -> ToolCall: + # Sanitize the function name to remove leaked tags (e.g. <|channel|>) + clean_name = function_name.split("<")[0].strip() + + try: + clean_args = json.dumps(json.loads(arguments)) + except json.JSONDecodeError: + logger.debug("Partial or invalid JSON tool call detected.") + clean_args = arguments + + return ToolCall( + type="function", + function=FunctionCall( + name=clean_name, + arguments=clean_args, + ), + ) + if len(parser.messages) > 0: for msg in parser.messages: if len(msg.content) < 1: continue msg_text = msg.content[0].text + if msg.recipient and msg.recipient.startswith("functions."): - # If no content-type is given assume JSON, as that's the - # most common case with gpt-oss models. if not msg.content_type or "json" in msg.content_type: - # load and dump the JSON text to check validity and - # remove any extra newlines or other odd formatting - try: - tool_args = json.dumps(json.loads(msg_text)) - except json.JSONDecodeError: - logger.exception( - "Error decoding JSON tool call from response." - ) - tool_args = msg_text - else: - tool_args = msg_text - tool_calls.append( - ToolCall( - type="function", - function=FunctionCall( - name=msg.recipient.split("functions.")[1], - arguments=tool_args, - ), - ) - ) + func_name = msg.recipient.split("functions.")[1] + tool_calls.append(_create_tool_call(func_name, msg_text)) elif msg.channel == "final": final_content = msg_text + if parser.current_content: + curr_text = parser.current_content + curr_channel = parser.current_channel + curr_recipient = parser.current_recipient + + if (curr_channel == "commentary" + and curr_recipient + and curr_recipient.startswith("functions.")): + + func_name = curr_recipient.split("functions.")[1] + tool_calls.append(_create_tool_call(func_name, curr_text)) + + elif curr_channel == "final": + if final_content: + final_content += curr_text + else: + final_content = curr_text + return ExtractedToolCallInformation( tools_called=len(tool_calls) > 0, tool_calls=tool_calls, @@ -94,4 +111,4 @@ def extract_tool_calls_streaming( ) -> DeltaMessage | None: raise NotImplementedError( "Not being used, manual parsing in serving_chat.py" # noqa: E501 - ) + ) \ No newline at end of file From 31af7b8a9e10cdfbda0b0b985f3ef8a13bc1238d Mon Sep 17 00:00:00 2001 From: ShaikAbdulHafeez03 Date: Sat, 22 Nov 2025 16:55:35 +0530 Subject: [PATCH 2/4] Fix: Improve tool call parsing for Harmony/GPT-OSS models Signed-off-by: ShaikAbdulHafeez03 --- vllm/entrypoints/harmony_utils.py | 630 +++++++++++++++++++++++++----- 1 file changed, 538 insertions(+), 92 deletions(-) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 045ae7e95693..649a7396ad4c 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -1,114 +1,560 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import datetime import json -from collections.abc import Sequence -from typing import TYPE_CHECKING +from collections.abc import Iterable, Sequence +from typing import Literal -from vllm.entrypoints.harmony_utils import parse_output_into_messages -from vllm.entrypoints.openai.protocol import ( - ChatCompletionRequest, - DeltaMessage, - ExtractedToolCallInformation, - FunctionCall, - ToolCall, +from openai.types.responses import ( + ResponseFunctionToolCall, + ResponseOutputItem, + ResponseOutputMessage, + ResponseOutputText, + ResponseReasoningItem, +) +from openai.types.responses.response_function_web_search import ( + ActionFind, + ActionOpenPage, + ActionSearch, + ResponseFunctionWebSearch, ) -from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( - ToolParser, +from openai.types.responses.response_reasoning_item import ( + Content as ResponseReasoningTextContent, ) -from vllm.logger import init_logger +from openai.types.responses.tool import Tool +from openai_harmony import ( + Author, + ChannelConfig, + Conversation, + DeveloperContent, + HarmonyEncodingName, + Message, + ReasoningEffort, + Role, + StreamableParser, + SystemContent, + TextContent, + ToolDescription, + load_harmony_encoding, +) +from openai_harmony import Message as OpenAIHarmonyMessage +from openai_harmony import Role as OpenAIHarmonyRole + +from vllm import envs +from vllm.entrypoints.openai.protocol import ( + ChatCompletionToolsParam, + ResponseInputOutputItem, + ResponsesRequest, +) +from vllm.utils import random_uuid + +REASONING_EFFORT = { + "high": ReasoningEffort.HIGH, + "medium": ReasoningEffort.MEDIUM, + "low": ReasoningEffort.LOW, +} + +_harmony_encoding = None -if TYPE_CHECKING: - from vllm.transformers_utils.tokenizer import AnyTokenizer -else: - AnyTokenizer = object +MCP_BUILTIN_TOOLS: set[str] = { + "web_search_preview", + "code_interpreter", + "container", +} + + +def has_custom_tools(tool_types: set[str]) -> bool: + return not tool_types.issubset(MCP_BUILTIN_TOOLS) + + +def get_encoding(): + global _harmony_encoding + if _harmony_encoding is None: + _harmony_encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) + return _harmony_encoding + + +def get_system_message( + model_identity: str | None = None, + reasoning_effort: Literal["high", "medium", "low"] | None = None, + start_date: str | None = None, + browser_description: str | None = None, + python_description: str | None = None, + container_description: str | None = None, + instructions: str | None = None, + with_custom_tools: bool = False, +) -> Message: + sys_msg_content = SystemContent.new() + if model_identity is not None: + sys_msg_content = sys_msg_content.with_model_identity(model_identity) + if instructions is not None and envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: + current_identity = sys_msg_content.model_identity + new_identity = ( + f"{current_identity}\n{instructions}" if current_identity else instructions + ) + sys_msg_content = sys_msg_content.with_model_identity(new_identity) + if reasoning_effort is not None: + sys_msg_content = sys_msg_content.with_reasoning_effort( + REASONING_EFFORT[reasoning_effort] + ) + if start_date is None: + start_date = datetime.datetime.now().strftime("%Y-%m-%d") + sys_msg_content = sys_msg_content.with_conversation_start_date(start_date) + if browser_description is not None: + sys_msg_content = sys_msg_content.with_tools(browser_description) + if python_description is not None: + sys_msg_content = sys_msg_content.with_tools(python_description) + if container_description is not None: + sys_msg_content = sys_msg_content.with_tools(container_description) + if not with_custom_tools: + channel_config = sys_msg_content.channel_config + invalid_channel = "commentary" + new_config = ChannelConfig.require_channels( + [c for c in channel_config.valid_channels if c != invalid_channel] + ) + sys_msg_content = sys_msg_content.with_channel_config(new_config) + sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content) + return sys_msg -logger = init_logger(__name__) +def create_tool_definition(tool: ChatCompletionToolsParam | Tool): + if isinstance(tool, ChatCompletionToolsParam): + return ToolDescription.new( + name=tool.function.name, + description=tool.function.description, + parameters=tool.function.parameters, + ) + return ToolDescription.new( + name=tool.name, + description=tool.description, + parameters=tool.parameters, + ) -class OpenAIToolParser(ToolParser): - def __init__(self, tokenizer: "AnyTokenizer"): - super().__init__(tokenizer) - def extract_tool_calls( - self, - model_output: str, - request: ChatCompletionRequest, - token_ids: Sequence[int] | None = None, - ) -> ExtractedToolCallInformation: - if token_ids is None: - raise NotImplementedError( - "OpenAIToolParser requires token IDs and does not support text-based extraction." # noqa: E501 +def get_developer_message( + instructions: str | None = None, + tools: list[Tool | ChatCompletionToolsParam] | None = None, +) -> Message: + dev_msg_content = DeveloperContent.new() + if instructions is not None and not envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: + dev_msg_content = dev_msg_content.with_instructions(instructions) + if tools is not None: + function_tools: list[Tool | ChatCompletionToolsParam] = [] + for tool in tools: + if tool.type in ( + "web_search_preview", + "code_interpreter", + "container", + "mcp", + ): + pass + elif tool.type == "function": + function_tools.append(tool) + else: + raise ValueError(f"tool type {tool.type} not supported") + if function_tools: + function_tool_descriptions = [ + create_tool_definition(tool) for tool in function_tools + ] + dev_msg_content = dev_msg_content.with_function_tools( + function_tool_descriptions ) + dev_msg = Message.from_role_and_content(Role.DEVELOPER, dev_msg_content) + return dev_msg - parser = parse_output_into_messages(token_ids) - tool_calls = [] - final_content = None - def _create_tool_call(function_name: str, arguments: str) -> ToolCall: - # Sanitize the function name to remove leaked tags (e.g. <|channel|>) - clean_name = function_name.split("<")[0].strip() - - try: - clean_args = json.dumps(json.loads(arguments)) - except json.JSONDecodeError: - logger.debug("Partial or invalid JSON tool call detected.") - clean_args = arguments +def get_user_message(content: str) -> Message: + return Message.from_role_and_content(Role.USER, content) + + +def parse_response_input( + response_msg: ResponseInputOutputItem, + prev_responses: list[ResponseOutputItem | ResponseReasoningItem], +) -> Message: + if not isinstance(response_msg, dict): + response_msg = response_msg.model_dump() + if "type" not in response_msg or response_msg["type"] == "message": + role = response_msg["role"] + content = response_msg["content"] + if role == "system": + role = "developer" + text_prefix = "Instructions:\n" + else: + text_prefix = "" + if isinstance(content, str): + msg = Message.from_role_and_content(role, text_prefix + content) + else: + contents = [TextContent(text=text_prefix + c["text"]) for c in content] + msg = Message.from_role_and_contents(role, contents) + if role == "assistant": + msg = msg.with_channel("final") + elif response_msg["type"] == "function_call_output": + call_id = response_msg["call_id"] + call_response: ResponseFunctionToolCall | None = None + for prev_response in reversed(prev_responses): + if ( + isinstance(prev_response, ResponseFunctionToolCall) + and prev_response.call_id == call_id + ): + call_response = prev_response + break + if call_response is None: + raise ValueError(f"No call message found for {call_id}") + msg = Message.from_author_and_content( + Author.new(Role.TOOL, f"functions.{call_response.name}"), + response_msg["output"], + ) + elif response_msg["type"] == "reasoning": + content = response_msg["content"] + assert len(content) == 1 + msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"]) + elif response_msg["type"] == "function_call": + msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"]) + msg = msg.with_channel("commentary") + msg = msg.with_recipient(f"functions.{response_msg['name']}") + msg = msg.with_content_type("json") + else: + raise ValueError(f"Unknown input type: {response_msg['type']}") + return msg + + +def parse_input_to_harmony_message(chat_msg) -> list[Message]: + if not isinstance(chat_msg, dict): + chat_msg = chat_msg.model_dump(exclude_none=True) + + role = chat_msg.get("role") + tool_calls = chat_msg.get("tool_calls") + if role == "assistant" and tool_calls: + msgs: list[Message] = [] + for call in tool_calls: + func = call.get("function", {}) + name = func.get("name", "") + arguments = func.get("arguments", "") or "" + if isinstance(arguments, dict): + arguments = json.dumps(arguments) - return ToolCall( - type="function", - function=FunctionCall( - name=clean_name, - arguments=clean_args, - ), + msg = Message.from_role_and_content(Role.ASSISTANT, arguments) + msg = msg.with_channel("commentary") + msg = msg.with_recipient(f"functions.{name}") + msg = msg.with_content_type("json") + msgs.append(msg) + return msgs + + if role == "tool": + name = chat_msg.get("name", "") + content = chat_msg.get("content", "") or "" + if isinstance(content, list): + content = "".join( + item.get("text", "") + for item in content + if isinstance(item, dict) and item.get("type") == "text" ) - if len(parser.messages) > 0: - for msg in parser.messages: - if len(msg.content) < 1: + msg = Message.from_author_and_content( + Author.new(Role.TOOL, f"functions.{name}"), content + ).with_channel("commentary") + return [msg] + + content = chat_msg.get("content", "") + if isinstance(content, str): + contents = [TextContent(text=content)] + else: + contents = [TextContent(text=c.get("text", "")) for c in content] + msg = Message.from_role_and_contents(role, contents) + return [msg] + + +def construct_harmony_previous_input_messages( + request: ResponsesRequest, +) -> list[OpenAIHarmonyMessage]: + messages: list[OpenAIHarmonyMessage] = [] + if request.previous_input_messages: + for message in request.previous_input_messages: + if isinstance(message, OpenAIHarmonyMessage): + message_role = message.author.role + if ( + message_role == OpenAIHarmonyRole.SYSTEM + or message_role == OpenAIHarmonyRole.DEVELOPER + ): continue - msg_text = msg.content[0].text - - if msg.recipient and msg.recipient.startswith("functions."): - if not msg.content_type or "json" in msg.content_type: - func_name = msg.recipient.split("functions.")[1] - tool_calls.append(_create_tool_call(func_name, msg_text)) - elif msg.channel == "final": - final_content = msg_text - - if parser.current_content: - curr_text = parser.current_content - curr_channel = parser.current_channel - curr_recipient = parser.current_recipient - - if (curr_channel == "commentary" - and curr_recipient - and curr_recipient.startswith("functions.")): + messages.append(message) + else: + harmony_messages = parse_input_to_harmony_message(message) + for harmony_msg in harmony_messages: + message_role = harmony_msg.author.role + if ( + message_role == OpenAIHarmonyRole.SYSTEM + or message_role == OpenAIHarmonyRole.DEVELOPER + ): + continue + messages.append(harmony_msg) + return messages + + +def render_for_completion(messages: list[Message]) -> list[int]: + conversation = Conversation.from_messages(messages) + token_ids = get_encoding().render_conversation_for_completion( + conversation, Role.ASSISTANT + ) + return token_ids + + +def parse_output_message(message: Message) -> list[ResponseOutputItem]: + if message.author.role != "assistant": + return [] + + output_items: list[ResponseOutputItem] = [] + recipient = message.recipient + + if recipient is not None and recipient.startswith("browser."): + if len(message.content) != 1: + raise ValueError("Invalid number of contents in browser message") + content = message.content[0] + try: + browser_call = json.loads(content.text) + except json.JSONDecodeError: + json_retry_output_message = ( + f"Invalid JSON args, caught and retried: {content.text}" + ) + browser_call = { + "query": json_retry_output_message, + "url": json_retry_output_message, + "pattern": json_retry_output_message, + } + + if recipient == "browser.search": + action = ActionSearch( + query=f"cursor:{browser_call.get('query', '')}", type="search" + ) + elif recipient == "browser.open": + action = ActionOpenPage( + url=f"cursor:{browser_call.get('url', '')}", type="open_page" + ) + elif recipient == "browser.find": + action = ActionFind( + pattern=browser_call["pattern"], + url=f"cursor:{browser_call.get('url', '')}", + type="find", + ) + else: + raise ValueError(f"Unknown browser action: {recipient}") + web_search_item = ResponseFunctionWebSearch( + id=f"ws_{random_uuid()}", + action=action, + status="completed", + type="web_search_call", + ) + output_items.append(web_search_item) + + elif message.channel == "analysis": + for content in message.content: + reasoning_item = ResponseReasoningItem( + id=f"rs_{random_uuid()}", + summary=[], + type="reasoning", + content=[ + ResponseReasoningTextContent( + text=content.text, type="reasoning_text" + ) + ], + status=None, + ) + output_items.append(reasoning_item) + + elif message.channel == "commentary": + if recipient is not None and recipient.startswith("functions."): + # FIX: Strict name sanitization to remove leaked tags like <|channel|> + raw_name = recipient.split("functions.")[1] + function_name = raw_name.split("<")[0].strip() + + for content in message.content: + random_id = random_uuid() + response_item = ResponseFunctionToolCall( + arguments=content.text, + call_id=f"call_{random_id}", + type="function_call", + name=function_name, + id=f"fc_{random_id}", + ) + output_items.append(response_item) - func_name = curr_recipient.split("functions.")[1] - tool_calls.append(_create_tool_call(func_name, curr_text)) + elif recipient is not None and ( + recipient.startswith("python") + or recipient.startswith("browser") + or recipient.startswith("container") + ): + for content in message.content: + reasoning_item = ResponseReasoningItem( + id=f"rs_{random_uuid()}", + summary=[], + type="reasoning", + content=[ + ResponseReasoningTextContent( + text=content.text, type="reasoning_text" + ) + ], + status=None, + ) + output_items.append(reasoning_item) + else: + raise ValueError(f"Unknown recipient: {recipient}") + + elif message.channel == "final": + contents = [] + for content in message.content: + output_text = ResponseOutputText( + text=content.text, + annotations=[], + type="output_text", + logprobs=None, + ) + contents.append(output_text) + text_item = ResponseOutputMessage( + id=f"msg_{random_uuid()}", + content=contents, + role=message.author.role, + status="completed", + type="message", + ) + output_items.append(text_item) + else: + raise ValueError(f"Unknown channel: {message.channel}") + return output_items + + +def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]: + if not parser.current_content: + return [] + if parser.current_role != Role.ASSISTANT: + return [] + current_recipient = parser.current_recipient + if current_recipient is not None and current_recipient.startswith("browser."): + return [] + + if parser.current_channel == "analysis": + reasoning_item = ResponseReasoningItem( + id=f"rs_{random_uuid()}", + summary=[], + type="reasoning", + content=[ + ResponseReasoningTextContent( + text=parser.current_content, type="reasoning_text" + ) + ], + status=None, + ) + return [reasoning_item] + + elif parser.current_channel == "commentary": + if current_recipient is not None and current_recipient.startswith("functions."): + # FIX: Strict name sanitization here as well + raw_name = current_recipient.split("functions.")[1] + function_name = raw_name.split("<")[0].strip() - elif curr_channel == "final": - if final_content: - final_content += curr_text - else: - final_content = curr_text - - return ExtractedToolCallInformation( - tools_called=len(tool_calls) > 0, - tool_calls=tool_calls, - content=final_content, + random_id = random_uuid() + response_item = ResponseFunctionToolCall( + arguments=parser.current_content, + call_id=f"call_{random_id}", + type="function_call", + name=function_name, + id=f"fc_{random_id}", + ) + return [response_item] + + elif parser.current_channel == "final": + output_text = ResponseOutputText( + text=parser.current_content, + annotations=[], + type="output_text", + logprobs=None, + ) + text_item = ResponseOutputMessage( + id=f"msg_{random_uuid()}", + content=[output_text], + role="assistant", + status="incomplete", + type="message", ) + return [text_item] + return [] + + +def get_stop_tokens_for_assistant_actions() -> list[int]: + return get_encoding().stop_tokens_for_assistant_actions() + + +def get_streamable_parser_for_assistant() -> StreamableParser: + return StreamableParser(get_encoding(), role=Role.ASSISTANT) + + +def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser: + parser = get_streamable_parser_for_assistant() + + tokens = list(token_ids) + if not tokens: + return parser + + encoding = get_encoding() + + # FIX: Use allowed_special="all" to avoid Tokenizer errors + start_token = encoding.encode("<|start|>", allowed_special="all")[0] + + if tokens[0] != start_token: + def get_id(text): + return encoding.encode(text, allowed_special="all")[0] + + header_tokens = [ + start_token, + get_id("assistant"), + get_id("<|channel|>"), + get_id("analysis"), + get_id("<|message|>") + ] + tokens = header_tokens + tokens + + for token_id in tokens: + try: + parser.process(token_id) + except Exception: + break + + return parser + + +def parse_chat_output( + token_ids: Sequence[int], +) -> tuple[str | None, str | None, bool]: + parser = parse_output_into_messages(token_ids) + output_msgs = parser.messages + + reasoning_parts = [] + final_content = None + is_tool_call = False + + for msg in output_msgs: + if msg.channel == "analysis": + for content in msg.content: + reasoning_parts.append(content.text) + elif msg.channel == "final": + for content in msg.content: + final_content = content.text + elif msg.channel == "commentary" and msg.recipient and msg.recipient.startswith("functions."): + is_tool_call = True + if not final_content: + final_content = "" + for content in msg.content: + final_content = content.text + + if parser.current_content: + if parser.current_channel == "analysis": + reasoning_parts.append(parser.current_content) + elif parser.current_channel == "final": + final_content = parser.current_content + elif parser.current_channel == "commentary" and parser.current_recipient and parser.current_recipient.startswith("functions."): + is_tool_call = True + final_content = parser.current_content + + reasoning = "\n".join(reasoning_parts) if reasoning_parts else None - def extract_tool_calls_streaming( - self, - previous_text: str, - current_text: str, - delta_text: str, - previous_token_ids: Sequence[int], - current_token_ids: Sequence[int], - delta_token_ids: Sequence[int], - request: ChatCompletionRequest, - ) -> DeltaMessage | None: - raise NotImplementedError( - "Not being used, manual parsing in serving_chat.py" # noqa: E501 - ) \ No newline at end of file + return reasoning, final_content, is_tool_call \ No newline at end of file From 8d94e30313ed854a9ccb2a0cee155e3811af7716 Mon Sep 17 00:00:00 2001 From: ShaikAbdulHafeez03 Date: Sat, 22 Nov 2025 17:03:48 +0530 Subject: [PATCH 3/4] DCO Check headder Signed-off-by: ShaikAbdulHafeez03 --- vllm/entrypoints/harmony_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 649a7396ad4c..899b8f6de150 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -497,7 +497,7 @@ def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser: encoding = get_encoding() - # FIX: Use allowed_special="all" to avoid Tokenizer errors + # FIX: Use allowed_special="all" to avoid Tokenizer error start_token = encoding.encode("<|start|>", allowed_special="all")[0] if tokens[0] != start_token: From fb93ad8a080247f9740650329b2ad07704883b21 Mon Sep 17 00:00:00 2001 From: ShaikAbdulHafeez03 Date: Sat, 22 Nov 2025 21:12:30 +0530 Subject: [PATCH 4/4] Fix ruff formatting and linting errors Signed-off-by: ShaikAbdulHafeez03 --- vllm/entrypoints/harmony_utils.py | 67 +++++++++++-------- .../openai/tool_parsers/openai_tool_parser.py | 19 +++--- 2 files changed, 48 insertions(+), 38 deletions(-) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 899b8f6de150..fc29cc4c97aa 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -233,7 +233,7 @@ def parse_input_to_harmony_message(chat_msg) -> list[Message]: arguments = func.get("arguments", "") or "" if isinstance(arguments, dict): arguments = json.dumps(arguments) - + msg = Message.from_role_and_content(Role.ASSISTANT, arguments) msg = msg.with_channel("commentary") msg = msg.with_recipient(f"functions.{name}") @@ -306,7 +306,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: output_items: list[ResponseOutputItem] = [] recipient = message.recipient - + if recipient is not None and recipient.startswith("browser."): if len(message.content) != 1: raise ValueError("Invalid number of contents in browser message") @@ -322,7 +322,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: "url": json_retry_output_message, "pattern": json_retry_output_message, } - + if recipient == "browser.search": action = ActionSearch( query=f"cursor:{browser_call.get('query', '')}", type="search" @@ -346,7 +346,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: type="web_search_call", ) output_items.append(web_search_item) - + elif message.channel == "analysis": for content in message.content: reasoning_item = ResponseReasoningItem( @@ -361,13 +361,13 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: status=None, ) output_items.append(reasoning_item) - + elif message.channel == "commentary": if recipient is not None and recipient.startswith("functions."): # FIX: Strict name sanitization to remove leaked tags like <|channel|> raw_name = recipient.split("functions.")[1] function_name = raw_name.split("<")[0].strip() - + for content in message.content: random_id = random_uuid() response_item = ResponseFunctionToolCall( @@ -378,7 +378,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: id=f"fc_{random_id}", ) output_items.append(response_item) - + elif recipient is not None and ( recipient.startswith("python") or recipient.startswith("browser") @@ -399,13 +399,13 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: output_items.append(reasoning_item) else: raise ValueError(f"Unknown recipient: {recipient}") - + elif message.channel == "final": contents = [] for content in message.content: output_text = ResponseOutputText( text=content.text, - annotations=[], + annotations=[], type="output_text", logprobs=None, ) @@ -445,13 +445,13 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]: status=None, ) return [reasoning_item] - + elif parser.current_channel == "commentary": if current_recipient is not None and current_recipient.startswith("functions."): # FIX: Strict name sanitization here as well raw_name = current_recipient.split("functions.")[1] function_name = raw_name.split("<")[0].strip() - + random_id = random_uuid() response_item = ResponseFunctionToolCall( arguments=parser.current_content, @@ -461,13 +461,13 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]: id=f"fc_{random_id}", ) return [response_item] - + elif parser.current_channel == "final": output_text = ResponseOutputText( text=parser.current_content, - annotations=[], + annotations=[], type="output_text", - logprobs=None, + logprobs=None, ) text_item = ResponseOutputMessage( id=f"msg_{random_uuid()}", @@ -490,17 +490,18 @@ def get_streamable_parser_for_assistant() -> StreamableParser: def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser: parser = get_streamable_parser_for_assistant() - + tokens = list(token_ids) if not tokens: return parser encoding = get_encoding() - + # FIX: Use allowed_special="all" to avoid Tokenizer error start_token = encoding.encode("<|start|>", allowed_special="all")[0] - + if tokens[0] != start_token: + def get_id(text): return encoding.encode(text, allowed_special="all")[0] @@ -509,7 +510,7 @@ def get_id(text): get_id("assistant"), get_id("<|channel|>"), get_id("analysis"), - get_id("<|message|>") + get_id("<|message|>"), ] tokens = header_tokens + tokens @@ -518,7 +519,7 @@ def get_id(text): parser.process(token_id) except Exception: break - + return parser @@ -527,11 +528,11 @@ def parse_chat_output( ) -> tuple[str | None, str | None, bool]: parser = parse_output_into_messages(token_ids) output_msgs = parser.messages - + reasoning_parts = [] final_content = None is_tool_call = False - + for msg in output_msgs: if msg.channel == "analysis": for content in msg.content: @@ -539,22 +540,30 @@ def parse_chat_output( elif msg.channel == "final": for content in msg.content: final_content = content.text - elif msg.channel == "commentary" and msg.recipient and msg.recipient.startswith("functions."): + elif ( + msg.channel == "commentary" + and msg.recipient + and msg.recipient.startswith("functions.") + ): is_tool_call = True if not final_content: final_content = "" for content in msg.content: - final_content = content.text + final_content = content.text if parser.current_content: if parser.current_channel == "analysis": - reasoning_parts.append(parser.current_content) + reasoning_parts.append(parser.current_content) elif parser.current_channel == "final": - final_content = parser.current_content - elif parser.current_channel == "commentary" and parser.current_recipient and parser.current_recipient.startswith("functions."): - is_tool_call = True - final_content = parser.current_content + final_content = parser.current_content + elif ( + parser.current_channel == "commentary" + and parser.current_recipient + and parser.current_recipient.startswith("functions.") + ): + is_tool_call = True + final_content = parser.current_content reasoning = "\n".join(reasoning_parts) if reasoning_parts else None - return reasoning, final_content, is_tool_call \ No newline at end of file + return reasoning, final_content, is_tool_call diff --git a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py index 045ae7e95693..69727b1a68a0 100644 --- a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py @@ -47,13 +47,13 @@ def extract_tool_calls( def _create_tool_call(function_name: str, arguments: str) -> ToolCall: # Sanitize the function name to remove leaked tags (e.g. <|channel|>) clean_name = function_name.split("<")[0].strip() - + try: clean_args = json.dumps(json.loads(arguments)) except json.JSONDecodeError: logger.debug("Partial or invalid JSON tool call detected.") clean_args = arguments - + return ToolCall( type="function", function=FunctionCall( @@ -67,7 +67,7 @@ def _create_tool_call(function_name: str, arguments: str) -> ToolCall: if len(msg.content) < 1: continue msg_text = msg.content[0].text - + if msg.recipient and msg.recipient.startswith("functions."): if not msg.content_type or "json" in msg.content_type: func_name = msg.recipient.split("functions.")[1] @@ -80,13 +80,14 @@ def _create_tool_call(function_name: str, arguments: str) -> ToolCall: curr_channel = parser.current_channel curr_recipient = parser.current_recipient - if (curr_channel == "commentary" - and curr_recipient - and curr_recipient.startswith("functions.")): - + if ( + curr_channel == "commentary" + and curr_recipient + and curr_recipient.startswith("functions.") + ): func_name = curr_recipient.split("functions.")[1] tool_calls.append(_create_tool_call(func_name, curr_text)) - + elif curr_channel == "final": if final_content: final_content += curr_text @@ -111,4 +112,4 @@ def extract_tool_calls_streaming( ) -> DeltaMessage | None: raise NotImplementedError( "Not being used, manual parsing in serving_chat.py" # noqa: E501 - ) \ No newline at end of file + )