From 0cf29d5ca595e6ab0433a8a9dc9836d8b6aa2ac6 Mon Sep 17 00:00:00 2001
From: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
Date: Sat, 22 Nov 2025 16:29:29 +0530
Subject: [PATCH 1/4] Fix: enhance OpenAIToolParser and Harmony parser
 tool-calling robustness

Signed-off-by: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
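---
Note (review): the harmony_utils.py hunk in this patch replaces the whole
module with the contents of openai_tool_parser.py (both diffs end at the
same blob, 045ae7e95693). After this commit harmony_utils.py imports
parse_output_into_messages out of vllm.entrypoints.harmony_utils (itself)
while the definition of that function, and every other helper in the module,
is deleted, so the tree is broken at this commit. PATCH 2/4 rewrites
harmony_utils.py again (index 045ae7e95693..649a7396ad4c). Consider squashing
1/4 and 2/4 so that the series only touches openai_tool_parser.py plus the
intended harmony_utils.py changes, keeping every commit importable.
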
 vllm/entrypoints/harmony_utils.py             | 611 +++---------------
 .../openai/tool_parsers/openai_tool_parser.py |  63 +-
 2 files changed, 135 insertions(+), 539 deletions(-)

diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py
index 47a252348c10..045ae7e95693 100644
--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -1,535 +1,114 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import datetime
 import json
-from collections.abc import Iterable, Sequence
-from typing import Literal
-
-from openai.types.responses import (
-    ResponseFunctionToolCall,
-    ResponseOutputItem,
-    ResponseOutputMessage,
-    ResponseOutputText,
-    ResponseReasoningItem,
-)
-from openai.types.responses.response_function_web_search import (
-    ActionFind,
-    ActionOpenPage,
-    ActionSearch,
-    ResponseFunctionWebSearch,
-)
-from openai.types.responses.response_reasoning_item import (
-    Content as ResponseReasoningTextContent,
-)
-from openai.types.responses.tool import Tool
-from openai_harmony import (
-    Author,
-    ChannelConfig,
-    Conversation,
-    DeveloperContent,
-    HarmonyEncodingName,
-    Message,
-    ReasoningEffort,
-    Role,
-    StreamableParser,
-    SystemContent,
-    TextContent,
-    ToolDescription,
-    load_harmony_encoding,
-)
-from openai_harmony import Message as OpenAIHarmonyMessage
-from openai_harmony import Role as OpenAIHarmonyRole
+from collections.abc import Sequence
+from typing import TYPE_CHECKING
 
-from vllm import envs
+from vllm.entrypoints.harmony_utils import parse_output_into_messages
 from vllm.entrypoints.openai.protocol import (
-    ChatCompletionToolsParam,
-    ResponseInputOutputItem,
-    ResponsesRequest,
+    ChatCompletionRequest,
+    DeltaMessage,
+    ExtractedToolCallInformation,
+    FunctionCall,
+    ToolCall,
 )
-from vllm.utils import random_uuid
-
-REASONING_EFFORT = {
-    "high": ReasoningEffort.HIGH,
-    "medium": ReasoningEffort.MEDIUM,
-    "low": ReasoningEffort.LOW,
-}
-
-_harmony_encoding = None
-
-# Builtin tools that should be included in the system message when
-# they are available and requested by the user.
-# Tool args are provided by MCP tool descriptions. Output
-# of the tools are stringified.
-MCP_BUILTIN_TOOLS: set[str] = {
-    "web_search_preview",
-    "code_interpreter",
-    "container",
-}
-
-
-def has_custom_tools(tool_types: set[str]) -> bool:
-    """
-    Checks if the given tool types are custom tools
-    (i.e. any tool other than MCP buildin tools)
-    """
-    return not tool_types.issubset(MCP_BUILTIN_TOOLS)
-
-
-def get_encoding():
-    global _harmony_encoding
-    if _harmony_encoding is None:
-        _harmony_encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
-    return _harmony_encoding
-
-
-def get_system_message(
-    model_identity: str | None = None,
-    reasoning_effort: Literal["high", "medium", "low"] | None = None,
-    start_date: str | None = None,
-    browser_description: str | None = None,
-    python_description: str | None = None,
-    container_description: str | None = None,
-    instructions: str | None = None,
-    with_custom_tools: bool = False,
-) -> Message:
-    sys_msg_content = SystemContent.new()
-    if model_identity is not None:
-        sys_msg_content = sys_msg_content.with_model_identity(model_identity)
-    if instructions is not None and envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
-        current_identity = sys_msg_content.model_identity
-        new_identity = (
-            f"{current_identity}\n{instructions}" if current_identity else instructions
-        )
-        sys_msg_content = sys_msg_content.with_model_identity(new_identity)
-    if reasoning_effort is not None:
-        sys_msg_content = sys_msg_content.with_reasoning_effort(
-            REASONING_EFFORT[reasoning_effort]
-        )
-    if start_date is None:
-        # NOTE(woosuk): This brings non-determinism in vLLM. Be careful.
-        start_date = datetime.datetime.now().strftime("%Y-%m-%d")
-    sys_msg_content = sys_msg_content.with_conversation_start_date(start_date)
-    if browser_description is not None:
-        sys_msg_content = sys_msg_content.with_tools(browser_description)
-    if python_description is not None:
-        sys_msg_content = sys_msg_content.with_tools(python_description)
-    if container_description is not None:
-        sys_msg_content = sys_msg_content.with_tools(container_description)
-    if not with_custom_tools:
-        channel_config = sys_msg_content.channel_config
-        invalid_channel = "commentary"
-        new_config = ChannelConfig.require_channels(
-            [c for c in channel_config.valid_channels if c != invalid_channel]
-        )
-        sys_msg_content = sys_msg_content.with_channel_config(new_config)
-    sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content)
-    return sys_msg
+from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
+    ToolParser,
+)
+from vllm.logger import init_logger
 
+if TYPE_CHECKING:
+    from vllm.transformers_utils.tokenizer import AnyTokenizer
+else:
+    AnyTokenizer = object
 
-def create_tool_definition(tool: ChatCompletionToolsParam | Tool):
-    if isinstance(tool, ChatCompletionToolsParam):
-        return ToolDescription.new(
-            name=tool.function.name,
-            description=tool.function.description,
-            parameters=tool.function.parameters,
-        )
-    return ToolDescription.new(
-        name=tool.name,
-        description=tool.description,
-        parameters=tool.parameters,
-    )
+logger = init_logger(__name__)
 
 
-def get_developer_message(
-    instructions: str | None = None,
-    tools: list[Tool | ChatCompletionToolsParam] | None = None,
-) -> Message:
-    dev_msg_content = DeveloperContent.new()
-    if instructions is not None and not envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
-        dev_msg_content = dev_msg_content.with_instructions(instructions)
-    if tools is not None:
-        function_tools: list[Tool | ChatCompletionToolsParam] = []
-        for tool in tools:
-            if tool.type in (
-                "web_search_preview",
-                "code_interpreter",
-                "container",
-                "mcp",
-            ):
-                # These are built-in tools that are added to the system message.
-                # Adding in MCP for now until we support MCP tools executed
-                # server side
-                pass
+class OpenAIToolParser(ToolParser):
+    def __init__(self, tokenizer: "AnyTokenizer"):
+        super().__init__(tokenizer)
 
-            elif tool.type == "function":
-                function_tools.append(tool)
-            else:
-                raise ValueError(f"tool type {tool.type} not supported")
-        if function_tools:
-            function_tool_descriptions = [
-                create_tool_definition(tool) for tool in function_tools
-            ]
-            dev_msg_content = dev_msg_content.with_function_tools(
-                function_tool_descriptions
+    def extract_tool_calls(
+        self,
+        model_output: str,
+        request: ChatCompletionRequest,
+        token_ids: Sequence[int] | None = None,
+    ) -> ExtractedToolCallInformation:
+        if token_ids is None:
+            raise NotImplementedError(
+                "OpenAIToolParser requires token IDs and does not support text-based extraction."  # noqa: E501
             )
-    dev_msg = Message.from_role_and_content(Role.DEVELOPER, dev_msg_content)
-    return dev_msg
-
-
-def get_user_message(content: str) -> Message:
-    return Message.from_role_and_content(Role.USER, content)
 
+        parser = parse_output_into_messages(token_ids)
+        tool_calls = []
+        final_content = None
 
-def parse_response_input(
-    response_msg: ResponseInputOutputItem,
-    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
-) -> Message:
-    if not isinstance(response_msg, dict):
-        response_msg = response_msg.model_dump()
-    if "type" not in response_msg or response_msg["type"] == "message":
-        role = response_msg["role"]
-        content = response_msg["content"]
-        if role == "system":
-            # User is trying to set a system message. Change it to:
-            # <|start|>developer<|message|># Instructions
-            # {instructions}<|end|>
-            role = "developer"
-            text_prefix = "Instructions:\n"
-        else:
-            text_prefix = ""
-        if isinstance(content, str):
-            msg = Message.from_role_and_content(role, text_prefix + content)
-        else:
-            contents = [TextContent(text=text_prefix + c["text"]) for c in content]
-            msg = Message.from_role_and_contents(role, contents)
-        if role == "assistant":
-            msg = msg.with_channel("final")
-    elif response_msg["type"] == "function_call_output":
-        call_id = response_msg["call_id"]
-        call_response: ResponseFunctionToolCall | None = None
-        for prev_response in reversed(prev_responses):
-            if (
-                isinstance(prev_response, ResponseFunctionToolCall)
-                and prev_response.call_id == call_id
-            ):
-                call_response = prev_response
-                break
-        if call_response is None:
-            raise ValueError(f"No call message found for {call_id}")
-        msg = Message.from_author_and_content(
-            Author.new(Role.TOOL, f"functions.{call_response.name}"),
-            response_msg["output"],
-        )
-    elif response_msg["type"] == "reasoning":
-        content = response_msg["content"]
-        assert len(content) == 1
-        msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"])
-    elif response_msg["type"] == "function_call":
-        msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"])
-        msg = msg.with_channel("commentary")
-        msg = msg.with_recipient(f"functions.{response_msg['name']}")
-        msg = msg.with_content_type("json")
-    else:
-        raise ValueError(f"Unknown input type: {response_msg['type']}")
-    return msg
-
-
-def parse_input_to_harmony_message(chat_msg) -> list[Message]:
-    if not isinstance(chat_msg, dict):
-        # Handle Pydantic models
-        chat_msg = chat_msg.model_dump(exclude_none=True)
-
-    role = chat_msg.get("role")
-
-    # Assistant message with tool calls
-    tool_calls = chat_msg.get("tool_calls")
-    if role == "assistant" and tool_calls:
-        msgs: list[Message] = []
-        for call in tool_calls:
-            func = call.get("function", {})
-            name = func.get("name", "")
-            arguments = func.get("arguments", "") or ""
-            msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
-            msg = msg.with_channel("commentary")
-            msg = msg.with_recipient(f"functions.{name}")
-            msg = msg.with_content_type("json")
-            msgs.append(msg)
-        return msgs
-
-    # Tool role message (tool output)
-    if role == "tool":
-        name = chat_msg.get("name", "")
-        content = chat_msg.get("content", "") or ""
-        if isinstance(content, list):
-            # Handle array format for tool message content
-            # by concatenating all text parts.
-            content = "".join(
-                item.get("text", "")
-                for item in content
-                if isinstance(item, dict) and item.get("type") == "text"
+        def _create_tool_call(function_name: str, arguments: str) -> ToolCall:
+            # Sanitize the function name to remove leaked tags (e.g. <|channel|>)
+            clean_name = function_name.split("<")[0].strip()
+            
+            try:
+                clean_args = json.dumps(json.loads(arguments))
+            except json.JSONDecodeError:
+                logger.debug("Partial or invalid JSON tool call detected.")
+                clean_args = arguments
+            
+            return ToolCall(
+                type="function",
+                function=FunctionCall(
+                    name=clean_name,
+                    arguments=clean_args,
+                ),
             )
 
-        msg = Message.from_author_and_content(
-            Author.new(Role.TOOL, f"functions.{name}"), content
-        ).with_channel("commentary")
-        return [msg]
-
-    # Default: user/assistant/system messages with content
-    content = chat_msg.get("content", "")
-    if isinstance(content, str):
-        contents = [TextContent(text=content)]
-    else:
-        # TODO: Support refusal.
-        contents = [TextContent(text=c.get("text", "")) for c in content]
-    msg = Message.from_role_and_contents(role, contents)
-    return [msg]
-
-
-def construct_harmony_previous_input_messages(
-    request: ResponsesRequest,
-) -> list[OpenAIHarmonyMessage]:
-    messages: list[OpenAIHarmonyMessage] = []
-    if request.previous_input_messages:
-        for message in request.previous_input_messages:
-            # Handle both OpenAIHarmonyMessage objects and dictionary inputs
-            if isinstance(message, OpenAIHarmonyMessage):
-                message_role = message.author.role
-                # To match OpenAI, instructions, reasoning and tools are
-                # always taken from the most recent Responses API request
-                # not carried over from previous requests
-                if (
-                    message_role == OpenAIHarmonyRole.SYSTEM
-                    or message_role == OpenAIHarmonyRole.DEVELOPER
-                ):
+        if len(parser.messages) > 0:
+            for msg in parser.messages:
+                if len(msg.content) < 1:
                     continue
-                messages.append(message)
-            else:
-                harmony_messages = parse_input_to_harmony_message(message)
-                for harmony_msg in harmony_messages:
-                    message_role = harmony_msg.author.role
-                    # To match OpenAI, instructions, reasoning and tools are
-                    # always taken from the most recent Responses API request
-                    # not carried over from previous requests
-                    if (
-                        message_role == OpenAIHarmonyRole.SYSTEM
-                        or message_role == OpenAIHarmonyRole.DEVELOPER
-                    ):
-                        continue
-                    messages.append(harmony_msg)
-    return messages
-
-
-def render_for_completion(messages: list[Message]) -> list[int]:
-    conversation = Conversation.from_messages(messages)
-    token_ids = get_encoding().render_conversation_for_completion(
-        conversation, Role.ASSISTANT
-    )
-    return token_ids
-
-
-def parse_output_message(message: Message) -> list[ResponseOutputItem]:
-    """
-    Parse a Harmony message into a list of output response items.
-    """
-    if message.author.role != "assistant":
-        # This is a message from a tool to the assistant (e.g., search result).
-        # Don't include it in the final output for now. This aligns with
-        # OpenAI's behavior on models like o4-mini.
-        return []
-
-    output_items: list[ResponseOutputItem] = []
-    recipient = message.recipient
-    if recipient is not None and recipient.startswith("browser."):
-        if len(message.content) != 1:
-            raise ValueError("Invalid number of contents in browser message")
-        content = message.content[0]
-        # We do not need to check the VLLM_TOOL_JSON_ERROR_AUTOMATIC_RETRY
-        # env variable since if it is not set, we are certain the json is valid
-        # The use of Actions for web search will be removed entirely in
-        # the future, so this is only necessary temporarily
-        try:
-            browser_call = json.loads(content.text)
-        except json.JSONDecodeError:
-            # If the content is not valid JSON, then it was
-            # caught and retried by vLLM, which means we
-            # need to make note of that so the user is aware
-            json_retry_output_message = (
-                f"Invalid JSON args, caught and retried: {content.text}"
-            )
-            browser_call = {
-                "query": json_retry_output_message,
-                "url": json_retry_output_message,
-                "pattern": json_retry_output_message,
-            }
-        # TODO: translate to url properly!
-        if recipient == "browser.search":
-            action = ActionSearch(
-                query=f"cursor:{browser_call.get('query', '')}", type="search"
-            )
-        elif recipient == "browser.open":
-            action = ActionOpenPage(
-                url=f"cursor:{browser_call.get('url', '')}", type="open_page"
-            )
-        elif recipient == "browser.find":
-            action = ActionFind(
-                pattern=browser_call["pattern"],
-                url=f"cursor:{browser_call.get('url', '')}",
-                type="find",
-            )
-        else:
-            raise ValueError(f"Unknown browser action: {recipient}")
-        web_search_item = ResponseFunctionWebSearch(
-            id=f"ws_{random_uuid()}",
-            action=action,
-            status="completed",
-            type="web_search_call",
-        )
-        output_items.append(web_search_item)
-    elif message.channel == "analysis":
-        for content in message.content:
-            reasoning_item = ResponseReasoningItem(
-                id=f"rs_{random_uuid()}",
-                summary=[],
-                type="reasoning",
-                content=[
-                    ResponseReasoningTextContent(
-                        text=content.text, type="reasoning_text"
-                    )
-                ],
-                status=None,
-            )
-            output_items.append(reasoning_item)
-    elif message.channel == "commentary":
-        if recipient is not None and recipient.startswith("functions."):
-            function_name = recipient.split(".")[-1]
-            for content in message.content:
-                random_id = random_uuid()
-                response_item = ResponseFunctionToolCall(
-                    arguments=content.text,
-                    call_id=f"call_{random_id}",
-                    type="function_call",
-                    name=function_name,
-                    id=f"fc_{random_id}",
-                )
-                output_items.append(response_item)
-        elif recipient is not None and (
-            recipient.startswith("python")
-            or recipient.startswith("browser")
-            or recipient.startswith("container")
-        ):
-            for content in message.content:
-                reasoning_item = ResponseReasoningItem(
-                    id=f"rs_{random_uuid()}",
-                    summary=[],
-                    type="reasoning",
-                    content=[
-                        ResponseReasoningTextContent(
-                            text=content.text, type="reasoning_text"
-                        )
-                    ],
-                    status=None,
-                )
-                output_items.append(reasoning_item)
-        else:
-            raise ValueError(f"Unknown recipient: {recipient}")
-    elif message.channel == "final":
-        contents = []
-        for content in message.content:
-            output_text = ResponseOutputText(
-                text=content.text,
-                annotations=[],  # TODO
-                type="output_text",
-                logprobs=None,  # TODO
-            )
-            contents.append(output_text)
-        text_item = ResponseOutputMessage(
-            id=f"msg_{random_uuid()}",
-            content=contents,
-            role=message.author.role,
-            status="completed",
-            type="message",
+                msg_text = msg.content[0].text
+                
+                if msg.recipient and msg.recipient.startswith("functions."):
+                    if not msg.content_type or "json" in msg.content_type:
+                        func_name = msg.recipient.split("functions.")[1]
+                        tool_calls.append(_create_tool_call(func_name, msg_text))
+                elif msg.channel == "final":
+                    final_content = msg_text
+
+        if parser.current_content:
+            curr_text = parser.current_content
+            curr_channel = parser.current_channel
+            curr_recipient = parser.current_recipient
+
+            if (curr_channel == "commentary" 
+                and curr_recipient 
+                and curr_recipient.startswith("functions.")):
+                
+                func_name = curr_recipient.split("functions.")[1]
+                tool_calls.append(_create_tool_call(func_name, curr_text))
+            
+            elif curr_channel == "final":
+                if final_content:
+                    final_content += curr_text
+                else:
+                    final_content = curr_text
+
+        return ExtractedToolCallInformation(
+            tools_called=len(tool_calls) > 0,
+            tool_calls=tool_calls,
+            content=final_content,
         )
-        output_items.append(text_item)
-    else:
-        raise ValueError(f"Unknown channel: {message.channel}")
-    return output_items
 
-
-def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
-    if not parser.current_content:
-        return []
-    if parser.current_role != Role.ASSISTANT:
-        return []
-    current_recipient = parser.current_recipient
-    if current_recipient is not None and current_recipient.startswith("browser."):
-        return []
-
-    if parser.current_channel == "analysis":
-        reasoning_item = ResponseReasoningItem(
-            id=f"rs_{random_uuid()}",
-            summary=[],
-            type="reasoning",
-            content=[
-                ResponseReasoningTextContent(
-                    text=parser.current_content, type="reasoning_text"
-                )
-            ],
-            status=None,
-        )
-        return [reasoning_item]
-    elif parser.current_channel == "final":
-        output_text = ResponseOutputText(
-            text=parser.current_content,
-            annotations=[],  # TODO
-            type="output_text",
-            logprobs=None,  # TODO
-        )
-        text_item = ResponseOutputMessage(
-            id=f"msg_{random_uuid()}",
-            content=[output_text],
-            role="assistant",
-            # if the parser still has messages (ie if the generator got cut
-            # abruptly), this should be incomplete
-            status="incomplete",
-            type="message",
-        )
-        return [text_item]
-    return []
-
-
-def get_stop_tokens_for_assistant_actions() -> list[int]:
-    return get_encoding().stop_tokens_for_assistant_actions()
-
-
-def get_streamable_parser_for_assistant() -> StreamableParser:
-    return StreamableParser(get_encoding(), role=Role.ASSISTANT)
-
-
-def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
-    parser = get_streamable_parser_for_assistant()
-    for token_id in token_ids:
-        parser.process(token_id)
-    return parser
-
-
-def parse_chat_output(
-    token_ids: Sequence[int],
-) -> tuple[str | None, str | None, bool]:
-    parser = parse_output_into_messages(token_ids)
-    output_msgs = parser.messages
-    is_tool_call = False  # TODO: update this when tool call is supported
-    if len(output_msgs) == 0:
-        # The generation has stopped during reasoning.
-        reasoning = parser.current_content
-        final_content = None
-    elif len(output_msgs) == 1:
-        # The generation has stopped during final message.
-        reasoning = output_msgs[0].content[0].text
-        final_content = parser.current_content
-    else:
-        reasoning_msg = output_msgs[:-1]
-        final_msg = output_msgs[-1]
-        reasoning = "\n".join([msg.content[0].text for msg in reasoning_msg])
-        final_content = final_msg.content[0].text
-    return reasoning, final_content, is_tool_call
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> DeltaMessage | None:
+        raise NotImplementedError(
+            "Not being used, manual parsing in serving_chat.py"  # noqa: E501
+        )
\ No newline at end of file
diff --git a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
index d1b36a297e0b..045ae7e95693 100644
--- a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
@@ -44,38 +44,55 @@ def extract_tool_calls(
         tool_calls = []
         final_content = None
 
+        def _create_tool_call(function_name: str, arguments: str) -> ToolCall:
+            # Sanitize the function name to remove leaked tags (e.g. <|channel|>)
+            clean_name = function_name.split("<")[0].strip()
+            
+            try:
+                clean_args = json.dumps(json.loads(arguments))
+            except json.JSONDecodeError:
+                logger.debug("Partial or invalid JSON tool call detected.")
+                clean_args = arguments
+            
+            return ToolCall(
+                type="function",
+                function=FunctionCall(
+                    name=clean_name,
+                    arguments=clean_args,
+                ),
+            )
+
         if len(parser.messages) > 0:
             for msg in parser.messages:
                 if len(msg.content) < 1:
                     continue
                 msg_text = msg.content[0].text
+                
                 if msg.recipient and msg.recipient.startswith("functions."):
-                    # If no content-type is given assume JSON, as that's the
-                    # most common case with gpt-oss models.
                     if not msg.content_type or "json" in msg.content_type:
-                        # load and dump the JSON text to check validity and
-                        # remove any extra newlines or other odd formatting
-                        try:
-                            tool_args = json.dumps(json.loads(msg_text))
-                        except json.JSONDecodeError:
-                            logger.exception(
-                                "Error decoding JSON tool call from response."
-                            )
-                            tool_args = msg_text
-                    else:
-                        tool_args = msg_text
-                    tool_calls.append(
-                        ToolCall(
-                            type="function",
-                            function=FunctionCall(
-                                name=msg.recipient.split("functions.")[1],
-                                arguments=tool_args,
-                            ),
-                        )
-                    )
+                        func_name = msg.recipient.split("functions.")[1]
+                        tool_calls.append(_create_tool_call(func_name, msg_text))
                 elif msg.channel == "final":
                     final_content = msg_text
 
+        if parser.current_content:
+            curr_text = parser.current_content
+            curr_channel = parser.current_channel
+            curr_recipient = parser.current_recipient
+
+            if (curr_channel == "commentary" 
+                and curr_recipient 
+                and curr_recipient.startswith("functions.")):
+                
+                func_name = curr_recipient.split("functions.")[1]
+                tool_calls.append(_create_tool_call(func_name, curr_text))
+            
+            elif curr_channel == "final":
+                if final_content:
+                    final_content += curr_text
+                else:
+                    final_content = curr_text
+
         return ExtractedToolCallInformation(
             tools_called=len(tool_calls) > 0,
             tool_calls=tool_calls,
@@ -94,4 +111,4 @@ def extract_tool_calls_streaming(
     ) -> DeltaMessage | None:
         raise NotImplementedError(
             "Not being used, manual parsing in serving_chat.py"  # noqa: E501
-        )
+        )
\ No newline at end of file

From 31af7b8a9e10cdfbda0b0b985f3ef8a13bc1238d Mon Sep 17 00:00:00 2001
From: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
Date: Sat, 22 Nov 2025 16:55:35 +0530
Subject: [PATCH 2/4] Fix: Improve tool call parsing for Harmony/GPT-OSS models

Signed-off-by: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
---
 vllm/entrypoints/harmony_utils.py | 630 +++++++++++++++++++++++++-----
 1 file changed, 538 insertions(+), 92 deletions(-)

diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py
index 045ae7e95693..649a7396ad4c 100644
--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -1,114 +1,560 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import datetime
 import json
-from collections.abc import Sequence
-from typing import TYPE_CHECKING
+from collections.abc import Iterable, Sequence
+from typing import Literal
 
-from vllm.entrypoints.harmony_utils import parse_output_into_messages
-from vllm.entrypoints.openai.protocol import (
-    ChatCompletionRequest,
-    DeltaMessage,
-    ExtractedToolCallInformation,
-    FunctionCall,
-    ToolCall,
+from openai.types.responses import (
+    ResponseFunctionToolCall,
+    ResponseOutputItem,
+    ResponseOutputMessage,
+    ResponseOutputText,
+    ResponseReasoningItem,
+)
+from openai.types.responses.response_function_web_search import (
+    ActionFind,
+    ActionOpenPage,
+    ActionSearch,
+    ResponseFunctionWebSearch,
 )
-from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
-    ToolParser,
+from openai.types.responses.response_reasoning_item import (
+    Content as ResponseReasoningTextContent,
 )
-from vllm.logger import init_logger
+from openai.types.responses.tool import Tool
+from openai_harmony import (
+    Author,
+    ChannelConfig,
+    Conversation,
+    DeveloperContent,
+    HarmonyEncodingName,
+    Message,
+    ReasoningEffort,
+    Role,
+    StreamableParser,
+    SystemContent,
+    TextContent,
+    ToolDescription,
+    load_harmony_encoding,
+)
+from openai_harmony import Message as OpenAIHarmonyMessage
+from openai_harmony import Role as OpenAIHarmonyRole
+
+from vllm import envs
+from vllm.entrypoints.openai.protocol import (
+    ChatCompletionToolsParam,
+    ResponseInputOutputItem,
+    ResponsesRequest,
+)
+from vllm.utils import random_uuid
+
+REASONING_EFFORT = {
+    "high": ReasoningEffort.HIGH,
+    "medium": ReasoningEffort.MEDIUM,
+    "low": ReasoningEffort.LOW,
+}
+
+_harmony_encoding = None
 
-if TYPE_CHECKING:
-    from vllm.transformers_utils.tokenizer import AnyTokenizer
-else:
-    AnyTokenizer = object
+MCP_BUILTIN_TOOLS: set[str] = {
+    "web_search_preview",
+    "code_interpreter",
+    "container",
+}
+
+
+def has_custom_tools(tool_types: set[str]) -> bool:
+    return not tool_types.issubset(MCP_BUILTIN_TOOLS)
+
+
+def get_encoding():
+    global _harmony_encoding
+    if _harmony_encoding is None:
+        _harmony_encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
+    return _harmony_encoding
+
+
+def get_system_message(
+    model_identity: str | None = None,
+    reasoning_effort: Literal["high", "medium", "low"] | None = None,
+    start_date: str | None = None,
+    browser_description: str | None = None,
+    python_description: str | None = None,
+    container_description: str | None = None,
+    instructions: str | None = None,
+    with_custom_tools: bool = False,
+) -> Message:
+    sys_msg_content = SystemContent.new()
+    if model_identity is not None:
+        sys_msg_content = sys_msg_content.with_model_identity(model_identity)
+    if instructions is not None and envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
+        current_identity = sys_msg_content.model_identity
+        new_identity = (
+            f"{current_identity}\n{instructions}" if current_identity else instructions
+        )
+        sys_msg_content = sys_msg_content.with_model_identity(new_identity)
+    if reasoning_effort is not None:
+        sys_msg_content = sys_msg_content.with_reasoning_effort(
+            REASONING_EFFORT[reasoning_effort]
+        )
+    if start_date is None:
+        start_date = datetime.datetime.now().strftime("%Y-%m-%d")
+    sys_msg_content = sys_msg_content.with_conversation_start_date(start_date)
+    if browser_description is not None:
+        sys_msg_content = sys_msg_content.with_tools(browser_description)
+    if python_description is not None:
+        sys_msg_content = sys_msg_content.with_tools(python_description)
+    if container_description is not None:
+        sys_msg_content = sys_msg_content.with_tools(container_description)
+    if not with_custom_tools:
+        channel_config = sys_msg_content.channel_config
+        invalid_channel = "commentary"
+        new_config = ChannelConfig.require_channels(
+            [c for c in channel_config.valid_channels if c != invalid_channel]
+        )
+        sys_msg_content = sys_msg_content.with_channel_config(new_config)
+    sys_msg = Message.from_role_and_content(Role.SYSTEM, sys_msg_content)
+    return sys_msg
 
-logger = init_logger(__name__)
 
+def create_tool_definition(tool: ChatCompletionToolsParam | Tool):
+    if isinstance(tool, ChatCompletionToolsParam):
+        return ToolDescription.new(
+            name=tool.function.name,
+            description=tool.function.description,
+            parameters=tool.function.parameters,
+        )
+    return ToolDescription.new(
+        name=tool.name,
+        description=tool.description,
+        parameters=tool.parameters,
+    )
 
-class OpenAIToolParser(ToolParser):
-    def __init__(self, tokenizer: "AnyTokenizer"):
-        super().__init__(tokenizer)
 
-    def extract_tool_calls(
-        self,
-        model_output: str,
-        request: ChatCompletionRequest,
-        token_ids: Sequence[int] | None = None,
-    ) -> ExtractedToolCallInformation:
-        if token_ids is None:
-            raise NotImplementedError(
-                "OpenAIToolParser requires token IDs and does not support text-based extraction."  # noqa: E501
+def get_developer_message(
+    instructions: str | None = None,
+    tools: list[Tool | ChatCompletionToolsParam] | None = None,
+) -> Message:
+    dev_msg_content = DeveloperContent.new()
+    if instructions is not None and not envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
+        dev_msg_content = dev_msg_content.with_instructions(instructions)
+    if tools is not None:
+        function_tools: list[Tool | ChatCompletionToolsParam] = []
+        for tool in tools:
+            if tool.type in (
+                "web_search_preview",
+                "code_interpreter",
+                "container",
+                "mcp",
+            ):
+                pass
+            elif tool.type == "function":
+                function_tools.append(tool)
+            else:
+                raise ValueError(f"tool type {tool.type} not supported")
+        if function_tools:
+            function_tool_descriptions = [
+                create_tool_definition(tool) for tool in function_tools
+            ]
+            dev_msg_content = dev_msg_content.with_function_tools(
+                function_tool_descriptions
             )
+    dev_msg = Message.from_role_and_content(Role.DEVELOPER, dev_msg_content)
+    return dev_msg
 
-        parser = parse_output_into_messages(token_ids)
-        tool_calls = []
-        final_content = None
 
-        def _create_tool_call(function_name: str, arguments: str) -> ToolCall:
-            # Sanitize the function name to remove leaked tags (e.g. <|channel|>)
-            clean_name = function_name.split("<")[0].strip()
-            
-            try:
-                clean_args = json.dumps(json.loads(arguments))
-            except json.JSONDecodeError:
-                logger.debug("Partial or invalid JSON tool call detected.")
-                clean_args = arguments
+def get_user_message(content: str) -> Message:
+    return Message.from_role_and_content(Role.USER, content)
+
+
+def parse_response_input(
+    response_msg: ResponseInputOutputItem,
+    prev_responses: list[ResponseOutputItem | ResponseReasoningItem],
+) -> Message:
+    if not isinstance(response_msg, dict):
+        response_msg = response_msg.model_dump()
+    if "type" not in response_msg or response_msg["type"] == "message":
+        role = response_msg["role"]
+        content = response_msg["content"]
+        if role == "system":
+            role = "developer"
+            text_prefix = "Instructions:\n"
+        else:
+            text_prefix = ""
+        if isinstance(content, str):
+            msg = Message.from_role_and_content(role, text_prefix + content)
+        else:
+            contents = [TextContent(text=text_prefix + c["text"]) for c in content]
+            msg = Message.from_role_and_contents(role, contents)
+        if role == "assistant":
+            msg = msg.with_channel("final")
+    elif response_msg["type"] == "function_call_output":
+        call_id = response_msg["call_id"]
+        call_response: ResponseFunctionToolCall | None = None
+        for prev_response in reversed(prev_responses):
+            if (
+                isinstance(prev_response, ResponseFunctionToolCall)
+                and prev_response.call_id == call_id
+            ):
+                call_response = prev_response
+                break
+        if call_response is None:
+            raise ValueError(f"No call message found for {call_id}")
+        msg = Message.from_author_and_content(
+            Author.new(Role.TOOL, f"functions.{call_response.name}"),
+            response_msg["output"],
+        )
+    elif response_msg["type"] == "reasoning":
+        content = response_msg["content"]
+        assert len(content) == 1
+        msg = Message.from_role_and_content(Role.ASSISTANT, content[0]["text"])
+    elif response_msg["type"] == "function_call":
+        msg = Message.from_role_and_content(Role.ASSISTANT, response_msg["arguments"])
+        msg = msg.with_channel("commentary")
+        msg = msg.with_recipient(f"functions.{response_msg['name']}")
+        msg = msg.with_content_type("json")
+    else:
+        raise ValueError(f"Unknown input type: {response_msg['type']}")
+    return msg
+
+
+def parse_input_to_harmony_message(chat_msg) -> list[Message]:
+    if not isinstance(chat_msg, dict):
+        chat_msg = chat_msg.model_dump(exclude_none=True)
+
+    role = chat_msg.get("role")
+    tool_calls = chat_msg.get("tool_calls")
+    if role == "assistant" and tool_calls:
+        msgs: list[Message] = []
+        for call in tool_calls:
+            func = call.get("function", {})
+            name = func.get("name", "")
+            arguments = func.get("arguments", "") or ""
+            if isinstance(arguments, dict):
+                arguments = json.dumps(arguments)
             
-            return ToolCall(
-                type="function",
-                function=FunctionCall(
-                    name=clean_name,
-                    arguments=clean_args,
-                ),
+            msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
+            msg = msg.with_channel("commentary")
+            msg = msg.with_recipient(f"functions.{name}")
+            msg = msg.with_content_type("json")
+            msgs.append(msg)
+        return msgs
+
+    if role == "tool":
+        name = chat_msg.get("name", "")
+        content = chat_msg.get("content", "") or ""
+        if isinstance(content, list):
+            content = "".join(
+                item.get("text", "")
+                for item in content
+                if isinstance(item, dict) and item.get("type") == "text"
             )
 
-        if len(parser.messages) > 0:
-            for msg in parser.messages:
-                if len(msg.content) < 1:
+        msg = Message.from_author_and_content(
+            Author.new(Role.TOOL, f"functions.{name}"), content
+        ).with_channel("commentary")
+        return [msg]
+
+    content = chat_msg.get("content", "")
+    if isinstance(content, str):
+        contents = [TextContent(text=content)]
+    else:
+        contents = [TextContent(text=c.get("text", "")) for c in content]
+    msg = Message.from_role_and_contents(role, contents)
+    return [msg]
+
+
+def construct_harmony_previous_input_messages(
+    request: ResponsesRequest,
+) -> list[OpenAIHarmonyMessage]:
+    messages: list[OpenAIHarmonyMessage] = []
+    if request.previous_input_messages:
+        for message in request.previous_input_messages:
+            if isinstance(message, OpenAIHarmonyMessage):
+                message_role = message.author.role
+                if (
+                    message_role == OpenAIHarmonyRole.SYSTEM
+                    or message_role == OpenAIHarmonyRole.DEVELOPER
+                ):
                     continue
-                msg_text = msg.content[0].text
-                
-                if msg.recipient and msg.recipient.startswith("functions."):
-                    if not msg.content_type or "json" in msg.content_type:
-                        func_name = msg.recipient.split("functions.")[1]
-                        tool_calls.append(_create_tool_call(func_name, msg_text))
-                elif msg.channel == "final":
-                    final_content = msg_text
-
-        if parser.current_content:
-            curr_text = parser.current_content
-            curr_channel = parser.current_channel
-            curr_recipient = parser.current_recipient
-
-            if (curr_channel == "commentary" 
-                and curr_recipient 
-                and curr_recipient.startswith("functions.")):
+                messages.append(message)
+            else:
+                harmony_messages = parse_input_to_harmony_message(message)
+                for harmony_msg in harmony_messages:
+                    message_role = harmony_msg.author.role
+                    if (
+                        message_role == OpenAIHarmonyRole.SYSTEM
+                        or message_role == OpenAIHarmonyRole.DEVELOPER
+                    ):
+                        continue
+                    messages.append(harmony_msg)
+    return messages
+
+
+def render_for_completion(messages: list[Message]) -> list[int]:
+    conversation = Conversation.from_messages(messages)
+    token_ids = get_encoding().render_conversation_for_completion(
+        conversation, Role.ASSISTANT
+    )
+    return token_ids
+
+
+def parse_output_message(message: Message) -> list[ResponseOutputItem]:
+    if message.author.role != "assistant":
+        return []
+
+    output_items: list[ResponseOutputItem] = []
+    recipient = message.recipient
+    
+    if recipient is not None and recipient.startswith("browser."):
+        if len(message.content) != 1:
+            raise ValueError("Invalid number of contents in browser message")
+        content = message.content[0]
+        try:
+            browser_call = json.loads(content.text)
+        except json.JSONDecodeError:
+            json_retry_output_message = (
+                f"Invalid JSON args, caught and retried: {content.text}"
+            )
+            browser_call = {
+                "query": json_retry_output_message,
+                "url": json_retry_output_message,
+                "pattern": json_retry_output_message,
+            }
+        
+        if recipient == "browser.search":
+            action = ActionSearch(
+                query=f"cursor:{browser_call.get('query', '')}", type="search"
+            )
+        elif recipient == "browser.open":
+            action = ActionOpenPage(
+                url=f"cursor:{browser_call.get('url', '')}", type="open_page"
+            )
+        elif recipient == "browser.find":
+            action = ActionFind(
+                pattern=browser_call["pattern"],
+                url=f"cursor:{browser_call.get('url', '')}",
+                type="find",
+            )
+        else:
+            raise ValueError(f"Unknown browser action: {recipient}")
+        web_search_item = ResponseFunctionWebSearch(
+            id=f"ws_{random_uuid()}",
+            action=action,
+            status="completed",
+            type="web_search_call",
+        )
+        output_items.append(web_search_item)
+        
+    elif message.channel == "analysis":
+        for content in message.content:
+            reasoning_item = ResponseReasoningItem(
+                id=f"rs_{random_uuid()}",
+                summary=[],
+                type="reasoning",
+                content=[
+                    ResponseReasoningTextContent(
+                        text=content.text, type="reasoning_text"
+                    )
+                ],
+                status=None,
+            )
+            output_items.append(reasoning_item)
+            
+    elif message.channel == "commentary":
+        if recipient is not None and recipient.startswith("functions."):
+            # FIX: Strict name sanitization to remove leaked tags like <|channel|>
+            raw_name = recipient.split("functions.")[1]
+            function_name = raw_name.split("<")[0].strip()
+            
+            for content in message.content:
+                random_id = random_uuid()
+                response_item = ResponseFunctionToolCall(
+                    arguments=content.text,
+                    call_id=f"call_{random_id}",
+                    type="function_call",
+                    name=function_name,
+                    id=f"fc_{random_id}",
+                )
+                output_items.append(response_item)
                 
-                func_name = curr_recipient.split("functions.")[1]
-                tool_calls.append(_create_tool_call(func_name, curr_text))
+        elif recipient is not None and (
+            recipient.startswith("python")
+            or recipient.startswith("browser")
+            or recipient.startswith("container")
+        ):
+            for content in message.content:
+                reasoning_item = ResponseReasoningItem(
+                    id=f"rs_{random_uuid()}",
+                    summary=[],
+                    type="reasoning",
+                    content=[
+                        ResponseReasoningTextContent(
+                            text=content.text, type="reasoning_text"
+                        )
+                    ],
+                    status=None,
+                )
+                output_items.append(reasoning_item)
+        else:
+            raise ValueError(f"Unknown recipient: {recipient}")
+            
+    elif message.channel == "final":
+        contents = []
+        for content in message.content:
+            output_text = ResponseOutputText(
+                text=content.text,
+                annotations=[],  
+                type="output_text",
+                logprobs=None,
+            )
+            contents.append(output_text)
+        text_item = ResponseOutputMessage(
+            id=f"msg_{random_uuid()}",
+            content=contents,
+            role=message.author.role,
+            status="completed",
+            type="message",
+        )
+        output_items.append(text_item)
+    else:
+        raise ValueError(f"Unknown channel: {message.channel}")
+    return output_items
+
+
+def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
+    if not parser.current_content:
+        return []
+    if parser.current_role != Role.ASSISTANT:
+        return []
+    current_recipient = parser.current_recipient
+    if current_recipient is not None and current_recipient.startswith("browser."):
+        return []
+
+    if parser.current_channel == "analysis":
+        reasoning_item = ResponseReasoningItem(
+            id=f"rs_{random_uuid()}",
+            summary=[],
+            type="reasoning",
+            content=[
+                ResponseReasoningTextContent(
+                    text=parser.current_content, type="reasoning_text"
+                )
+            ],
+            status=None,
+        )
+        return [reasoning_item]
+        
+    elif parser.current_channel == "commentary":
+        if current_recipient is not None and current_recipient.startswith("functions."):
+            # FIX: Strict name sanitization here as well
+            raw_name = current_recipient.split("functions.")[1]
+            function_name = raw_name.split("<")[0].strip()
             
-            elif curr_channel == "final":
-                if final_content:
-                    final_content += curr_text
-                else:
-                    final_content = curr_text
-
-        return ExtractedToolCallInformation(
-            tools_called=len(tool_calls) > 0,
-            tool_calls=tool_calls,
-            content=final_content,
+            random_id = random_uuid()
+            response_item = ResponseFunctionToolCall(
+                arguments=parser.current_content,
+                call_id=f"call_{random_id}",
+                type="function_call",
+                name=function_name,
+                id=f"fc_{random_id}",
+            )
+            return [response_item]
+            
+    elif parser.current_channel == "final":
+        output_text = ResponseOutputText(
+            text=parser.current_content,
+            annotations=[],  
+            type="output_text",
+            logprobs=None, 
+        )
+        text_item = ResponseOutputMessage(
+            id=f"msg_{random_uuid()}",
+            content=[output_text],
+            role="assistant",
+            status="incomplete",
+            type="message",
         )
+        return [text_item]
+    return []
+
+
+def get_stop_tokens_for_assistant_actions() -> list[int]:
+    return get_encoding().stop_tokens_for_assistant_actions()
+
+
+def get_streamable_parser_for_assistant() -> StreamableParser:
+    return StreamableParser(get_encoding(), role=Role.ASSISTANT)
+
+
+def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
+    parser = get_streamable_parser_for_assistant()
+    
+    tokens = list(token_ids)
+    if not tokens:
+        return parser
+
+    encoding = get_encoding()
+    
+    # FIX: Use allowed_special="all" to avoid Tokenizer errors
+    start_token = encoding.encode("<|start|>", allowed_special="all")[0]
+    
+    if tokens[0] != start_token:
+        def get_id(text):
+            return encoding.encode(text, allowed_special="all")[0]
+
+        header_tokens = [
+            start_token,
+            get_id("assistant"),
+            get_id("<|channel|>"),
+            get_id("analysis"),
+            get_id("<|message|>")
+        ]
+        tokens = header_tokens + tokens
+
+    for token_id in tokens:
+        try:
+            parser.process(token_id)
+        except Exception:
+            break
+            
+    return parser
+
+
+def parse_chat_output(
+    token_ids: Sequence[int],
+) -> tuple[str | None, str | None, bool]:
+    parser = parse_output_into_messages(token_ids)
+    output_msgs = parser.messages
+    
+    reasoning_parts = []
+    final_content = None
+    is_tool_call = False
+    
+    for msg in output_msgs:
+        if msg.channel == "analysis":
+            for content in msg.content:
+                reasoning_parts.append(content.text)
+        elif msg.channel == "final":
+            for content in msg.content:
+                final_content = content.text
+        elif msg.channel == "commentary" and msg.recipient and msg.recipient.startswith("functions."):
+            is_tool_call = True
+            if not final_content:
+                final_content = ""
+            for content in msg.content:
+                 final_content = content.text
+
+    if parser.current_content:
+        if parser.current_channel == "analysis":
+             reasoning_parts.append(parser.current_content)
+        elif parser.current_channel == "final":
+             final_content = parser.current_content
+        elif parser.current_channel == "commentary" and parser.current_recipient and parser.current_recipient.startswith("functions."):
+             is_tool_call = True
+             final_content = parser.current_content
+
+    reasoning = "\n".join(reasoning_parts) if reasoning_parts else None
 
-    def extract_tool_calls_streaming(
-        self,
-        previous_text: str,
-        current_text: str,
-        delta_text: str,
-        previous_token_ids: Sequence[int],
-        current_token_ids: Sequence[int],
-        delta_token_ids: Sequence[int],
-        request: ChatCompletionRequest,
-    ) -> DeltaMessage | None:
-        raise NotImplementedError(
-            "Not being used, manual parsing in serving_chat.py"  # noqa: E501
-        )
\ No newline at end of file
+    return reasoning, final_content, is_tool_call
\ No newline at end of file

From 8d94e30313ed854a9ccb2a0cee155e3811af7716 Mon Sep 17 00:00:00 2001
From: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
Date: Sat, 22 Nov 2025 17:03:48 +0530
Subject: [PATCH 3/4] DCO Check header

Signed-off-by: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
---
 vllm/entrypoints/harmony_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py
index 649a7396ad4c..899b8f6de150 100644
--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -497,7 +497,7 @@ def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
 
     encoding = get_encoding()
     
-    # FIX: Use allowed_special="all" to avoid Tokenizer errors
+    # FIX: Use allowed_special="all" to avoid Tokenizer error
     start_token = encoding.encode("<|start|>", allowed_special="all")[0]
     
     if tokens[0] != start_token:

From fb93ad8a080247f9740650329b2ad07704883b21 Mon Sep 17 00:00:00 2001
From: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
Date: Sat, 22 Nov 2025 21:12:30 +0530
Subject: [PATCH 4/4] Fix ruff formatting and linting errors

Signed-off-by: ShaikAbdulHafeez03 <shaik.abdulhafeez323@gmail.com>
---
 vllm/entrypoints/harmony_utils.py             | 67 +++++++++++--------
 .../openai/tool_parsers/openai_tool_parser.py | 19 +++---
 2 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py
index 899b8f6de150..fc29cc4c97aa 100644
--- a/vllm/entrypoints/harmony_utils.py
+++ b/vllm/entrypoints/harmony_utils.py
@@ -233,7 +233,7 @@ def parse_input_to_harmony_message(chat_msg) -> list[Message]:
             arguments = func.get("arguments", "") or ""
             if isinstance(arguments, dict):
                 arguments = json.dumps(arguments)
-            
+
             msg = Message.from_role_and_content(Role.ASSISTANT, arguments)
             msg = msg.with_channel("commentary")
             msg = msg.with_recipient(f"functions.{name}")
@@ -306,7 +306,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
 
     output_items: list[ResponseOutputItem] = []
     recipient = message.recipient
-    
+
     if recipient is not None and recipient.startswith("browser."):
         if len(message.content) != 1:
             raise ValueError("Invalid number of contents in browser message")
@@ -322,7 +322,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
                 "url": json_retry_output_message,
                 "pattern": json_retry_output_message,
             }
-        
+
         if recipient == "browser.search":
             action = ActionSearch(
                 query=f"cursor:{browser_call.get('query', '')}", type="search"
@@ -346,7 +346,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
             type="web_search_call",
         )
         output_items.append(web_search_item)
-        
+
     elif message.channel == "analysis":
         for content in message.content:
             reasoning_item = ResponseReasoningItem(
@@ -361,13 +361,13 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
                 status=None,
             )
             output_items.append(reasoning_item)
-            
+
     elif message.channel == "commentary":
         if recipient is not None and recipient.startswith("functions."):
             # FIX: Strict name sanitization to remove leaked tags like <|channel|>
             raw_name = recipient.split("functions.")[1]
             function_name = raw_name.split("<")[0].strip()
-            
+
             for content in message.content:
                 random_id = random_uuid()
                 response_item = ResponseFunctionToolCall(
@@ -378,7 +378,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
                     id=f"fc_{random_id}",
                 )
                 output_items.append(response_item)
-                
+
         elif recipient is not None and (
             recipient.startswith("python")
             or recipient.startswith("browser")
@@ -399,13 +399,13 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
                 output_items.append(reasoning_item)
         else:
             raise ValueError(f"Unknown recipient: {recipient}")
-            
+
     elif message.channel == "final":
         contents = []
         for content in message.content:
             output_text = ResponseOutputText(
                 text=content.text,
-                annotations=[],  
+                annotations=[],
                 type="output_text",
                 logprobs=None,
             )
@@ -445,13 +445,13 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
             status=None,
         )
         return [reasoning_item]
-        
+
     elif parser.current_channel == "commentary":
         if current_recipient is not None and current_recipient.startswith("functions."):
             # FIX: Strict name sanitization here as well
             raw_name = current_recipient.split("functions.")[1]
             function_name = raw_name.split("<")[0].strip()
-            
+
             random_id = random_uuid()
             response_item = ResponseFunctionToolCall(
                 arguments=parser.current_content,
@@ -461,13 +461,13 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
                 id=f"fc_{random_id}",
             )
             return [response_item]
-            
+
     elif parser.current_channel == "final":
         output_text = ResponseOutputText(
             text=parser.current_content,
-            annotations=[],  
+            annotations=[],
             type="output_text",
-            logprobs=None, 
+            logprobs=None,
         )
         text_item = ResponseOutputMessage(
             id=f"msg_{random_uuid()}",
@@ -490,17 +490,18 @@ def get_streamable_parser_for_assistant() -> StreamableParser:
 
 def parse_output_into_messages(token_ids: Iterable[int]) -> StreamableParser:
     parser = get_streamable_parser_for_assistant()
-    
+
     tokens = list(token_ids)
     if not tokens:
         return parser
 
     encoding = get_encoding()
-    
+
     # FIX: Use allowed_special="all" to avoid Tokenizer error
     start_token = encoding.encode("<|start|>", allowed_special="all")[0]
-    
+
     if tokens[0] != start_token:
+
         def get_id(text):
             return encoding.encode(text, allowed_special="all")[0]
 
@@ -509,7 +510,7 @@ def get_id(text):
             get_id("assistant"),
             get_id("<|channel|>"),
             get_id("analysis"),
-            get_id("<|message|>")
+            get_id("<|message|>"),
         ]
         tokens = header_tokens + tokens
 
@@ -518,7 +519,7 @@ def get_id(text):
             parser.process(token_id)
         except Exception:
             break
-            
+
     return parser
 
 
@@ -527,11 +528,11 @@ def parse_chat_output(
 ) -> tuple[str | None, str | None, bool]:
     parser = parse_output_into_messages(token_ids)
     output_msgs = parser.messages
-    
+
     reasoning_parts = []
     final_content = None
     is_tool_call = False
-    
+
     for msg in output_msgs:
         if msg.channel == "analysis":
             for content in msg.content:
@@ -539,22 +540,30 @@ def parse_chat_output(
         elif msg.channel == "final":
             for content in msg.content:
                 final_content = content.text
-        elif msg.channel == "commentary" and msg.recipient and msg.recipient.startswith("functions."):
+        elif (
+            msg.channel == "commentary"
+            and msg.recipient
+            and msg.recipient.startswith("functions.")
+        ):
             is_tool_call = True
             if not final_content:
                 final_content = ""
             for content in msg.content:
-                 final_content = content.text
+                final_content = content.text
 
     if parser.current_content:
         if parser.current_channel == "analysis":
-             reasoning_parts.append(parser.current_content)
+            reasoning_parts.append(parser.current_content)
         elif parser.current_channel == "final":
-             final_content = parser.current_content
-        elif parser.current_channel == "commentary" and parser.current_recipient and parser.current_recipient.startswith("functions."):
-             is_tool_call = True
-             final_content = parser.current_content
+            final_content = parser.current_content
+        elif (
+            parser.current_channel == "commentary"
+            and parser.current_recipient
+            and parser.current_recipient.startswith("functions.")
+        ):
+            is_tool_call = True
+            final_content = parser.current_content
 
     reasoning = "\n".join(reasoning_parts) if reasoning_parts else None
 
-    return reasoning, final_content, is_tool_call
\ No newline at end of file
+    return reasoning, final_content, is_tool_call
diff --git a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
index 045ae7e95693..69727b1a68a0 100644
--- a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py
@@ -47,13 +47,13 @@ def extract_tool_calls(
         def _create_tool_call(function_name: str, arguments: str) -> ToolCall:
             # Sanitize the function name to remove leaked tags (e.g. <|channel|>)
             clean_name = function_name.split("<")[0].strip()
-            
+
             try:
                 clean_args = json.dumps(json.loads(arguments))
             except json.JSONDecodeError:
                 logger.debug("Partial or invalid JSON tool call detected.")
                 clean_args = arguments
-            
+
             return ToolCall(
                 type="function",
                 function=FunctionCall(
@@ -67,7 +67,7 @@ def _create_tool_call(function_name: str, arguments: str) -> ToolCall:
                 if len(msg.content) < 1:
                     continue
                 msg_text = msg.content[0].text
-                
+
                 if msg.recipient and msg.recipient.startswith("functions."):
                     if not msg.content_type or "json" in msg.content_type:
                         func_name = msg.recipient.split("functions.")[1]
@@ -80,13 +80,14 @@ def _create_tool_call(function_name: str, arguments: str) -> ToolCall:
             curr_channel = parser.current_channel
             curr_recipient = parser.current_recipient
 
-            if (curr_channel == "commentary" 
-                and curr_recipient 
-                and curr_recipient.startswith("functions.")):
-                
+            if (
+                curr_channel == "commentary"
+                and curr_recipient
+                and curr_recipient.startswith("functions.")
+            ):
                 func_name = curr_recipient.split("functions.")[1]
                 tool_calls.append(_create_tool_call(func_name, curr_text))
-            
+
             elif curr_channel == "final":
                 if final_content:
                     final_content += curr_text
@@ -111,4 +112,4 @@ def extract_tool_calls_streaming(
     ) -> DeltaMessage | None:
         raise NotImplementedError(
             "Not being used, manual parsing in serving_chat.py"  # noqa: E501
-        )
\ No newline at end of file
+        )