Skip to content

Commit a76dea0

Browse files
committed
[None][fixes] Add tool call parsing fixes and Qwen3 coder parser
1 parent e2b2675 commit a76dea0

File tree

8 files changed

+966
-70
lines changed

8 files changed

+966
-70
lines changed

tensorrt_llm/serve/chat_utils.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import uuid
23
from functools import partial
34
from typing import (Any, Callable, Coroutine, Dict, Iterable, List, Literal,
@@ -185,6 +186,36 @@ def parse_chat_message_content(
185186
content,
186187
mm_data_tracker,
187188
)
189+
if role == "assistant":
190+
result.update(_parse_assistant_message_content(message))
191+
elif role == "tool":
192+
result.update(_parse_tool_message_content(message))
193+
return result
194+
195+
196+
# Adapted from: https://github.com/vllm-project/vllm/blob/4574d48bab9c4e38b7c0a830eeefc8f0980e8c58/vllm/entrypoints/chat_utils.py#L1406
197+
def _parse_assistant_message_content(message: Dict[str, Any]) -> Dict[str, Any]:
198+
result = {}
199+
tool_calls = message.get("tool_calls")
200+
if tool_calls is not None:
201+
result["tool_calls"] = []
202+
for item in tool_calls:
203+
if content := item["function"].get("arguments"):
204+
if isinstance(content, str):
205+
item["function"]["arguments"] = json.loads(content)
206+
else:
207+
item["function"]["arguments"] = content
208+
else:
209+
item["function"]["arguments"] = {}
210+
result["tool_calls"].append(item)
211+
212+
return result
213+
214+
215+
def _parse_tool_message_content(message: Dict[str, Any]) -> Dict[str, Any]:
216+
result = {}
217+
if "tool_call_id" in message:
218+
result["tool_call_id"] = message["tool_call_id"]
188219
return result
189220

190221

tensorrt_llm/serve/openai_protocol.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,12 @@ class CustomChatCompletionContentPartParam(TypedDict, total=False):
396396

397397
class CustomChatCompletionMessageParam(TypedDict, total=False):
398398
"""Enables custom roles in the Chat Completion API."""
399+
400+
# This allows custom fields that are not in any of the `ChatCompletionMessage<XYZ>Param` types defined by OpenAI
401+
# are still allowed.
402+
# Examples include: assistant messages with `reasoning` / `reasoning_content`.
403+
__pydantic_config__ = ConfigDict(extra="allow") # type: ignore
404+
399405
role: Required[str]
400406
"""The role of the message's author."""
401407

tensorrt_llm/serve/openai_server.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,10 @@ async def create_chat_response(
515515
chat_template=request.chat_template,
516516
chat_template_kwargs=request.chat_template_kwargs or {},
517517
)
518+
logger.debug(
519+
"Rendered chat template:\n"
520+
f"{prompt!r}"
521+
)
518522
prompt = prompt_inputs(prompt)
519523

520524
mm_data = await mm_coroutines

0 commit comments

Comments
 (0)