Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions tests/entrypoints/openai/test_chat_with_tool_reasoning.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,84 @@ async def test_chat_full_of_tool_and_reasoning(client: openai.AsyncOpenAI):
assert len(tool_calls.choices[0].message.reasoning) > 0
assert tool_calls.choices[0].message.tool_calls[0].function.name == FUNC_NAME
assert tool_calls.choices[0].message.tool_calls[0].function.arguments == FUNC_ARGS


# test that content does not leak into final chunk when finish_reason=tool_calls
@pytest.mark.asyncio
async def test_no_content_leak_when_finish_reason_tool_calls(
    client: openai.AsyncOpenAI,
):
    """
    Test that when finish_reason='tool_calls', the final chunk does not
    contain any content field. This prevents reasoning_content from leaking
    into content.

    This test specifically targets the bug where leftover reasoning buffers
    (especially from speculative decoding) were incorrectly flushed into
    the content field in the final streamed chunk.
    """
    stream = await client.chat.completions.create(
        model=MODEL_NAME,
        messages=MESSAGES,
        tools=TOOLS,
        temperature=0.0,
        stream=True,
        tool_choice="auto",
        # ``include_reasoning`` is not a named parameter of the openai SDK
        # (passing it directly raises "got an unexpected keyword argument");
        # forward it to the server via ``extra_body`` instead.
        extra_body={"include_reasoning": True},
    )

    chunks = []
    final_chunk = None
    tool_calls_streamed = False
    async for chunk in stream:
        chunks.append(chunk)
        if chunk.choices:
            delta = chunk.choices[0].delta
            # Tool-call deltas typically arrive in intermediate chunks,
            # not in the finishing chunk, so accumulate across the stream.
            if getattr(delta, "tool_calls", None):
                tool_calls_streamed = True
            # Track the final chunk with finish_reason
            if chunk.choices[0].finish_reason:
                final_chunk = chunk

    # Ensure we got a final chunk with tool_calls
    assert final_chunk is not None, "Expected a final chunk with finish_reason"
    assert final_chunk.choices[0].finish_reason == "tool_calls", (
        "Expected finish_reason to be 'tool_calls'"
    )

    delta = final_chunk.choices[0].delta

    # When finish_reason='tool_calls', content must be null/absent.
    # This is the core fix: prevent reasoning_content from leaking into content
    assert delta.content is None or delta.content == "", (
        f"Final chunk with finish_reason='tool_calls' must not have content. "
        f"Got content='{delta.content}'. This indicates reasoning_content leaked "
        f"into content field."
    )

    # Also ensure reasoning fields are not present in final chunk
    # (they should only appear in earlier chunks)
    reasoning = getattr(delta, "reasoning", None)
    reasoning_content = getattr(delta, "reasoning_content", None)
    assert reasoning is None or reasoning == "", (
        "Final chunk with tool_calls should not have reasoning field"
    )
    assert reasoning_content is None or reasoning_content == "", (
        "Final chunk with tool_calls should not have reasoning_content field"
    )

    # Verify tool_calls were streamed at some point (the expected behavior).
    # NOTE: do NOT require them on the final chunk itself — the finishing
    # chunk usually carries an empty delta, and asserting on it alone fails.
    assert tool_calls_streamed, (
        "Stream with finish_reason='tool_calls' must have streamed tool_calls"
    )

    # Verify reasoning was streamed in earlier chunks (not in final)
    reasoning_found_in_earlier_chunks = False
    for chunk in chunks[:-1]:  # All chunks except the final one
        if chunk.choices:
            earlier_delta = chunk.choices[0].delta
            if getattr(earlier_delta, "reasoning", None) or getattr(
                earlier_delta, "reasoning_content", None
            ):
                reasoning_found_in_earlier_chunks = True
                break

    assert reasoning_found_in_earlier_chunks, (
        "Reasoning should be streamed in earlier chunks, not in final chunk"
    )
35 changes: 35 additions & 0 deletions vllm/entrypoints/openai/chat_completion/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,13 @@ async def chat_completion_stream_generator(
)
)
harmony_tools_streamed[i] |= tools_streamed_flag
# Ensure no content leaks when tool calls are present
# in harmony path. Per OpenAI spec, tool call deltas
# must not contain content.
if tools_streamed_flag and delta_message:
delta_message.content = None
delta_message.reasoning = None
delta_message.reasoning_content = None
# handle streaming deltas for tools with named tool_choice
elif tool_choice_function_name:
if (
Expand Down Expand Up @@ -980,6 +987,7 @@ async def chat_completion_stream_generator(
]
)
tools_streamed[i] = True
self._clear_non_tool_call_fields(delta_message)

elif request.tool_choice == "required":
assert previous_texts is not None
Expand Down Expand Up @@ -1036,6 +1044,7 @@ async def chat_completion_stream_generator(
):
history_tool_call_cnt += 1
tools_streamed[i] = True
self._clear_non_tool_call_fields(delta_message)

# handle streaming deltas for tools with "auto" tool choice
# and reasoning parser
Expand Down Expand Up @@ -1111,6 +1120,7 @@ async def chat_completion_stream_generator(
)
if delta_message and delta_message.tool_calls:
tools_streamed[i] = True
self._clear_non_tool_call_fields(delta_message)
# when only tool calls
elif tool_choice_auto:
assert tool_parser is not None
Expand All @@ -1125,6 +1135,7 @@ async def chat_completion_stream_generator(
)
if delta_message and delta_message.tool_calls:
tools_streamed[i] = True
self._clear_non_tool_call_fields(delta_message)

# when only reasoning
elif self.reasoning_parser:
Expand Down Expand Up @@ -1290,6 +1301,18 @@ async def chat_completion_stream_generator(
finish_reason_ = (
output.finish_reason if output.finish_reason else "stop"
)

# When finish_reason is "tool_calls", ensure no content
# or reasoning fields leak into the final delta.
# Per OpenAI spec, tool call responses must only contain
# tool_calls and finish_reason, never content.
if finish_reason_ == "tool_calls":
if delta_message is None:
# Create empty delta message if none exists
delta_message = DeltaMessage()
else:
self._clear_non_tool_call_fields(delta_message)

choice_data = ChatCompletionResponseStreamChoice(
index=i,
delta=delta_message,
Expand Down Expand Up @@ -1824,6 +1847,18 @@ def _create_chat_logprobs(

return ChatCompletionLogProbs(content=logprobs_content)

def _clear_non_tool_call_fields(self, delta_message: DeltaMessage) -> None:
    """Null out content and reasoning fields of *delta_message* in place.

    When a streamed delta carries tool calls, any buffered content or
    reasoning text (e.g. leftovers from reasoning buffers or speculative
    decoding) should not be emitted alongside them, so those fields are
    reset to ``None`` before the chunk is sent.

    NOTE(review): the claim that the OpenAI spec forbids content on tool
    call deltas has not been sourced — confirm against the official
    streaming API documentation.
    """
    for field in ("content", "reasoning", "reasoning_content"):
        setattr(delta_message, field, None)

def _should_stream_with_auto_tool_parsing(self, request: ChatCompletionRequest):
"""
Utility function to check if streamed tokens should go through the tool
Expand Down