Merged
31 changes: 19 additions & 12 deletions vllm/entrypoints/openai/serving_responses.py
@@ -442,6 +442,19 @@ def _make_request_with_harmony(

     return messages, [prompt_token_ids], [engine_prompt]
 
+    async def _initialize_tool_sessions(self, request: ResponsesRequest,
+                                        context: ConversationContext,
+                                        exit_stack: AsyncExitStack):
+        # we should only initialize the tool session if the request needs tools
+        if len(request.tools) == 0:
+            return
+        mcp_tools = {
+            tool.server_label: tool
+            for tool in request.tools if tool.type == "mcp"
+        }
+        await context.init_tool_sessions(self.tool_server, exit_stack,
+                                         request.request_id, mcp_tools)
+
     async def responses_full_generator(
         self,
         request: ResponsesRequest,

Review thread on the new early return (if len(request.tools) == 0):

Collaborator:
I believe @Hanchenli also mentioned this issue. Can we also cover this in a unit test?

Contributor (Author):
Let me know if you have any thoughts, @Hanchenli?
I added a UT; it should be ready for review.

Contributor:
This change looks good to me. The issue I mentioned was that the model might generate function_call requests even if we do not provide tools to them.
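
A minimal sketch of what such a unit test could look like, assuming the helper lives on OpenAIServingResponses (the test name and mocks here are hypothetical; the UT actually added in the PR may differ):

# Hypothetical sketch, not the PR's actual test: verify that no tool
# session is created when the request carries no tools.
import asyncio
from contextlib import AsyncExitStack
from unittest.mock import AsyncMock, MagicMock

from vllm.entrypoints.openai.serving_responses import OpenAIServingResponses


def test_tool_session_skipped_without_tools():
    serving = MagicMock()
    # Bind the real helper under test onto the mocked instance.
    serving._initialize_tool_sessions = (
        OpenAIServingResponses._initialize_tool_sessions.__get__(serving))

    request = MagicMock()
    request.tools = []  # the request asks for no tools

    context = MagicMock()
    context.init_tool_sessions = AsyncMock()

    async def run():
        async with AsyncExitStack() as exit_stack:
            await serving._initialize_tool_sessions(request, context,
                                                    exit_stack)

    asyncio.run(run())
    # The early return must prevent any session initialization.
    context.init_tool_sessions.assert_not_awaited()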
@@ -458,12 +471,8 @@ async def responses_full_generator(

         async with AsyncExitStack() as exit_stack:
             try:
-                mcp_tools = {
-                    tool.server_label: tool
-                    for tool in request.tools if tool.type == "mcp"
-                }
-                await context.init_tool_sessions(self.tool_server, exit_stack,
-                                                 request.request_id, mcp_tools)
+                await self._initialize_tool_sessions(request, context,
+                                                     exit_stack)
                 async for _ in result_generator:
                     pass
             except asyncio.CancelledError:
@@ -1647,12 +1656,10 @@ def _increment_sequence_number_and_return(
         async with AsyncExitStack() as exit_stack:
             processer = None
             if self.use_harmony:
-                mcp_tools = {
-                    tool.server_label: tool
-                    for tool in request.tools if tool.type == "mcp"
-                }
-                await context.init_tool_sessions(self.tool_server, exit_stack,
-                                                 request.request_id, mcp_tools)
+                # TODO: in streaming, we noticed this bug:
+                # https://github.com/vllm-project/vllm/issues/25697
+                await self._initialize_tool_sessions(request, context,
+                                                     exit_stack)
                 processer = self._process_harmony_streaming_events
             else:
                 processer = self._process_simple_streaming_events
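
As an aside, for a request that does carry tools, the helper forwards only the MCP entries, keyed by server label. A rough, self-contained illustration of that filtering (the Tool dataclass here is a stand-in, not vLLM's actual tool type):

# Hypothetical illustration of the mcp_tools filtering inside
# _initialize_tool_sessions; Tool is a stand-in, not vLLM's actual type.
from dataclasses import dataclass


@dataclass
class Tool:
    type: str
    server_label: str = ""


tools = [
    Tool(type="mcp", server_label="search"),
    Tool(type="function"),  # non-MCP tools are ignored by the filter
    Tool(type="mcp", server_label="files"),
]

# Same dict comprehension as in the diff: keep only MCP tools, keyed by label.
mcp_tools = {tool.server_label: tool for tool in tools if tool.type == "mcp"}
assert set(mcp_tools) == {"search", "files"}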