Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions tests/entrypoints/openai/test_response_api_with_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import pytest_asyncio
import requests
from openai import BadRequestError, NotFoundError, OpenAI
from openai_harmony import (
Message,
)

from ...utils import RemoteOpenAIServer

Expand Down Expand Up @@ -326,6 +329,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
],
stream=True,
background=background,
extra_body={"enable_response_messages": True},
)

current_item_id = ""
Expand All @@ -334,6 +338,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
events = []
current_event_mode = None
resp_id = None
checked_response_completed = False
async for event in response:
if event.type == "response.created":
resp_id = event.response.id
Expand All @@ -346,6 +351,16 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
]:
assert "input_messages" in event.response.model_extra
assert "output_messages" in event.response.model_extra
if event.type == "response.completed":
# make sure the serialization of content works
for msg in event.response.model_extra["output_messages"]:
# make sure we can convert the messages back into harmony
Message.from_dict(msg)

for msg in event.response.model_extra["input_messages"]:
# make sure we can convert the messages back into harmony
Message.from_dict(msg)
checked_response_completed = True

if current_event_mode != event.type:
current_event_mode = event.type
Expand Down Expand Up @@ -390,6 +405,7 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
assert len(events) > 0
response_completed_event = events[-1]
assert len(response_completed_event.response.output) > 0
assert checked_response_completed

if background:
starting_after = 5
Expand Down
49 changes: 44 additions & 5 deletions vllm/entrypoints/openai/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
Field,
TypeAdapter,
ValidationInfo,
field_serializer,
field_validator,
model_validator,
)
Expand Down Expand Up @@ -2078,11 +2079,6 @@ class ResponsesResponse(OpenAIBaseModel):
model: str
object: Literal["response"] = "response"
output: list[ResponseOutputItem]
# These are populated when enable_response_messages is set to True
# TODO: Currently an issue where content of harmony messages
# is not available when these are serialized. Metadata is available
input_messages: Optional[list[ChatCompletionMessageParam]] = None
output_messages: Optional[list[ChatCompletionMessageParam]] = None
parallel_tool_calls: bool
temperature: float
tool_choice: ToolChoice
Expand All @@ -2102,6 +2098,49 @@ class ResponsesResponse(OpenAIBaseModel):
usage: Optional[ResponseUsage] = None
user: Optional[str] = None

# --8<-- [start:responses-extra-params]
# These are populated when enable_response_messages is set to True
# NOTE: custom serialization is needed
# see serialize_input_messages and serialize_output_messages
input_messages: Optional[list[ChatCompletionMessageParam]] = None
output_messages: Optional[list[ChatCompletionMessageParam]] = None
# --8<-- [end:responses-extra-params]

# NOTE: openAI harmony doesn't serialize TextContent properly,
# TODO: this fixes for TextContent, but need to verify for tools etc
# https://github.com/openai/harmony/issues/78
# NOTE: openAI harmony doesn't serialize TextContent properly,
# TODO: this fixes for TextContent, but need to verify for tools etc
# https://github.com/openai/harmony/issues/78
@field_serializer("output_messages", when_used="json")
def serialize_output_messages(self, msgs, _info):
    """Serialize ``output_messages`` into a list of plain dicts for JSON.

    Harmony ``Message`` objects don't round-trip through pydantic's default
    JSON serialization (their content is dropped), so each message is
    converted explicitly via ``to_dict()``.

    Returns ``None`` when there are no messages (field not populated).
    """
    if not msgs:
        return None
    serialized = []
    for m in msgs:
        if isinstance(m, dict):
            # Already a plain mapping; pass through unchanged.
            serialized.append(m)
        elif hasattr(m, "to_dict"):
            # Harmony messages expose to_dict(); checking for the method
            # itself (not __dict__) avoids AttributeError on objects that
            # have an instance dict but no to_dict().
            serialized.append(m.to_dict())
        else:
            # Fallback to pydantic dump. Use model_dump() (a dict) rather
            # than model_dump_json() (a str) so every element of the
            # serialized list has a consistent dict shape.
            serialized.append(m.model_dump())
    return serialized

# NOTE: openAI harmony doesn't serialize TextContent properly, this fixes it
# https://github.com/openai/harmony/issues/78
# NOTE: openAI harmony doesn't serialize TextContent properly, this fixes it
# https://github.com/openai/harmony/issues/78
@field_serializer("input_messages", when_used="json")
def serialize_input_messages(self, msgs, _info):
    """Serialize ``input_messages`` into a list of plain dicts for JSON.

    Harmony ``Message`` objects don't round-trip through pydantic's default
    JSON serialization (their content is dropped), so each message is
    converted explicitly via ``to_dict()``.

    Returns ``None`` when there are no messages (field not populated).
    """
    if not msgs:
        return None
    serialized = []
    for m in msgs:
        if isinstance(m, dict):
            # Already a plain mapping; pass through unchanged.
            serialized.append(m)
        elif hasattr(m, "to_dict"):
            # Harmony messages expose to_dict(); checking for the method
            # itself (not __dict__) avoids AttributeError on objects that
            # have an instance dict but no to_dict().
            serialized.append(m.to_dict())
        else:
            # Fallback to pydantic dump. Use model_dump() (a dict) rather
            # than model_dump_json() (a str) so every element of the
            # serialized list has a consistent dict shape.
            serialized.append(m.model_dump())
    return serialized

@classmethod
def from_request(
cls,
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -1876,6 +1876,6 @@ async def empty_async_generator():
ResponseCompletedEvent(
type="response.completed",
sequence_number=-1,
response=final_response.model_dump(),
response=final_response,
)
)