Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tests/entrypoints/openai/test_response_api_with_harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def server(monkeypatch_module: pytest.MonkeyPatch):

with monkeypatch_module.context() as m:
m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1")
m.setenv("VLLM_RESPONSES_API_ENABLE_HARMONY_MESSAGES_OUTPUT", "1")
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
yield remote_server

Expand All @@ -48,6 +49,11 @@ async def test_basic(client: OpenAI, model_name: str):
assert response is not None
print("response: ", response)
assert response.status == "completed"
# For now, just validate that input and output harmony messages exist,
# to confirm that the env flag enables them to be passed through properly.
# Actual unit tests will need to be implemented on a model-specific basis.
assert len(response.input_harmony_messages) != 0
assert len(response.output_harmony_messages) != 0


@pytest.mark.asyncio
Expand Down
1 change: 1 addition & 0 deletions vllm/entrypoints/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(
available_tools: list[str],
):
self._messages = messages
self.input_messages = messages
self.available_tools = available_tools
self._tool_sessions: dict[str, Union[ClientSession, Tool]] = {}

Expand Down
11 changes: 10 additions & 1 deletion vllm/entrypoints/openai/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from openai.types.responses.response import ToolChoice
from openai.types.responses.tool import Tool
from openai.types.shared import Metadata, Reasoning
from openai_harmony import Message
from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
ValidationInfo, field_validator, model_validator)
from typing_extensions import TypeAlias
Expand Down Expand Up @@ -1847,6 +1848,10 @@ class ResponseUsage(OpenAIBaseModel):
class ResponsesResponse(OpenAIBaseModel):
id: str = Field(default_factory=lambda: f"resp_{random_uuid()}")
created_at: int = Field(default_factory=lambda: int(time.time()))
# These are populated when the env flag
# VLLM_RESPONSES_API_ENABLE_HARMONY_MESSAGES_OUTPUT is set
input_harmony_messages: Optional[list[Message]] = None
output_harmony_messages: Optional[list[Message]] = None
# error: Optional[ResponseError] = None
# incomplete_details: Optional[IncompleteDetails] = None
instructions: Optional[str] = None
Expand Down Expand Up @@ -1882,12 +1887,16 @@ def from_request(
created_time: int,
output: list[ResponseOutputItem],
status: ResponseStatus,
input_harmony_messages: Optional[list[Message]] = None,
output_harmony_messages: Optional[list[Message]] = None,
usage: Optional[ResponseUsage] = None,
) -> "ResponsesResponse":
return cls(
id=request.request_id,
created_at=created_time,
instructions=request.instructions,
input_harmony_messages=input_harmony_messages,
output_harmony_messages=output_harmony_messages,
metadata=request.metadata,
model=model_name,
output=output,
Expand Down Expand Up @@ -2179,7 +2188,7 @@ class TranscriptionRequest(OpenAIBaseModel):
to_language: Optional[str] = None
"""The language of the output audio we transcribe to.

Please note that this is not currently used by supported models at this
Please note that this is not currently used by supported models at this
time, but it is a placeholder for future use, matching translation api.
"""

Expand Down
10 changes: 10 additions & 0 deletions vllm/entrypoints/openai/serving_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,9 +439,17 @@ async def responses_full_generator(
# TODO: Use a vllm-specific Validation Error
return self.create_error_response(str(e))

output_harmony_messages = None
input_harmony_messages = None
if self.use_harmony:
assert isinstance(context, HarmonyContext)
output = self._make_response_output_items_with_harmony(context)
if envs.VLLM_GPT_OSS_USE_CONTAINER_TOOL:
# TODO: Handle leftover parser state?
input_harmony_messages = context.input_messages
# .messages contains input and output, so just get the output
output_harmony_messages = context.messages[
len(input_harmony_messages):]
# TODO: these are all 0 for now!
num_prompt_tokens = context.num_prompt_tokens
num_generated_tokens = context.num_output_tokens
Expand Down Expand Up @@ -479,6 +487,8 @@ async def responses_full_generator(
model_name=model_name,
created_time=created_time,
output=output,
input_harmony_messages=input_harmony_messages,
output_harmony_messages=output_harmony_messages,
status="completed",
usage=usage,
)
Expand Down
6 changes: 6 additions & 0 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,12 @@ def get_vllm_port() -> Optional[int]:
"VLLM_TUNED_CONFIG_FOLDER":
lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None),

# Whether to enable outputting Harmony messages on the
# Responses API response object
"VLLM_RESPONSES_API_ENABLE_HARMONY_MESSAGES_OUTPUT":
lambda: bool(int(
os.getenv("VLLM_RESPONSES_API_ENABLE_HARMONY_MESSAGES_OUTPUT",
"0"))),
}

# --8<-- [end:env-vars-definition]
Expand Down