Skip to content

Commit 4a62415

Browse files
qandrewrtourgeman
authored and committed
[gpt-oss][1/N] EZ: refactor serving_responses for modularity (vllm-project#26948)
Signed-off-by: Andrew Xia <[email protected]>
1 parent 450a57d commit 4a62415

File tree

1 file changed

+70
-54
lines changed

1 file changed

+70
-54
lines changed

vllm/entrypoints/openai/serving_responses.py

Lines changed: 70 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,29 @@ def _validate_generator_input(
227227
)
228228
return None
229229

230+
def _validate_create_responses_input(
231+
self, request: ResponsesRequest
232+
) -> ErrorResponse | None:
233+
if self.use_harmony and request.is_include_output_logprobs():
234+
return self.create_error_response(
235+
err_type="invalid_request_error",
236+
message="logprobs are not supported with gpt-oss models",
237+
status_code=HTTPStatus.BAD_REQUEST,
238+
)
239+
if request.store and not self.enable_store and request.background:
240+
return self.create_error_response(
241+
err_type="invalid_request_error",
242+
message=(
243+
"This vLLM engine does not support `store=True` and "
244+
"therefore does not support the background mode. To "
245+
"enable these features, set the environment variable "
246+
"`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
247+
"the vLLM server."
248+
),
249+
status_code=HTTPStatus.BAD_REQUEST,
250+
)
251+
return None
252+
230253
async def create_responses(
231254
self,
232255
request: ResponsesRequest,
@@ -240,6 +263,9 @@ async def create_responses(
240263
if error_check_ret is not None:
241264
logger.error("Error with model %s", error_check_ret)
242265
return error_check_ret
266+
maybe_validation_error = self._validate_create_responses_input(request)
267+
if maybe_validation_error is not None:
268+
return maybe_validation_error
243269

244270
# If the engine is dead, raise the engine's DEAD_ERROR.
245271
# This is required for the streaming case, where we return a
@@ -248,31 +274,13 @@ async def create_responses(
248274
raise self.engine_client.dead_error
249275

250276
if request.store and not self.enable_store:
251-
if request.background:
252-
return self.create_error_response(
253-
err_type="invalid_request_error",
254-
message=(
255-
"This vLLM engine does not support `store=True` and "
256-
"therefore does not support the background mode. To "
257-
"enable these features, set the environment variable "
258-
"`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
259-
"the vLLM server."
260-
),
261-
status_code=HTTPStatus.BAD_REQUEST,
262-
)
263277
# Disable the store option.
264278
# NOTE(woosuk): Although returning an error is possible, we opted
265279
# to implicitly disable store and process the request anyway, as
266280
# we assume most users do not intend to actually store the response
267281
# (i.e., their request's `store=True` just because it's the default
268282
# value).
269283
request.store = False
270-
if self.use_harmony and request.is_include_output_logprobs():
271-
return self.create_error_response(
272-
err_type="invalid_request_error",
273-
message="logprobs are not supported with gpt-oss models",
274-
status_code=HTTPStatus.BAD_REQUEST,
275-
)
276284

277285
# Handle the previous response ID.
278286
prev_response_id = request.previous_response_id
@@ -849,6 +857,47 @@ def _construct_input_messages(
849857
messages.extend(request.input) # type: ignore
850858
return messages
851859

860+
def _construct_harmony_system_input_message(
    self, request: ResponsesRequest, with_custom_tools: bool, tool_types: list[str]
) -> OpenAIHarmonyMessage:
    """Build the Harmony system message for a new conversation.

    Wires the requested built-in tools (browser, python, container) into
    the system message when the tool server actually provides them.
    """
    effort = request.reasoning.effort if request.reasoning else None
    server = self.tool_server

    def _description(tool_type: str, tool_name: str):
        # A tool is advertised only when the request asked for it AND the
        # tool server exists AND it actually hosts that tool.
        if tool_type in tool_types and server is not None and server.has_tool(tool_name):
            return server.get_tool_description(tool_name)
        return None

    return get_system_message(
        reasoning_effort=effort,
        browser_description=_description("web_search_preview", "browser"),
        python_description=_description("code_interpreter", "python"),
        container_description=_description("container", "container"),
        instructions=request.instructions,
        with_custom_tools=with_custom_tools,
    )
900+
852901
def _construct_input_messages_with_harmony(
853902
self,
854903
request: ResponsesRequest,
@@ -857,9 +906,7 @@ def _construct_input_messages_with_harmony(
857906
messages: list[OpenAIHarmonyMessage] = []
858907
if prev_response is None:
859908
# New conversation.
860-
reasoning_effort = request.reasoning.effort if request.reasoning else None
861909
tool_types = [tool.type for tool in request.tools]
862-
863910
# Allow the MCP Tool type to enable built in tools if the
864911
# server_label is allowlisted in
865912
# envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
@@ -870,41 +917,10 @@ def _construct_input_messages_with_harmony(
870917
and tool.server_label in envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
871918
):
872919
tool_types.append(tool.server_label)
873-
enable_browser = (
874-
"web_search_preview" in tool_types
875-
and self.tool_server is not None
876-
and self.tool_server.has_tool("browser")
877-
)
878-
enable_code_interpreter = (
879-
"code_interpreter" in tool_types
880-
and self.tool_server is not None
881-
and self.tool_server.has_tool("python")
882-
)
883-
enable_container = (
884-
"container" in tool_types
885-
and self.tool_server is not None
886-
and self.tool_server.has_tool("container")
887-
)
888920
with_custom_tools = has_custom_tools(tool_types)
889-
sys_msg = get_system_message(
890-
reasoning_effort=reasoning_effort,
891-
browser_description=(
892-
self.tool_server.get_tool_description("browser")
893-
if enable_browser and self.tool_server is not None
894-
else None
895-
),
896-
python_description=(
897-
self.tool_server.get_tool_description("python")
898-
if enable_code_interpreter and self.tool_server is not None
899-
else None
900-
),
901-
container_description=(
902-
self.tool_server.get_tool_description("container")
903-
if enable_container and self.tool_server is not None
904-
else None
905-
),
906-
instructions=request.instructions,
907-
with_custom_tools=with_custom_tools,
921+
922+
sys_msg = self._construct_harmony_system_input_message(
923+
request, with_custom_tools, tool_types
908924
)
909925
messages.append(sys_msg)
910926
if with_custom_tools:

0 commit comments

Comments
 (0)