diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py
index 6cdabff6e709..ffc692f09973 100644
--- a/vllm/entrypoints/openai/serving_responses.py
+++ b/vllm/entrypoints/openai/serving_responses.py
@@ -227,6 +227,29 @@ def _validate_generator_input(
             )
         return None
 
+    def _validate_create_responses_input(
+        self, request: ResponsesRequest
+    ) -> ErrorResponse | None:
+        if self.use_harmony and request.is_include_output_logprobs():
+            return self.create_error_response(
+                err_type="invalid_request_error",
+                message="logprobs are not supported with gpt-oss models",
+                status_code=HTTPStatus.BAD_REQUEST,
+            )
+        if request.store and not self.enable_store and request.background:
+            return self.create_error_response(
+                err_type="invalid_request_error",
+                message=(
+                    "This vLLM engine does not support `store=True` and "
+                    "therefore does not support the background mode. To "
+                    "enable these features, set the environment variable "
+                    "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
+                    "the vLLM server."
+                ),
+                status_code=HTTPStatus.BAD_REQUEST,
+            )
+        return None
+
     async def create_responses(
         self,
         request: ResponsesRequest,
@@ -240,6 +263,9 @@ async def create_responses(
         if error_check_ret is not None:
             logger.error("Error with model %s", error_check_ret)
             return error_check_ret
+        maybe_validation_error = self._validate_create_responses_input(request)
+        if maybe_validation_error is not None:
+            return maybe_validation_error
 
         # If the engine is dead, raise the engine's DEAD_ERROR.
         # This is required for the streaming case, where we return a
@@ -248,18 +274,6 @@ async def create_responses(
             raise self.engine_client.dead_error
 
         if request.store and not self.enable_store:
-            if request.background:
-                return self.create_error_response(
-                    err_type="invalid_request_error",
-                    message=(
-                        "This vLLM engine does not support `store=True` and "
-                        "therefore does not support the background mode. To "
-                        "enable these features, set the environment variable "
-                        "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
-                        "the vLLM server."
-                    ),
-                    status_code=HTTPStatus.BAD_REQUEST,
-                )
             # Disable the store option.
             # NOTE(woosuk): Although returning an error is possible, we opted
             # to implicitly disable store and process the request anyway, as
@@ -267,12 +281,6 @@ async def create_responses(
             # (i.e., their request's `store=True` just because it's the default
             # value).
             request.store = False
-        if self.use_harmony and request.is_include_output_logprobs():
-            return self.create_error_response(
-                err_type="invalid_request_error",
-                message="logprobs are not supported with gpt-oss models",
-                status_code=HTTPStatus.BAD_REQUEST,
-            )
 
         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
@@ -849,6 +857,47 @@ def _construct_input_messages(
             messages.extend(request.input)  # type: ignore
         return messages
 
+    def _construct_harmony_system_input_message(
+        self, request: ResponsesRequest, with_custom_tools: bool, tool_types: list[str]
+    ) -> OpenAIHarmonyMessage:
+        reasoning_effort = request.reasoning.effort if request.reasoning else None
+        enable_browser = (
+            "web_search_preview" in tool_types
+            and self.tool_server is not None
+            and self.tool_server.has_tool("browser")
+        )
+        enable_code_interpreter = (
+            "code_interpreter" in tool_types
+            and self.tool_server is not None
+            and self.tool_server.has_tool("python")
+        )
+        enable_container = (
+            "container" in tool_types
+            and self.tool_server is not None
+            and self.tool_server.has_tool("container")
+        )
+        sys_msg = get_system_message(
+            reasoning_effort=reasoning_effort,
+            browser_description=(
+                self.tool_server.get_tool_description("browser")
+                if enable_browser and self.tool_server is not None
+                else None
+            ),
+            python_description=(
+                self.tool_server.get_tool_description("python")
+                if enable_code_interpreter and self.tool_server is not None
+                else None
+            ),
+            container_description=(
+                self.tool_server.get_tool_description("container")
+                if enable_container and self.tool_server is not None
+                else None
+            ),
+            instructions=request.instructions,
+            with_custom_tools=with_custom_tools,
+        )
+        return sys_msg
+
     def _construct_input_messages_with_harmony(
         self,
         request: ResponsesRequest,
@@ -857,9 +906,7 @@ def _construct_input_messages_with_harmony(
         messages: list[OpenAIHarmonyMessage] = []
         if prev_response is None:
             # New conversation.
-            reasoning_effort = request.reasoning.effort if request.reasoning else None
             tool_types = [tool.type for tool in request.tools]
-
             # Allow the MCP Tool type to enable built in tools if the
             # server_label is allowlisted in
             # envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
@@ -870,41 +917,10 @@ def _construct_input_messages_with_harmony(
                     and tool.server_label in envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
                 ):
                     tool_types.append(tool.server_label)
-            enable_browser = (
-                "web_search_preview" in tool_types
-                and self.tool_server is not None
-                and self.tool_server.has_tool("browser")
-            )
-            enable_code_interpreter = (
-                "code_interpreter" in tool_types
-                and self.tool_server is not None
-                and self.tool_server.has_tool("python")
-            )
-            enable_container = (
-                "container" in tool_types
-                and self.tool_server is not None
-                and self.tool_server.has_tool("container")
-            )
             with_custom_tools = has_custom_tools(tool_types)
-            sys_msg = get_system_message(
-                reasoning_effort=reasoning_effort,
-                browser_description=(
-                    self.tool_server.get_tool_description("browser")
-                    if enable_browser and self.tool_server is not None
-                    else None
-                ),
-                python_description=(
-                    self.tool_server.get_tool_description("python")
-                    if enable_code_interpreter and self.tool_server is not None
-                    else None
-                ),
-                container_description=(
-                    self.tool_server.get_tool_description("container")
-                    if enable_container and self.tool_server is not None
-                    else None
-                ),
-                instructions=request.instructions,
-                with_custom_tools=with_custom_tools,
+
+            sys_msg = self._construct_harmony_system_input_message(
+                request, with_custom_tools, tool_types
             )
             messages.append(sys_msg)
             if with_custom_tools: