@@ -227,6 +227,29 @@ def _validate_generator_input(
227227 )
228228 return None
229229
230+ def _validate_create_responses_input (
231+ self , request : ResponsesRequest
232+ ) -> ErrorResponse | None :
233+ if self .use_harmony and request .is_include_output_logprobs ():
234+ return self .create_error_response (
235+ err_type = "invalid_request_error" ,
236+ message = "logprobs are not supported with gpt-oss models" ,
237+ status_code = HTTPStatus .BAD_REQUEST ,
238+ )
239+ if request .store and not self .enable_store and request .background :
240+ return self .create_error_response (
241+ err_type = "invalid_request_error" ,
242+ message = (
243+ "This vLLM engine does not support `store=True` and "
244+ "therefore does not support the background mode. To "
245+ "enable these features, set the environment variable "
246+ "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
247+ "the vLLM server."
248+ ),
249+ status_code = HTTPStatus .BAD_REQUEST ,
250+ )
251+ return None
252+
230253 async def create_responses (
231254 self ,
232255 request : ResponsesRequest ,
@@ -240,6 +263,9 @@ async def create_responses(
240263 if error_check_ret is not None :
241264 logger .error ("Error with model %s" , error_check_ret )
242265 return error_check_ret
266+ maybe_validation_error = self ._validate_create_responses_input (request )
267+ if maybe_validation_error is not None :
268+ return maybe_validation_error
243269
244270 # If the engine is dead, raise the engine's DEAD_ERROR.
245271 # This is required for the streaming case, where we return a
@@ -248,31 +274,13 @@ async def create_responses(
248274 raise self .engine_client .dead_error
249275
250276 if request .store and not self .enable_store :
251- if request .background :
252- return self .create_error_response (
253- err_type = "invalid_request_error" ,
254- message = (
255- "This vLLM engine does not support `store=True` and "
256- "therefore does not support the background mode. To "
257- "enable these features, set the environment variable "
258- "`VLLM_ENABLE_RESPONSES_API_STORE=1` when launching "
259- "the vLLM server."
260- ),
261- status_code = HTTPStatus .BAD_REQUEST ,
262- )
263277 # Disable the store option.
264278 # NOTE(woosuk): Although returning an error is possible, we opted
265279 # to implicitly disable store and process the request anyway, as
266280 # we assume most users do not intend to actually store the response
267281 # (i.e., their request's `store=True` just because it's the default
268282 # value).
269283 request .store = False
270- if self .use_harmony and request .is_include_output_logprobs ():
271- return self .create_error_response (
272- err_type = "invalid_request_error" ,
273- message = "logprobs are not supported with gpt-oss models" ,
274- status_code = HTTPStatus .BAD_REQUEST ,
275- )
276284
277285 # Handle the previous response ID.
278286 prev_response_id = request .previous_response_id
@@ -849,6 +857,47 @@ def _construct_input_messages(
849857 messages .extend (request .input ) # type: ignore
850858 return messages
851859
def _construct_harmony_system_input_message(
    self, request: ResponsesRequest, with_custom_tools: bool, tool_types: list[str]
) -> OpenAIHarmonyMessage:
    """Assemble the Harmony system message for a request.

    A built-in tool is advertised only when its type appears in
    ``tool_types`` and ``self.tool_server`` is set and reports having the
    matching tool:

    * ``"web_search_preview"`` -> tool server's ``"browser"``
    * ``"code_interpreter"``   -> tool server's ``"python"``
    * ``"container"``          -> tool server's ``"container"``

    Args:
        request: Supplies the optional reasoning effort and instructions.
        with_custom_tools: Forwarded unchanged to ``get_system_message``.
        tool_types: Tool type names requested for this conversation.

    Returns:
        Whatever ``get_system_message`` returns for the assembled arguments.
    """
    effort = None
    if request.reasoning:
        effort = request.reasoning.effort

    server = self.tool_server

    def server_offers(requested_type: str, server_tool: str):
        # Requested by the caller AND actually provided by the tool server.
        if requested_type not in tool_types or server is None:
            return False
        return server.has_tool(server_tool)

    def description_if(enabled, server_tool: str):
        # A disabled tool contributes no description to the system message.
        if enabled and server is not None:
            return server.get_tool_description(server_tool)
        return None

    # Resolve every availability probe before fetching any description.
    browser_on = server_offers("web_search_preview", "browser")
    python_on = server_offers("code_interpreter", "python")
    container_on = server_offers("container", "container")

    return get_system_message(
        reasoning_effort=effort,
        browser_description=description_if(browser_on, "browser"),
        python_description=description_if(python_on, "python"),
        container_description=description_if(container_on, "container"),
        instructions=request.instructions,
        with_custom_tools=with_custom_tools,
    )
900+
852901 def _construct_input_messages_with_harmony (
853902 self ,
854903 request : ResponsesRequest ,
@@ -857,9 +906,7 @@ def _construct_input_messages_with_harmony(
857906 messages : list [OpenAIHarmonyMessage ] = []
858907 if prev_response is None :
859908 # New conversation.
860- reasoning_effort = request .reasoning .effort if request .reasoning else None
861909 tool_types = [tool .type for tool in request .tools ]
862-
863910 # Allow the MCP Tool type to enable built in tools if the
864911 # server_label is allowlisted in
865912 # envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS
@@ -870,41 +917,10 @@ def _construct_input_messages_with_harmony(
870917 and tool .server_label in envs .GPT_OSS_SYSTEM_TOOL_MCP_LABELS
871918 ):
872919 tool_types .append (tool .server_label )
873- enable_browser = (
874- "web_search_preview" in tool_types
875- and self .tool_server is not None
876- and self .tool_server .has_tool ("browser" )
877- )
878- enable_code_interpreter = (
879- "code_interpreter" in tool_types
880- and self .tool_server is not None
881- and self .tool_server .has_tool ("python" )
882- )
883- enable_container = (
884- "container" in tool_types
885- and self .tool_server is not None
886- and self .tool_server .has_tool ("container" )
887- )
888920 with_custom_tools = has_custom_tools (tool_types )
889- sys_msg = get_system_message (
890- reasoning_effort = reasoning_effort ,
891- browser_description = (
892- self .tool_server .get_tool_description ("browser" )
893- if enable_browser and self .tool_server is not None
894- else None
895- ),
896- python_description = (
897- self .tool_server .get_tool_description ("python" )
898- if enable_code_interpreter and self .tool_server is not None
899- else None
900- ),
901- container_description = (
902- self .tool_server .get_tool_description ("container" )
903- if enable_container and self .tool_server is not None
904- else None
905- ),
906- instructions = request .instructions ,
907- with_custom_tools = with_custom_tools ,
921+
922+ sys_msg = self ._construct_harmony_system_input_message (
923+ request , with_custom_tools , tool_types
908924 )
909925 messages .append (sys_msg )
910926 if with_custom_tools :
0 commit comments