From 6ce018e69c6e862324eb3de1b4d09ca99ebf05f7 Mon Sep 17 00:00:00 2001 From: Lidang-Jiang Date: Thu, 5 Mar 2026 19:52:43 +0800 Subject: [PATCH] [BugFix] Wrap ErrorResponse in JSONResponse with proper HTTP status code in create_speech The create_speech endpoint returns ErrorResponse as a raw Pydantic object when model validation fails (e.g. _check_model() returns 404). FastAPI serializes it as JSON with HTTP 200, making it impossible for clients to detect errors via status code. This follows the same pattern already used in create_chat_completion (api_server.py:788-792): check isinstance(result, ErrorResponse) and wrap in JSONResponse with the correct status_code. Co-Authored-By: Claude Opus 4.6 Signed-off-by: Lidang-Jiang --- vllm_omni/entrypoints/openai/api_server.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vllm_omni/entrypoints/openai/api_server.py b/vllm_omni/entrypoints/openai/api_server.py index 84a81a86139..5b1f2582444 100644 --- a/vllm_omni/entrypoints/openai/api_server.py +++ b/vllm_omni/entrypoints/openai/api_server.py @@ -855,7 +855,13 @@ async def create_speech(request: OpenAICreateSpeechRequest, raw_request: Request ) return base_server.create_error_response(message="The model does not support Speech API") try: - return await handler.create_speech(request, raw_request) + result = await handler.create_speech(request, raw_request) + if isinstance(result, ErrorResponse): + return JSONResponse( + content=result.model_dump(), + status_code=result.error.code if result.error else 400, + ) + return result except Exception as e: raise HTTPException(status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value, detail=str(e)) from e