Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@
from vllm.entrypoints.launcher import serve_http
from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
from vllm.entrypoints.openai.engine.protocol import GenerationError
from vllm.entrypoints.openai.models.protocol import BaseModelPath
from vllm.entrypoints.openai.models.serving import OpenAIServingModels
from vllm.entrypoints.openai.server_utils import (
engine_error_handler,
exception_handler,
generation_error_handler,
get_uvicorn_log_config,
http_exception_handler,
lifespan,
Expand Down Expand Up @@ -263,6 +265,7 @@ def build_app(
app.exception_handler(RequestValidationError)(validation_exception_handler)
app.exception_handler(EngineGenerateError)(engine_error_handler)
app.exception_handler(EngineDeadError)(engine_error_handler)
app.exception_handler(GenerationError)(generation_error_handler)
app.exception_handler(Exception)(exception_handler)

# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
Expand Down
17 changes: 16 additions & 1 deletion vllm/entrypoints/openai/server_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
from vllm import envs
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.launcher import terminate_if_errored
from vllm.entrypoints.openai.engine.protocol import ErrorInfo, ErrorResponse
from vllm.entrypoints.openai.engine.protocol import (
ErrorInfo,
ErrorResponse,
GenerationError,
)
from vllm.entrypoints.utils import create_error_response, sanitize_message
from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger
Expand Down Expand Up @@ -354,6 +358,17 @@ async def engine_error_handler(
return JSONResponse(err.model_dump(), status_code=err.error.code)


async def generation_error_handler(req: Request, exc: GenerationError):
    """Convert a GenerationError into a plain JSON error response.

    Unlike the generic exception handler, this path deliberately skips
    stack-trace logging: GenerationError represents an anticipated failure
    mode (for example, a KV cache load failure) and is surfaced to the
    client as a 500 without cluttering the server logs.
    """
    error_response = create_error_response(exc)
    return JSONResponse(
        error_response.model_dump(),
        status_code=error_response.error.code,
    )


async def exception_handler(req: Request, exc: Exception):
if req.app.state.args.log_error_stack:
logger.exception(
Expand Down
Loading