Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/configuration/serve_args.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ model: meta-llama/Llama-3.1-8B-Instruct
host: "127.0.0.1"
port: 6379
uvicorn-log-level: "info"
# Optional: log request metadata at INFO (see docs/usage/troubleshooting.md)
# enable-log-requests: true
# Optional: truncated prompt snippets at INFO; requires enable-log-requests.
# Off by default for security (see docs/usage/security.md).
# enable-log-request-prompts: true
```

To use the above config file:
Expand Down
14 changes: 14 additions & 0 deletions docs/usage/security.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,20 @@ significantly reduce the attack surface for these types of abuse.
Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` to prevent HTTP
redirects from being followed to bypass domain restrictions.

## Logging and client payloads

For `vllm serve`, `--enable-log-requests` records request metadata at INFO (for
example sampling parameters). By design it does **not** include raw prompt
content at INFO: prompts can contain secrets or PII, and logs are often copied
to less-controlled systems.

To debug prompts without raising the global log level, you can:

- Set `VLLM_LOGGING_LEVEL=DEBUG` for full prompt details in logs, **or**
- Explicitly pass `--enable-log-request-prompts` **in addition to**
`--enable-log-requests` for **truncated** prompt summaries at INFO (still
treat log sinks as sensitive if you enable this).

## Security and Firewalls: Protecting Exposed vLLM Systems

While vLLM is designed to allow unsafe network services to be isolated to
Expand Down
2 changes: 2 additions & 0 deletions docs/usage/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ You can check if this is happening by trying the old defaults with `--generation
If other strategies don't solve the problem, it's likely that the vLLM instance is stuck somewhere. You can use the following environment variables to help debug the issue:

- `export VLLM_LOGGING_LEVEL=DEBUG` to turn on more logging.
- For the OpenAI-compatible server, `--enable-log-requests` logs request IDs and sampling parameters at INFO. **Full** prompt inputs (text, token IDs, shapes) appear only at DEBUG unless you also opt in to bounded INFO previews (see below).
- `--enable-log-request-prompts` (requires `--enable-log-requests`) adds **truncated** prompt summaries to INFO lines. It is **off by default** because prompts can contain sensitive data that may end up in log aggregation systems. See [Security](security.md#logging-and-client-payloads).
- `export VLLM_LOG_STATS_INTERVAL=1.` to get log statistics more frequently for tracking running queue, waiting queue and cache hit states.
- `export CUDA_LAUNCH_BLOCKING=1` to identify which CUDA kernel is causing the problem.
- `export NCCL_DEBUG=TRACE` to turn on more logging for NCCL.
Expand Down
13 changes: 13 additions & 0 deletions tests/entrypoints/openai/test_cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,19 @@ def test_chat_template_validation_for_sad_paths(serve_parser):
validate_parsed_serve_args(args)


def test_enable_log_request_prompts_requires_enable_log_requests(serve_parser):
    """--enable-log-request-prompts alone must be rejected by validation."""
    parsed = serve_parser.parse_args(args=["--enable-log-request-prompts"])
    with pytest.raises(TypeError, match="--enable-log-request-prompts"):
        validate_parsed_serve_args(parsed)


def test_enable_log_request_prompts_passes_with_log_requests(serve_parser):
    """Validation succeeds when both logging flags are supplied together."""
    cli = ["--enable-log-requests", "--enable-log-request-prompts"]
    parsed = serve_parser.parse_args(args=cli)
    validate_parsed_serve_args(parsed)


@pytest.mark.parametrize(
"cli_args, expected_middleware",
[
Expand Down
24 changes: 24 additions & 0 deletions tests/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,11 +472,35 @@ def test_request_logger_log_outputs_integration():

assert "Received request %s" in input_call[0]
assert input_call[1] == "test-integration"
# Prompts at INFO require explicit --enable-log-request-prompts (security).
assert input_call[4] == ""

assert "Generated response %s%s" in output_call[0]
assert output_call[1] == "test-integration"


def test_request_logger_log_inputs_prompt_at_info_when_opt_in():
    """With log_prompts_at_info=True, the INFO log line carries the prompt text."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None, log_prompts_at_info=True)
        req_logger.log_inputs(
            request_id="test-prompt-info",
            prompt="Hello",
            prompt_token_ids=None,
            prompt_embeds=None,
            params=None,
            lora_request=None,
        )

    fake_logger.info.assert_called_once()
    positional_args = fake_logger.info.call_args.args
    assert positional_args[1] == "test-prompt-info"
    assert "Hello" in positional_args[4]


def test_streaming_complete_logs_full_text_content():
"""Test that streaming complete logging includes
full accumulated text, not just token count."""
Expand Down
5 changes: 3 additions & 2 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2280,8 +2280,9 @@ def add_cli_args(
action=argparse.BooleanOptionalAction,
default=AsyncEngineArgs.enable_log_requests,
help="Enable logging request information, dependent on log level:\n"
"- INFO: Request ID, parameters and LoRA request.\n"
"- DEBUG: Prompt inputs (e.g: text, token IDs).\n"
"- INFO: Request ID, parameters and LoRA request (and truncated "
"prompt inputs if `--enable-log-request-prompts` is set).\n"
"- DEBUG: Full prompt inputs (e.g. text, token IDs).\n"
"You can set the minimum log level via `VLLM_LOGGING_LEVEL`.",
)
current_platform.pre_register_and_update(parser)
Expand Down
56 changes: 54 additions & 2 deletions vllm/entrypoints/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,36 @@

logger = init_logger(__name__)

# With `--enable-log-request-prompts`, INFO logs may include a bounded prompt
# preview when `--max-log-len` is unset. Full inputs remain at DEBUG.
# See github.com/vllm-project/vllm/issues/38537.
_DEFAULT_INFO_PROMPT_STR_LEN = 4096
_DEFAULT_INFO_PROMPT_TOKEN_IDS = 512


class RequestLogger:
def __init__(self, *, max_log_len: int | None) -> None:
def __init__(
self,
*,
max_log_len: int | None,
log_prompts_at_info: bool = False,
) -> None:
self.max_log_len = max_log_len
self.log_prompts_at_info = log_prompts_at_info

if not logger.isEnabledFor(logging.INFO):
logger.warning_once(
"`--enable-log-requests` is set but "
"the minimum log level is higher than INFO. "
"No request information will be logged."
)
elif self.log_prompts_at_info and not logger.isEnabledFor(logging.DEBUG):
logger.info_once(
"`--enable-log-request-prompts` is set but "
"the minimum log level is higher than DEBUG. "
"Prompt details at INFO are truncated when long; "
"set `VLLM_LOGGING_LEVEL=DEBUG` for full details."
)
elif not logger.isEnabledFor(logging.DEBUG):
logger.info_once(
"`--enable-log-requests` is set but "
Expand All @@ -32,6 +51,35 @@ def __init__(self, *, max_log_len: int | None) -> None:
"To view more details, set `VLLM_LOGGING_LEVEL=DEBUG`."
)
Comment on lines 47 to 52
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The updated hint message specifically mentions 'Prompt text', but the implementation also includes prompt_token_ids and prompt_embeds shape in the INFO logs. To be more accurate and consistent with the changes in _prompt_summary_for_info, the message should refer to 'Prompt details' or 'Prompt inputs'.

Suggested change
logger.info_once(
"`--enable-log-requests` is set but "
"the minimum log level is higher than DEBUG. "
"Only limited information will be logged to minimize overhead. "
"To view more details, set `VLLM_LOGGING_LEVEL=DEBUG`."
"Prompt text at INFO is truncated when long; "
"set `VLLM_LOGGING_LEVEL=DEBUG` for full details."
)
logger.info_once(
"`--enable-log-requests` is set but "
"the minimum log level is higher than DEBUG. "
"Prompt details at INFO are truncated when long; "
"set `VLLM_LOGGING_LEVEL=DEBUG` for full details."
)


def _prompt_summary_for_info(
    self,
    prompt: str | None,
    prompt_token_ids: list[int] | None,
    prompt_embeds: torch.Tensor | None,
) -> str:
    """Build a bounded prompt preview for INFO-level request logs.

    Returns an empty string unless prompt logging at INFO was explicitly
    enabled (``log_prompts_at_info``) and INFO logging is active. Otherwise
    returns a ``", prompt..."`` suffix describing whichever input is present,
    truncated to ``max_log_len`` (or the module defaults when unset).
    """
    # Opt-in only: prompts may contain sensitive data, so stay silent
    # unless the flag is set and INFO records would actually be emitted.
    if not (self.log_prompts_at_info and logger.isEnabledFor(logging.INFO)):
        return ""

    # `--max-log-len`, when given, bounds both text and token previews;
    # otherwise fall back to the per-kind module defaults.
    if self.max_log_len is not None:
        char_limit = token_limit = self.max_log_len
    else:
        char_limit = _DEFAULT_INFO_PROMPT_STR_LEN
        token_limit = _DEFAULT_INFO_PROMPT_TOKEN_IDS

    if prompt is not None:
        return f", prompt: {prompt[:char_limit]!r}"
    if prompt_token_ids is not None:
        return f", prompt_token_ids: {prompt_token_ids[:token_limit]}"
    if prompt_embeds is not None:
        # Never log embedding contents; the shape is enough for debugging.
        return f", prompt_embeds: shape={prompt_embeds.shape}"
    return ""

def log_inputs(
self,
request_id: str,
Expand Down Expand Up @@ -60,11 +108,15 @@ def log_inputs(
prompt_embeds.shape if prompt_embeds is not None else None,
)

prompt_summary = self._prompt_summary_for_info(
prompt, prompt_token_ids, prompt_embeds
)
logger.info(
"Received request %s: params: %s, lora_request: %s.",
"Received request %s: params: %s, lora_request: %s%s.",
request_id,
params,
lora_request,
prompt_summary,
)

def log_outputs(
Expand Down
10 changes: 8 additions & 2 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,10 @@ async def init_app_state(
served_model_names = [args.model]

if args.enable_log_requests:
request_logger = RequestLogger(max_log_len=args.max_log_len)
request_logger = RequestLogger(
max_log_len=args.max_log_len,
log_prompts_at_info=args.enable_log_request_prompts,
)
else:
request_logger = None

Expand Down Expand Up @@ -445,7 +448,10 @@ async def init_render_app_state(
)

if args.enable_log_requests:
request_logger = RequestLogger(max_log_len=args.max_log_len)
request_logger = RequestLogger(
max_log_len=args.max_log_len,
log_prompts_at_info=args.enable_log_request_prompts,
)
else:
request_logger = None

Expand Down
14 changes: 14 additions & 0 deletions vllm/entrypoints/openai/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ class BaseFrontendArgs:
"""If set to True, log model outputs (generations).
Requires `--enable-log-requests`. As with `--enable-log-requests`,
information is only logged at INFO level at maximum."""
enable_log_request_prompts: bool = False
"""If set to True, include truncated prompt inputs (text, token ids, or
embedding tensor shape) in INFO-level request logs when
`--enable-log-requests` is set. **Off by default:** logging client
payloads can expose sensitive data in log sinks. Requires
`--enable-log-requests`. Use `VLLM_LOGGING_LEVEL=DEBUG` for full prompt
details without this flag."""
enable_log_deltas: bool = True
"""If set to False, output deltas will not be logged. Relevant only if
--enable-log-outputs is set.
Expand Down Expand Up @@ -364,6 +371,13 @@ def validate_parsed_serve_args(args: argparse.Namespace):
raise TypeError("Error: --enable-auto-tool-choice requires --tool-call-parser")
if args.enable_log_outputs and not args.enable_log_requests:
raise TypeError("Error: --enable-log-outputs requires --enable-log-requests")
if (
getattr(args, "enable_log_request_prompts", False)
and not args.enable_log_requests
):
raise TypeError(
"Error: --enable-log-request-prompts requires --enable-log-requests"
)


def create_parser_for_docs() -> FlexibleArgumentParser:
Expand Down
Loading