Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/configuration/serve_args.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ model: meta-llama/Llama-3.1-8B-Instruct
host: "127.0.0.1"
port: 6379
uvicorn-log-level: "info"
# Optional: log request metadata at INFO (see docs/usage/troubleshooting.md)
# enable-log-requests: true
# Optional: truncated prompt snippets at INFO; requires enable-log-requests.
# Off by default for security (see docs/usage/security.md).
# enable-log-request-prompts: true
```

To use the above config file:
Expand Down
14 changes: 14 additions & 0 deletions docs/usage/security.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,20 @@ significantly reduce the attack surface for these types of abuse.
Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` to prevent HTTP
redirects from being followed to bypass domain restrictions.

## Logging and client payloads

For `vllm serve`, `--enable-log-requests` records request metadata at INFO (for
example sampling parameters). By design it does **not** include raw prompt
content at INFO: prompts can contain secrets or PII, and logs are often copied
to less-controlled systems.

To debug prompts without raising the global log level, you can:

- Set `VLLM_LOGGING_LEVEL=DEBUG` for full prompt details in logs, **or**
- Explicitly pass `--enable-log-request-prompts` **in addition to**
`--enable-log-requests` for **truncated** prompt summaries at INFO (still
treat log sinks as sensitive if you enable this).

## Security and Firewalls: Protecting Exposed vLLM Systems

While vLLM is designed to allow unsafe network services to be isolated to
Expand Down
2 changes: 2 additions & 0 deletions docs/usage/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ You can check if this is happening by trying the old defaults with `--generation
If other strategies don't solve the problem, it's likely that the vLLM instance is stuck somewhere. You can use the following environment variables to help debug the issue:

- `export VLLM_LOGGING_LEVEL=DEBUG` to turn on more logging.
- For the OpenAI-compatible server, `--enable-log-requests` logs request IDs and sampling parameters at INFO. **Full** prompt inputs (text, token IDs, shapes) appear only at DEBUG unless you also opt in to bounded INFO previews (see below).
- `--enable-log-request-prompts` (requires `--enable-log-requests`) adds **truncated** prompt summaries to INFO lines. It is **off by default** because prompts can contain sensitive data that may end up in log aggregation systems. See [Security](security.md#logging-and-client-payloads).
- `export VLLM_LOG_STATS_INTERVAL=1.` to get log statistics more frequently for tracking running queue, waiting queue and cache hit states.
- `export CUDA_LAUNCH_BLOCKING=1` to identify which CUDA kernel is causing the problem.
- `export NCCL_DEBUG=TRACE` to turn on more logging for NCCL.
Expand Down
13 changes: 13 additions & 0 deletions tests/entrypoints/openai/test_cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,19 @@ def test_chat_template_validation_for_sad_paths(serve_parser):
validate_parsed_serve_args(args)


def test_enable_log_request_prompts_requires_enable_log_requests(serve_parser):
    """--enable-log-request-prompts alone must be rejected by validation."""
    parsed = serve_parser.parse_args(args=["--enable-log-request-prompts"])
    with pytest.raises(TypeError, match="--enable-log-request-prompts"):
        validate_parsed_serve_args(parsed)


def test_enable_log_request_prompts_passes_with_log_requests(serve_parser):
    """Validation succeeds when both logging flags are supplied together."""
    cli = ["--enable-log-requests", "--enable-log-request-prompts"]
    parsed = serve_parser.parse_args(args=cli)
    validate_parsed_serve_args(parsed)


@pytest.mark.parametrize(
"cli_args, expected_middleware",
[
Expand Down
24 changes: 24 additions & 0 deletions tests/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,11 +472,35 @@ def test_request_logger_log_outputs_integration():

assert "Received request %s" in input_call[0]
assert input_call[1] == "test-integration"
# Prompts at INFO require explicit --enable-log-request-prompts (security).
assert input_call[4] == ""

assert "Generated response %s%s" in output_call[0]
assert output_call[1] == "test-integration"


def test_request_logger_log_inputs_prompt_at_info_when_opt_in():
    """With log_prompts_at_info=True, the INFO log line carries the prompt text."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None, log_prompts_at_info=True)
        req_logger.log_inputs(
            request_id="test-prompt-info",
            prompt="Hello",
            prompt_token_ids=None,
            prompt_embeds=None,
            params=None,
            lora_request=None,
        )

    fake_logger.info.assert_called_once()
    positional_args = fake_logger.info.call_args.args
    assert positional_args[1] == "test-prompt-info"
    assert "Hello" in positional_args[4]


def test_streaming_complete_logs_full_text_content():
"""Test that streaming complete logging includes
full accumulated text, not just token count."""
Expand Down
5 changes: 3 additions & 2 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2280,8 +2280,9 @@ def add_cli_args(
action=argparse.BooleanOptionalAction,
default=AsyncEngineArgs.enable_log_requests,
help="Enable logging request information, dependent on log level:\n"
"- INFO: Request ID, parameters and LoRA request.\n"
"- DEBUG: Prompt inputs (e.g: text, token IDs).\n"
"- INFO: Request ID, parameters and LoRA request (and truncated "
"prompt inputs if `--enable-log-request-prompts` is set).\n"
"- DEBUG: Full prompt inputs (e.g. text, token IDs).\n"
"You can set the minimum log level via `VLLM_LOGGING_LEVEL`.",
)
current_platform.pre_register_and_update(parser)
Expand Down
56 changes: 54 additions & 2 deletions vllm/entrypoints/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,36 @@

logger = init_logger(__name__)

# With `--enable-log-request-prompts`, INFO logs may include a bounded prompt
# preview when `--max-log-len` is unset. Full inputs remain at DEBUG.
# See github.com/vllm-project/vllm/issues/38537.
_DEFAULT_INFO_PROMPT_STR_LEN = 4096
_DEFAULT_INFO_PROMPT_TOKEN_IDS = 512


class RequestLogger:
def __init__(self, *, max_log_len: int | None) -> None:
def __init__(
self,
*,
max_log_len: int | None,
log_prompts_at_info: bool = False,
) -> None:
self.max_log_len = max_log_len
self.log_prompts_at_info = log_prompts_at_info

if not logger.isEnabledFor(logging.INFO):
logger.warning_once(
"`--enable-log-requests` is set but "
"the minimum log level is higher than INFO. "
"No request information will be logged."
)
elif self.log_prompts_at_info and not logger.isEnabledFor(logging.DEBUG):
logger.info_once(
"`--enable-log-request-prompts` is set but "
"the minimum log level is higher than DEBUG. "
"Prompt details at INFO are truncated when long; "
"set `VLLM_LOGGING_LEVEL=DEBUG` for full details."
)
elif not logger.isEnabledFor(logging.DEBUG):
logger.info_once(
"`--enable-log-requests` is set but "
Expand All @@ -32,6 +51,35 @@ def __init__(self, *, max_log_len: int | None) -> None:
"To view more details, set `VLLM_LOGGING_LEVEL=DEBUG`."
)
Comment on lines 47 to 52
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The updated hint message specifically mentions 'Prompt text', but the implementation also includes prompt_token_ids and prompt_embeds shape in the INFO logs. To be more accurate and consistent with the changes in _prompt_summary_for_info, the message should refer to 'Prompt details' or 'Prompt inputs'.

Suggested change
logger.info_once(
"`--enable-log-requests` is set but "
"the minimum log level is higher than DEBUG. "
"Only limited information will be logged to minimize overhead. "
"To view more details, set `VLLM_LOGGING_LEVEL=DEBUG`."
"Prompt text at INFO is truncated when long; "
"set `VLLM_LOGGING_LEVEL=DEBUG` for full details."
)
logger.info_once(
"`--enable-log-requests` is set but "
"the minimum log level is higher than DEBUG. "
"Prompt details at INFO are truncated when long; "
"set `VLLM_LOGGING_LEVEL=DEBUG` for full details."
)


def _prompt_summary_for_info(
    self,
    prompt: str | None,
    prompt_token_ids: list[int] | None,
    prompt_embeds: torch.Tensor | None,
) -> str:
    """Build a bounded prompt preview for INFO-level request logs.

    Returns an empty string unless prompt logging at INFO was explicitly
    enabled (``log_prompts_at_info``) and INFO logging is active. Otherwise
    returns a ``", prompt..."`` suffix describing whichever input is present,
    truncated to ``max_log_len`` (or the module defaults when unset).
    """
    # Opt-in only: prompts may contain sensitive data, so stay silent
    # unless the flag is set and INFO records would actually be emitted.
    if not (self.log_prompts_at_info and logger.isEnabledFor(logging.INFO)):
        return ""

    # `--max-log-len`, when given, bounds both text and token previews;
    # otherwise fall back to the per-kind module defaults.
    if self.max_log_len is not None:
        char_limit = token_limit = self.max_log_len
    else:
        char_limit = _DEFAULT_INFO_PROMPT_STR_LEN
        token_limit = _DEFAULT_INFO_PROMPT_TOKEN_IDS

    if prompt is not None:
        return f", prompt: {prompt[:char_limit]!r}"
    if prompt_token_ids is not None:
        return f", prompt_token_ids: {prompt_token_ids[:token_limit]}"
    if prompt_embeds is not None:
        # Never log embedding contents; the shape is enough for debugging.
        return f", prompt_embeds: shape={prompt_embeds.shape}"
    return ""

def log_inputs(
self,
request_id: str,
Expand Down Expand Up @@ -60,11 +108,15 @@ def log_inputs(
prompt_embeds.shape if prompt_embeds is not None else None,
)

prompt_summary = self._prompt_summary_for_info(
prompt, prompt_token_ids, prompt_embeds
)
logger.info(
"Received request %s: params: %s, lora_request: %s.",
"Received request %s: params: %s, lora_request: %s%s.",
request_id,
params,
lora_request,
prompt_summary,
)

def log_outputs(
Expand Down
10 changes: 8 additions & 2 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,10 @@ async def init_app_state(
served_model_names = [args.model]

if args.enable_log_requests:
request_logger = RequestLogger(max_log_len=args.max_log_len)
request_logger = RequestLogger(
max_log_len=args.max_log_len,
log_prompts_at_info=args.enable_log_request_prompts,
)
else:
request_logger = None

Expand Down Expand Up @@ -445,7 +448,10 @@ async def init_render_app_state(
)

if args.enable_log_requests:
request_logger = RequestLogger(max_log_len=args.max_log_len)
request_logger = RequestLogger(
max_log_len=args.max_log_len,
log_prompts_at_info=args.enable_log_request_prompts,
)
else:
request_logger = None

Expand Down
14 changes: 14 additions & 0 deletions vllm/entrypoints/openai/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ class BaseFrontendArgs:
"""If set to True, log model outputs (generations).
Requires `--enable-log-requests`. As with `--enable-log-requests`,
information is only logged at INFO level at maximum."""
enable_log_request_prompts: bool = False
"""If set to True, include truncated prompt inputs (text, token ids, or
embedding tensor shape) in INFO-level request logs when
`--enable-log-requests` is set. **Off by default:** logging client
payloads can expose sensitive data in log sinks. Requires
`--enable-log-requests`. Use `VLLM_LOGGING_LEVEL=DEBUG` for full prompt
details without this flag."""
enable_log_deltas: bool = True
"""If set to False, output deltas will not be logged. Relevant only if
--enable-log-outputs is set.
Expand Down Expand Up @@ -364,6 +371,13 @@ def validate_parsed_serve_args(args: argparse.Namespace):
raise TypeError("Error: --enable-auto-tool-choice requires --tool-call-parser")
if args.enable_log_outputs and not args.enable_log_requests:
raise TypeError("Error: --enable-log-outputs requires --enable-log-requests")
if (
getattr(args, "enable_log_request_prompts", False)
and not args.enable_log_requests
):
raise TypeError(
"Error: --enable-log-request-prompts requires --enable-log-requests"
)


def create_parser_for_docs() -> FlexibleArgumentParser:
Expand Down
Loading