Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
VLLM_XGRAMMAR_CACHE_MB: int = 0
VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256
VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False
VLLM_DISABLE_REQUEST_ID_RANDOMIZATION: bool = False
VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost"
VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5600
VLLM_MOONCAKE_BOOTSTRAP_PORT: int = 8998
Expand Down Expand Up @@ -1236,6 +1237,11 @@ def _get_or_set_default() -> str:
"VLLM_ALLOW_INSECURE_SERIALIZATION": lambda: bool(
int(os.getenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "0"))
),
# Temporary (to be removed in a future release): skip adding a random suffix
# to internal request IDs. May be needed for KV connectors that match
# request IDs across instances.
"VLLM_DISABLE_REQUEST_ID_RANDOMIZATION": lambda: bool(
int(os.getenv("VLLM_DISABLE_REQUEST_ID_RANDOMIZATION", "0"))
),
# IP address used for NIXL handshake between remote agents.
"VLLM_NIXL_SIDE_CHANNEL_HOST": lambda: os.getenv(
"VLLM_NIXL_SIDE_CHANNEL_HOST", "localhost"
Expand Down
10 changes: 9 additions & 1 deletion vllm/v1/engine/input_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections.abc import Mapping
from typing import Any, Literal, cast

import vllm.envs as envs
from vllm.config import VllmConfig
from vllm.inputs.data import (
ProcessorInputs,
Expand Down Expand Up @@ -299,7 +300,14 @@ def assign_request_id(request: EngineCoreRequest):
" passed to vLLM; use the request_id field."
)
request.external_req_id = request.request_id
request.request_id = f"{request.external_req_id}-{random_uuid():.8}"
if envs.VLLM_DISABLE_REQUEST_ID_RANDOMIZATION:
logger.warning_once(
"VLLM_DISABLE_REQUEST_ID_RANDOMIZATION is set and will be "
"removed in a future release. Duplicate externally-provided "
"request IDs may cause failures and/or subtle correctness errors."
)
else:
request.request_id = f"{request.external_req_id}-{random_uuid():.8}"

def process_inputs(
self,
Expand Down