diff --git a/vllm/envs.py b/vllm/envs.py index 674c1cde2a3e..15fa5fc3e417 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -169,6 +169,7 @@ VLLM_XGRAMMAR_CACHE_MB: int = 0 VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256 VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False + VLLM_DISABLE_REQUEST_ID_RANDOMIZATION: bool = False VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost" VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5600 VLLM_MOONCAKE_BOOTSTRAP_PORT: int = 8998 @@ -1236,6 +1237,11 @@ def _get_or_set_default() -> str: "VLLM_ALLOW_INSECURE_SERIALIZATION": lambda: bool( int(os.getenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "0")) ), + # Temporary: skip adding random suffix to internal request IDs. May be + # needed for KV connectors that match request IDs across instances. + "VLLM_DISABLE_REQUEST_ID_RANDOMIZATION": lambda: bool( + int(os.getenv("VLLM_DISABLE_REQUEST_ID_RANDOMIZATION", "0")) + ), # IP address used for NIXL handshake between remote agents. "VLLM_NIXL_SIDE_CHANNEL_HOST": lambda: os.getenv( "VLLM_NIXL_SIDE_CHANNEL_HOST", "localhost" diff --git a/vllm/v1/engine/input_processor.py b/vllm/v1/engine/input_processor.py index 1bda736fe6a4..4f92d95d0b9f 100644 --- a/vllm/v1/engine/input_processor.py +++ b/vllm/v1/engine/input_processor.py @@ -5,6 +5,7 @@ from collections.abc import Mapping from typing import Any, Literal, cast +import vllm.envs as envs from vllm.config import VllmConfig from vllm.inputs.data import ( ProcessorInputs, @@ -299,7 +300,14 @@ def assign_request_id(request: EngineCoreRequest): " passed to vLLM; use the request_id field." ) request.external_req_id = request.request_id - request.request_id = f"{request.external_req_id}-{random_uuid():.8}" + if envs.VLLM_DISABLE_REQUEST_ID_RANDOMIZATION: + logger.warning_once( + "VLLM_DISABLE_REQUEST_ID_RANDOMIZATION is set and will be " + "removed in a future release. Duplicate externally-provided " + "request IDs may cause failures and/or subtle correctness errors." + ) + else: + request.request_id = f"{request.external_req_id}-{random_uuid():.8}" def process_inputs( self,