1 change: 1 addition & 0 deletions docs/design/plugin_system.md
@@ -155,3 +155,4 @@ The interface for the model/module may change during vLLM's development. If you
- `use_v1` parameter in `Platform.get_attn_backend_cls` is deprecated. It has been removed in v0.13.0.
- `_Backend` in `vllm.attention` is deprecated. It has been removed in v0.13.0. Please use `vllm.v1.attention.backends.registry.register_backend` to add new attention backend to `AttentionBackendEnum` instead.
- `seed_everything` platform interface is deprecated. It has been removed in v0.16.0. Please use `vllm.utils.torch_utils.set_random_seed` instead.
- `prompt` in `Platform.validate_request` is deprecated and will be removed in v0.18.0.
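The `vllm/platforms/interface.py` hunk below shows the replacement signature, which takes the processed inputs first and drops the raw prompt. A minimal sketch of how an out-of-tree platform plugin could adopt the new signature — `MyPlatform` and its `n > 1` check are hypothetical and not part of this PR:

```python
from vllm.inputs import ProcessorInputs
from vllm.platforms.interface import Platform
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams


class MyPlatform(Platform):  # hypothetical out-of-tree platform plugin
    @classmethod
    def validate_request(
        cls,
        processed_inputs: ProcessorInputs,
        params: SamplingParams | PoolingParams,
    ) -> None:
        """Raise if this request is unsupported on this platform."""
        if isinstance(params, SamplingParams) and params.n > 1:
            raise ValueError("MyPlatform does not support n > 1")
```

Plugins that still implement the old `(prompt, params, processed_inputs)` signature keep working until v0.18 via the compatibility shim added in `vllm/v1/engine/input_processor.py` below.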
24 changes: 0 additions & 24 deletions vllm/entrypoints/llm.py
@@ -519,7 +519,6 @@ def enqueue(
),
params=seq_params,
lora_requests=seq_lora_requests,
tokenization_kwargs=tokenization_kwargs,
priorities=seq_priority,
)

@@ -1813,7 +1812,6 @@ def _run_completion(
params=seq_params,
use_tqdm=use_tqdm,
lora_requests=seq_lora_requests,
tokenization_kwargs=tokenization_kwargs,
priorities=seq_priority,
)

@@ -1872,7 +1870,6 @@ def _run_chat(
params=seq_params,
lora_requests=seq_lora_requests,
use_tqdm=use_tqdm,
tokenization_kwargs=tokenization_kwargs,
)

def _render_and_run_requests(
@@ -1881,7 +1878,6 @@ def _render_and_run_requests(
params: Sequence[SamplingParams | PoolingParams],
*,
lora_requests: Sequence[LoRARequest | None] | None = None,
tokenization_kwargs: dict[str, Any] | None = None,
priorities: Sequence[int] | None = None,
use_tqdm: bool | Callable[..., tqdm] = True,
):
@@ -1899,7 +1895,6 @@
prompts=prompts,
params=params,
lora_requests=lora_requests,
tokenization_kwargs=tokenization_kwargs,
priorities=priorities,
)

@@ -1911,7 +1906,6 @@ def _render_and_add_requests(
params: Sequence[SamplingParams | PoolingParams],
*,
lora_requests: Sequence[LoRARequest | None] | None = None,
tokenization_kwargs: dict[str, Any] | None = None,
priorities: Sequence[int] | None = None,
) -> list[str]:
added_request_ids: list[str] = []
@@ -1922,7 +1916,6 @@
prompt,
params[i],
lora_request=None if lora_requests is None else lora_requests[i],
tokenization_kwargs=tokenization_kwargs,
priority=0 if priorities is None else priorities[i],
)
added_request_ids.append(request_id)
@@ -1938,7 +1931,6 @@ def _add_request(
prompt: ProcessorInputs,
params: SamplingParams | PoolingParams,
lora_request: LoRARequest | None = None,
tokenization_kwargs: dict[str, Any] | None = None,
priority: int = 0,
) -> str:
if isinstance(params, SamplingParams):
@@ -1947,27 +1939,11 @@

request_id = str(next(self.request_counter))

if params.truncate_prompt_tokens is not None:
params_type = type(params).__name__
warnings.warn(
f"The `truncate_prompt_tokens` parameter in `{params_type}` "
"is deprecated and will be removed in v0.16. "
"Please pass it via `tokenization_kwargs` instead.",
DeprecationWarning,
stacklevel=2,
)

tokenization_kwargs = merge_kwargs(
tokenization_kwargs,
dict(truncate_prompt_tokens=params.truncate_prompt_tokens),
)

return self.llm_engine.add_request(
request_id,
prompt,
params,
lora_request=lora_request,
tokenization_kwargs=tokenization_kwargs,
priority=priority,
)

5 changes: 2 additions & 3 deletions vllm/platforms/interface.py
@@ -17,7 +17,7 @@
from torch.distributed import PrefixStore, ProcessGroup

from vllm.config import VllmConfig
from vllm.inputs import ProcessorInputs, PromptType
from vllm.inputs import ProcessorInputs
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams
from vllm.utils.argparse_utils import FlexibleArgumentParser
@@ -568,9 +568,8 @@ def opaque_attention_op(cls) -> bool:
@classmethod
def validate_request(
cls,
prompt: "PromptType | ProcessorInputs",
params: "SamplingParams | PoolingParams",
processed_inputs: "ProcessorInputs",
params: "SamplingParams | PoolingParams",
) -> None:
"""Raises if this request is unsupported on this platform"""

23 changes: 7 additions & 16 deletions vllm/v1/engine/async_llm.py
@@ -27,7 +27,7 @@
from vllm.outputs import STREAM_FINISHED, PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import get_io_processor
from vllm.pooling_params import PoolingParams
from vllm.renderers import merge_kwargs, renderer_from_config
from vllm.renderers import renderer_from_config
from vllm.renderers.inputs.preprocess import extract_prompt_components
from vllm.sampling_params import RequestOutputKind, SamplingParams
from vllm.tasks import SupportedTask
@@ -319,21 +319,6 @@ async def add_request(
"prompt logprobs"
)

if params.truncate_prompt_tokens is not None:
params_type = type(params).__name__
warnings.warn(
f"The `truncate_prompt_tokens` parameter in `{params_type}` "
"is deprecated and will be removed in v0.16. "
"Please pass it via `tokenization_kwargs` instead.",
DeprecationWarning,
stacklevel=2,
)

tokenization_kwargs = merge_kwargs(
tokenization_kwargs,
dict(truncate_prompt_tokens=params.truncate_prompt_tokens),
)

if isinstance(prompt, AsyncGenerator):
if reasoning_ended is not None:
raise NotImplementedError
@@ -353,6 +338,12 @@

# Convert Input --> Request.
if isinstance(prompt, EngineCoreRequest):
logger.warning_once(
"Passing EngineCoreRequest to AsyncLLM.generate() and .add_requests() "
"is deprecated and will be removed in v0.18. You should instead pass "
"the outputs of Renderer.render_cmpl() or Renderer.render_chat()."
)

request = prompt
if request_id != request.request_id:
logger.warning_once(
60 changes: 53 additions & 7 deletions vllm/v1/engine/input_processor.py
@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import time
import warnings
from collections.abc import Mapping
from typing import Any, Literal

@@ -28,6 +29,7 @@
from vllm.tasks import POOLING_TASKS, SupportedTask
from vllm.tokenizers import TokenizerLike
from vllm.utils import length_from_prompt_token_ids_or_embeds, random_uuid
from vllm.utils.func_utils import supports_kw
from vllm.utils.jsontree import json_iter_leaves
from vllm.v1.engine import EngineCoreRequest

@@ -72,6 +74,33 @@ def __init__(
mm_registry=mm_registry,
)

from vllm.platforms import current_platform

platform_validate_request = current_platform.validate_request
if supports_kw(platform_validate_request, "prompt"):
logger.warning_once(
"The signature of Platform.validate_request has changed from "
"`(cls, prompt, params, processed_inputs) -> None` to "
"`(cls, processed_inputs, params) -> None`. The old signature "
"will no longer be supported starting from v0.18."
)

orig_validate_request = platform_validate_request

def compat_validate_request(
processed_inputs: ProcessorInputs,
params: SamplingParams | PoolingParams,
):
return orig_validate_request(
processed_inputs,
params,
processed_inputs, # type: ignore
) # type: ignore

platform_validate_request = compat_validate_request

self._platform_validate_request = platform_validate_request

@property
def tokenizer(self) -> TokenizerLike | None:
return self.renderer.tokenizer
@@ -87,6 +116,16 @@ def _validate_params(
supported_tasks: tuple[SupportedTask, ...] | None,
):
"""Raise `ValueError` if SamplingParams or PoolingParams is not valid."""
if params.truncate_prompt_tokens is not None:
params_type = type(params).__name__
warnings.warn(
f"The `truncate_prompt_tokens` parameter in `{params_type}` "
"is deprecated and will be removed in v0.17. "
"Please pass it via `tokenization_kwargs` instead.",
DeprecationWarning,
stacklevel=2,
)

if isinstance(params, SamplingParams):
params.verify(
self.model_config,
@@ -211,11 +250,24 @@ def process_inputs(
)

if isinstance(prompt, dict) and "type" in prompt:
if tokenization_kwargs:
logger.warning_once(
"Passing tokenization_kwargs to InputProcessor is deprecated "
"and will be removed in v0.18. You should instead pass "
"them to Renderer.render_cmpl() or Renderer.render_chat()."
)

if arrival_time is None:
arrival_time = prompt.get("arrival_time", time.time()) # type: ignore[assignment]

processed_inputs: ProcessorInputs = prompt # type: ignore[assignment]
else:
logger.warning_once(
"Passing raw prompts to InputProcessor is deprecated "
"and will be removed in v0.18. You should instead pass "
"the outputs of Renderer.render_cmpl() or Renderer.render_chat()."
)

if arrival_time is None:
arrival_time = time.time()

@@ -224,13 +276,7 @@
tokenization_kwargs=tokenization_kwargs,
)

from vllm.platforms import current_platform

current_platform.validate_request(
prompt=prompt,
params=params,
processed_inputs=processed_inputs,
)
self._platform_validate_request(processed_inputs, params)

encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs)
self._validate_model_inputs(encoder_inputs, decoder_inputs)
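Downstream of these warnings, the intended flow is to render prompts before they reach the engine and to pass tokenization options to the renderer. A rough sketch of that flow — `renderer_from_config`, `Renderer.render_cmpl()`, and `tokenization_kwargs` are named in this PR, but the exact `render_cmpl()` argument layout and the direct hand-off to `add_request()` are assumptions:

```python
from vllm.config import VllmConfig
from vllm.renderers import renderer_from_config
from vllm.sampling_params import SamplingParams
from vllm.v1.engine.llm_engine import LLMEngine


def add_rendered_request(engine: LLMEngine, vllm_config: VllmConfig) -> None:
    # Render the prompt up front; truncation now rides on the renderer call
    # instead of params.truncate_prompt_tokens or InputProcessor kwargs.
    renderer = renderer_from_config(vllm_config)
    processed_inputs = renderer.render_cmpl(
        "Explain KV caching in one sentence.",
        tokenization_kwargs={"truncate_prompt_tokens": 512},  # assumed kwarg placement
    )
    engine.add_request("req-0", processed_inputs, SamplingParams(max_tokens=64))
```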
8 changes: 7 additions & 1 deletion vllm/v1/engine/llm_engine.py
@@ -234,10 +234,16 @@ def add_request(

# Process raw inputs into the request.
if isinstance(prompt, EngineCoreRequest):
logger.warning_once(
"Passing EngineCoreRequest to LLMEngine.generate() and .add_requests() "
"is deprecated and will be removed in v0.18. You should instead pass "
"the outputs of Renderer.render_cmpl() or Renderer.render_chat()."
)

request = prompt
if request_id != request.request_id:
logger.warning_once(
"AsyncLLM.add_request() was passed a request_id parameter that "
"LLMEngine.add_request() was passed a request_id parameter that "
"does not match the EngineCoreRequest.request_id attribute. The "
"latter will be used, and the former will be ignored."
)