From 0a559972a70a8a3451ed639f5a1094b42fe953df Mon Sep 17 00:00:00 2001
From: Matthew Bonanni <mbonanni@redhat.com>
Date: Wed, 10 Dec 2025 13:07:01 -0500
Subject: [PATCH] Remove deprecated use_v1 parameter handling

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
---
 docs/design/plugin_system.md |  1 -
 vllm/attention/selector.py   | 46 ++++++++++--------------------------
 2 files changed, 12 insertions(+), 35 deletions(-)

diff --git a/docs/design/plugin_system.md b/docs/design/plugin_system.md
index 3485c40c3681..c91a4af66b3a 100644
--- a/docs/design/plugin_system.md
+++ b/docs/design/plugin_system.md
@@ -152,5 +152,4 @@ The interface for the model/module may change during vLLM's development. If you
 ## Deprecation announcement
 
 !!! warning "Deprecations"
-    - `use_v1` parameter in `Platform.get_attn_backend_cls` is deprecated. It will be removed in v0.13.0 or v1.0.0.
     - `_Backend` in `vllm.attention` is deprecated. It will be removed in v0.13.0 or v1.0.0. Please use `vllm.attention.backends.registry.register_backend` to add new attention backend to `AttentionBackendEnum` instead.
diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py
index f6aba271d2e9..bbf95ff00900 100644
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import inspect
 from functools import cache
 from typing import cast, get_args
 
@@ -73,39 +72,18 @@ def _cached_get_attn_backend(
 ) -> type[AttentionBackend]:
     from vllm.platforms import current_platform
 
-    sig = inspect.signature(current_platform.get_attn_backend_cls)
-    if "use_v1" in sig.parameters:
-        logger.warning_once(
-            "use_v1 parameter for get_attn_backend_cls is deprecated and will "
-            "be removed in v0.13.0 or v1.0.0, whichever is soonest. Please "
-            "remove it from your plugin code."
-        )
-        attention_cls = current_platform.get_attn_backend_cls(
-            backend,
-            head_size,
-            dtype,
-            kv_cache_dtype,
-            block_size,
-            True,  # use_v1
-            use_mla,
-            has_sink,
-            use_sparse,
-            use_mm_prefix,
-            attn_type,
-        )
-    else:
-        attention_cls = current_platform.get_attn_backend_cls(
-            backend,
-            head_size,
-            dtype,
-            kv_cache_dtype,
-            block_size,
-            use_mla,
-            has_sink,
-            use_sparse,
-            use_mm_prefix,
-            attn_type,
-        )
+    attention_cls = current_platform.get_attn_backend_cls(
+        backend,
+        head_size,
+        dtype,
+        kv_cache_dtype,
+        block_size,
+        use_mla,
+        has_sink,
+        use_sparse,
+        use_mm_prefix,
+        attn_type,
+    )
     if not attention_cls:
         raise ValueError(
             f"Invalid attention backend for {current_platform.device_name}"