diff --git a/tests/models/quantization/test_nvfp4.py b/tests/models/quantization/test_nvfp4.py
index 5ca307a4b191..afbcb1e5aeca 100644
--- a/tests/models/quantization/test_nvfp4.py
+++ b/tests/models/quantization/test_nvfp4.py
@@ -90,9 +90,9 @@ def test_models(example_prompts, model_name) -> None:
 EAGER = [True, False]
 
 SM_100_NVFP4_BACKENDS = [
-    "flashinfer_cudnn",
-    "flashinfer_trtllm",
-    "flashinfer_cutlass",
+    "flashinfer-cudnn",
+    "flashinfer-trtllm",
+    "flashinfer-cutlass",
 ]
 
 
@@ -102,12 +102,12 @@ def test_models(example_prompts, model_name) -> None:
     "backend",
     [
         "emulation",
-        "flashinfer_cudnn",
-        "flashinfer_trtllm",  # the small seq_len ensures trtllm_8x4_layout backend is used
-        "flashinfer_cutlass",
+        "flashinfer-cudnn",
+        "flashinfer-trtllm",  # the small seq_len ensures trtllm_8x4_layout backend is used
+        "flashinfer-cutlass",
     ],
 )
-def test_nvfp4(vllm_runner, model, eager, backend):
+def test_nvfp4(vllm_runner, model, eager, backend, monkeypatch):
     if (
         not current_platform.has_device_capability(100)
         and backend in SM_100_NVFP4_BACKENDS
@@ -116,7 +116,8 @@ def test_nvfp4(vllm_runner, model, eager, backend):
             f"The backend {backend} is not supported with current_platform.has_device_capability(100) == False"
         )
 
-    with vllm_runner(model, enforce_eager=eager, linear_backend=backend) as llm:
+    monkeypatch.setenv("VLLM_NVFP4_GEMM_BACKEND", backend)
+    with vllm_runner(model, enforce_eager=eager) as llm:
         output = llm.generate_greedy(["1 2 3 4 5"], max_tokens=2)
     assert output[0][1] == "1 2 3 4 5 6"
 
diff --git a/vllm/config/kernel.py b/vllm/config/kernel.py
index 66b805383fcb..f450dd9b32c2 100644
--- a/vllm/config/kernel.py
+++ b/vllm/config/kernel.py
@@ -135,24 +135,6 @@ def with_default(
     "emulation",
 ]
 
-LinearBackend = Literal[
-    "auto",
-    "cutlass",
-    "flashinfer_cutlass",
-    "flashinfer_trtllm",
-    "flashinfer_cudnn",
-    "marlin",
-    "triton",
-    "deep_gemm",
-    "torch",
-    "aiter",
-    "machete",
-    "fbgemm",
-    "conch",
-    "exllama",
-    "emulation",
-]
-
 
 @config
 class KernelConfig:
@@ -186,25 +168,6 @@ class KernelConfig:
                    running QDQ on activations.
     """
 
-    linear_backend: LinearBackend = "auto"
-    """Backend for quantized linear layer GEMM kernels. Available options:
-
-    - "auto": Automatically select the best backend based on model and hardware
-    - "cutlass": Use CUTLASS-based kernels
-    - "flashinfer_cutlass": Use FlashInfer with CUTLASS kernels
-    - "flashinfer_trtllm": Use FlashInfer with TensorRT-LLM kernels
-    - "flashinfer_cudnn": Use FlashInfer with cuDNN kernels
-    - "marlin": Use Marlin kernels
-    - "triton": Use Triton-based kernels
-    - "deep_gemm": Use DeepGEMM kernels
-    - "torch": Use PyTorch native scaled_mm kernels
-    - "aiter": Use AMD AITer kernels (ROCm only)
-    - "machete": Use Machete kernels (mixed-precision)
-    - "fbgemm": Use FBGEMM kernels
-    - "conch": Use Conch mixed-precision kernels
-    - "exllama": Use Exllama mixed-precision kernels
-    - "emulation": Use slow dequant-to-BF16 emulation (for testing only)"""
-
     @field_validator("moe_backend", mode="before")
     @classmethod
     def _normalize_moe_backend(cls, value: Any) -> Any:
@@ -212,13 +175,6 @@ def _normalize_moe_backend(cls, value: Any) -> Any:
             return value.lower().replace("-", "_")
         return value
 
-    @field_validator("linear_backend", mode="before")
-    @classmethod
-    def _normalize_linear_backend(cls, value: Any) -> Any:
-        if isinstance(value, str):
-            return value.lower().replace("-", "_")
-        return value
-
     def compute_hash(self) -> str:
         """
         Produces a hash unique to the pass configuration.
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d6e3770d4c35..2b5e242bb2f4 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -71,7 +71,7 @@
     PrefixCachingHashAlgo,
 )
 from vllm.config.device import Device
-from vllm.config.kernel import IrOpPriorityConfig, LinearBackend, MoEBackend
+from vllm.config.kernel import IrOpPriorityConfig, MoEBackend
 from vllm.config.lora import MaxLoRARanks
 from vllm.config.mamba import MambaBackendEnum
 from vllm.config.model import (
@@ -477,7 +477,6 @@ class EngineArgs:
     enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
     enable_ep_weight_filter: bool = ParallelConfig.enable_ep_weight_filter
     moe_backend: MoEBackend = KernelConfig.moe_backend
-    linear_backend: LinearBackend = KernelConfig.linear_backend
     all2all_backend: All2AllBackend = ParallelConfig.all2all_backend
     enable_elastic_ep: bool = ParallelConfig.enable_elastic_ep
     enable_dbo: bool = ParallelConfig.enable_dbo
@@ -1413,9 +1412,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         moe_backend_kwargs = kernel_kwargs["moe_backend"]
         moe_backend_kwargs["type"] = lambda s: s.lower().replace("-", "_")
         kernel_group.add_argument("--moe-backend", **moe_backend_kwargs)
-        linear_backend_kwargs = kernel_kwargs["linear_backend"]
-        linear_backend_kwargs["type"] = lambda s: s.lower().replace("-", "_")
-        kernel_group.add_argument("--linear-backend", **linear_backend_kwargs)
 
         # vLLM arguments
         vllm_kwargs = get_kwargs(VllmConfig)
@@ -2089,8 +2085,6 @@ def create_engine_config(
             kernel_config.enable_flashinfer_autotune = self.enable_flashinfer_autotune
         if self.moe_backend != "auto":
             kernel_config.moe_backend = self.moe_backend
-        if self.linear_backend != "auto":
-            kernel_config.linear_backend = self.linear_backend
 
         # Transfer top-level ir_op_priority into KernelConfig.ir_op_priority
         for op_name, op_priority in asdict(self.ir_op_priority).items():
diff --git a/vllm/model_executor/kernels/linear/__init__.py b/vllm/model_executor/kernels/linear/__init__.py
index 277fb28f3c91..83d925637369 100644
--- a/vllm/model_executor/kernels/linear/__init__.py
+++ b/vllm/model_executor/kernels/linear/__init__.py
@@ -13,7 +13,6 @@
 import stability.
 """
 
-import warnings
 from typing import TypeVar
 
 import torch
@@ -162,96 +161,6 @@
 
 logger = init_logger(__name__)
 
-
-def _get_linear_backend() -> str:
-    """Get the linear_backend setting from the current vllm config."""
-    from vllm.config import get_current_vllm_config_or_none
-
-    config = get_current_vllm_config_or_none()
-    if config is not None:
-        return config.kernel_config.linear_backend
-    return "auto"
-
-
-# Mapping from linear_backend name to the set of kernel classes it covers.
-# When a user sets --linear-backend <name>, only kernels in the corresponding
-# set are considered candidates. If none can implement the layer config,
-# an error is raised to respect the user's explicit intent.
-_LINEAR_BACKEND_KERNEL_MAP: dict[str, set[type]] = {
-    "cutlass": {
-        CutlassInt8ScaledMMLinearKernel,
-        CutlassFP8ScaledMMLinearKernel,
-        CutlassFp8BlockScaledMMKernel,
-        CutlassW4A8LinearKernel,
-        CutlassNvFp4LinearKernel,
-    },
-    "flashinfer_cutlass": {
-        FlashInferFP8ScaledMMLinearKernel,
-        FlashInferFp8DeepGEMMDynamicBlockScaledKernel,
-        FlashInferCutlassMxfp8LinearKernel,
-        FlashInferCutlassNvFp4LinearKernel,
-        FlashInferMxFp4LinearKernel,
-    },
-    "flashinfer_trtllm": {
-        FlashInferTrtllmNvFp4LinearKernel,
-    },
-    "flashinfer_cudnn": {
-        FlashInferCudnnNvFp4LinearKernel,
-    },
-    "marlin": {
-        MarlinFP8ScaledMMLinearKernel,
-        MarlinLinearKernel,
-        MarlinMxfp8LinearKernel,
-        MarlinNvFp4LinearKernel,
-        MarlinMxFp4LinearKernel,
-    },
-    "triton": {
-        TritonInt8ScaledMMLinearKernel,
-        TritonFp8BlockScaledMMKernel,
-        TritonW4A16LinearKernel,
-    },
-    "deep_gemm": {
-        DeepGemmFp8BlockScaledMMKernel,
-    },
-    "torch": {
-        PerTensorTorchFP8ScaledMMLinearKernel,
-        ChannelWiseTorchFP8ScaledMMLinearKernel,
-        RowWiseTorchFP8ScaledMMLinearKernel,
-    },
-    "aiter": {
-        AiterInt8ScaledMMLinearKernel,
-        AiterFp8BlockScaledMMKernel,
-        AiterPerTokenFp8ScaledMMLinearKernel,
-        AiterPreshuffledPerTokenFp8ScaledMMLinearKernel,
-    },
-    "machete": {
-        MacheteLinearKernel,
-    },
-    "fbgemm": {
-        FbgemmNvFp4LinearKernel,
-    },
-    "conch": {
-        ConchLinearKernel,
-    },
-    "exllama": {
-        ExllamaLinearKernel,
-    },
-    "emulation": {
-        EmulationMxfp8LinearKernel,
-        EmulationNvFp4LinearKernel,
-    },
-}
-
-
-def _filter_kernels_by_backend(
-    backend: str,
-    kernels: list[type],
-) -> list[type]:
-    """Filter a kernel priority list to only those matching the backend."""
-    backend_kernels = _LINEAR_BACKEND_KERNEL_MAP.get(backend, set())
-    return [k for k in kernels if k in backend_kernels]
-
-
 # in priority/performance order (when available)
 _POSSIBLE_INT8_KERNELS: dict[PlatformEnum, list[type[Int8ScaledMMLinearKernel]]] = {
     PlatformEnum.CPU: [CPUInt8ScaledMMLinearKernel],
@@ -466,20 +375,7 @@ def choose_scaled_mm_linear_kernel(
             scope="global",
         )
 
-    platform_kernels = possible_kernels[current_platform._enum]
-
-    # Apply --linear-backend filtering when set.
-    linear_backend = _get_linear_backend()
-    if linear_backend != "auto":
-        filtered = _filter_kernels_by_backend(linear_backend, platform_kernels)
-        if not filtered:
-            raise ValueError(
-                f"--linear-backend={linear_backend} was requested but no "
-                f"'{linear_backend}' kernel exists for this layer type."
-            )
-        platform_kernels = filtered
-
-    for kernel in platform_kernels:
+    for kernel in possible_kernels[current_platform._enum]:
         is_supported_and_can_implement, failure_reason = (
             is_supported_and_can_implement_kernel(kernel, config, compute_capability)
         )
@@ -630,21 +526,8 @@ def choose_mp_linear_kernel(
         if _cc is not None:
             compute_capability = _cc[0] * 10 + _cc[1]
 
-    platform_kernels = _POSSIBLE_KERNELS[current_platform._enum]
-
-    # Apply --linear-backend filtering when set.
-    linear_backend = _get_linear_backend()
-    if linear_backend != "auto":
-        filtered = _filter_kernels_by_backend(linear_backend, platform_kernels)
-        if not filtered:
-            raise ValueError(
-                f"--linear-backend={linear_backend} was requested but no "
-                f"'{linear_backend}' kernel exists for mixed-precision layers."
-            )
-        platform_kernels = filtered
-
     failure_reasons = []
-    for kernel in platform_kernels:
+    for kernel in _POSSIBLE_KERNELS[current_platform._enum]:
         if kernel.__name__ in envs.VLLM_DISABLED_KERNELS:
             failure_reasons.append(
                 f" {kernel.__name__} disabled by environment variable"
@@ -681,18 +564,7 @@ def init_mxfp8_linear_kernel() -> Mxfp8LinearKernel:
     config = Mxfp8LinearLayerConfig()
 
     platform = current_platform._enum
-    possible = list(_POSSIBLE_MXFP8_KERNELS.get(platform, []))
-
-    # Apply --linear-backend filtering when set.
-    linear_backend = _get_linear_backend()
-    if linear_backend != "auto":
-        filtered = _filter_kernels_by_backend(linear_backend, possible)
-        if not filtered:
-            raise ValueError(
-                f"--linear-backend={linear_backend} was requested but no "
-                f"'{linear_backend}' kernel exists for MXFP8 layers."
-            )
-        possible = filtered
+    possible = _POSSIBLE_MXFP8_KERNELS.get(platform, [])
 
     failure_reasons = []
     for kernel_cls in possible:
@@ -724,10 +596,8 @@ def init_mxfp8_linear_kernel() -> Mxfp8LinearKernel:
 def init_mxfp4_linear_kernel() -> MxFp4LinearKernel:
     """Select and instantiate the best MXFP4 linear kernel for the
     current platform."""
-    linear_backend = _get_linear_backend()
-
     force_kernel: type[MxFp4LinearKernel] | None = None
-    if linear_backend == "auto" and envs.VLLM_MXFP4_USE_MARLIN:
+    if envs.VLLM_MXFP4_USE_MARLIN:
         force_kernel = MarlinMxFp4LinearKernel
 
     if force_kernel is not None:
@@ -741,17 +611,7 @@ def init_mxfp4_linear_kernel() -> MxFp4LinearKernel:
         return force_kernel(MxFp4LinearLayerConfig())
 
     platform = current_platform._enum
-    possible = list(_POSSIBLE_MXFP4_KERNELS.get(platform, []))
-
-    # Apply --linear-backend filtering when set.
-    if linear_backend != "auto":
-        filtered = _filter_kernels_by_backend(linear_backend, possible)
-        if not filtered:
-            raise ValueError(
-                f"--linear-backend={linear_backend} was requested but no "
-                f"'{linear_backend}' kernel exists for MXFP4 layers."
-            )
-        possible = filtered
+    possible = _POSSIBLE_MXFP4_KERNELS.get(platform, [])
 
     failure_reasons = []
     for kernel_cls in possible:
@@ -826,59 +686,26 @@ def init_nvfp4_linear_kernel() -> NvFp4LinearKernel:
     current platform."""
     config = NvFp4LinearLayerConfig()
 
-    # VLLM_BATCH_INVARIANT unconditionally forces emulation for deterministic
-    # execution. It overrides both --linear-backend and the deprecated env
-    # vars below.
+    # Env-var overrides in priority order; the first one that is set wins.
     force_kernel: type[NvFp4LinearKernel] | None = None
-    linear_backend = _get_linear_backend()
     if envs.VLLM_BATCH_INVARIANT:
-        if linear_backend not in ("auto", "emulation"):
-            logger.warning_once(
-                "VLLM_BATCH_INVARIANT overrides --linear-backend=%s; using "
-                "the emulation backend for deterministic execution.",
-                linear_backend,
-            )
-        else:
-            logger.info_once(
-                "VLLM_BATCH_INVARIANT forces NVFP4 linear to use the "
-                "emulation backend for deterministic execution."
-            )
+        logger.info_once(
+            "VLLM_BATCH_INVARIANT forces NVFP4 linear to use the "
+            "emulation backend for deterministic execution."
+        )
         force_kernel = EmulationNvFp4LinearKernel
-    elif linear_backend == "auto":
-        # Deprecated env-var overrides — only honoured when --linear-backend
-        # is "auto". Will be removed in v0.21; users should migrate to
-        # --linear-backend.
-        if envs.VLLM_USE_FBGEMM:
-            warnings.warn(
-                "VLLM_USE_FBGEMM is deprecated and will be removed in "
-                "v0.21. Use --linear-backend fbgemm instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            force_kernel = FbgemmNvFp4LinearKernel
-        elif envs.VLLM_USE_NVFP4_CT_EMULATIONS:
-            warnings.warn(
-                "VLLM_USE_NVFP4_CT_EMULATIONS is deprecated and will be "
-                "removed in v0.21. Use --linear-backend emulation instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            force_kernel = EmulationNvFp4LinearKernel
-        elif envs.VLLM_NVFP4_GEMM_BACKEND is not None:
-            warnings.warn(
-                "VLLM_NVFP4_GEMM_BACKEND is deprecated and will be "
-                "removed in v0.21. Use --linear-backend instead.",
-                DeprecationWarning,
-                stacklevel=2,
+    elif envs.VLLM_USE_FBGEMM:
+        force_kernel = FbgemmNvFp4LinearKernel
+    elif envs.VLLM_USE_NVFP4_CT_EMULATIONS:
+        force_kernel = EmulationNvFp4LinearKernel
+    elif envs.VLLM_NVFP4_GEMM_BACKEND is not None:
+        backend_name = envs.VLLM_NVFP4_GEMM_BACKEND
+        force_kernel = _NVFP4_BACKEND_TO_KERNEL.get(backend_name)
+        if force_kernel is None:
+            raise ValueError(
+                f"Unknown VLLM_NVFP4_GEMM_BACKEND={backend_name!r}. "
+                f"Valid choices: {list(_NVFP4_BACKEND_TO_KERNEL.keys())}"
             )
-            backend_name = envs.VLLM_NVFP4_GEMM_BACKEND
-            force_kernel = _NVFP4_BACKEND_TO_KERNEL.get(backend_name)
-            if force_kernel is None:
-                raise ValueError(
-                    f"Unknown VLLM_NVFP4_GEMM_BACKEND={backend_name!r}. "
-                    f"Valid choices: "
-                    f"{list(_NVFP4_BACKEND_TO_KERNEL.keys())}"
-                )
 
     if force_kernel is not None:
         is_supported, reason = force_kernel.is_supported()
@@ -890,19 +717,9 @@ def init_nvfp4_linear_kernel() -> NvFp4LinearKernel:
         logger.info_once("Using %s for NVFP4 GEMM", force_kernel.__name__)
         return force_kernel(config)
 
-    # Auto-select from registry (or --linear-backend filtered).
+    # Auto-select from registry.
     platform = current_platform._enum
-    possible = list(_POSSIBLE_NVFP4_KERNELS.get(platform, []))
-
-    # Apply --linear-backend filtering when set.
-    if linear_backend != "auto":
-        filtered = _filter_kernels_by_backend(linear_backend, possible)
-        if not filtered:
-            raise ValueError(
-                f"--linear-backend={linear_backend} was requested but no "
-                f"'{linear_backend}' kernel exists for NVFP4 layers."
-            )
-        possible = filtered
+    possible = _POSSIBLE_NVFP4_KERNELS.get(platform, [])
 
     failure_reasons = []
     for kernel_cls in possible: