vllm-project · laudney · Feb 17, 2026
diff --git a/vllm/model_executor/kernels/linear/scaled_mm/pytorch.py b/vllm/model_executor/kernels/linear/scaled_mm/pytorch.py
@@ -89,13 +89,8 @@ def is_supported(
         if not current_platform.is_rocm():
             return False, "requires ROCm."
 
-        from vllm.platforms.rocm import on_mi3xx
-
-        if not on_mi3xx():
-            return False, "requires MI3xx."
-
-        if compute_capability is not None and compute_capability < 94:
-            return False, "requires compute capability 94 and above."
+        if not current_platform.supports_fp8():
+            return False, "requires FP8-capable GPU."
 
         return True, None
 

@@ -79,10 +79,8 @@ def is_supported(
         if not current_platform.is_rocm():
             return False, "requires ROCm."
 
-        from vllm.platforms.rocm import on_mi3xx
-
-        if not on_mi3xx():
-            return False, "requires MI3xx."
+        if not current_platform.supports_fp8():
+            return False, "requires FP8-capable GPU."
 
         if not envs.VLLM_ROCM_USE_SKINNY_GEMM:
             return False, "requires VLLM_ROCM_USE_SKINNY_GEMM to be enabled."

@@ -916,17 +916,7 @@ def _supports_quant_scheme(
         weight_key: QuantKey | None,
         activation_key: QuantKey | None,
     ) -> bool:
-        p = current_platform
-        if p.is_rocm():
-            from vllm.platforms.rocm import on_gfx9
-
-            is_rocm_on_gfx9 = on_gfx9()
-        else:
-            is_rocm_on_gfx9 = False
-
-        device_supports_fp8 = is_rocm_on_gfx9 or (
-            p.is_cuda() and p.has_device_capability((8, 9))
-        )
+        device_supports_fp8 = current_platform.supports_fp8()
 
         SUPPORTED_W_A_FP8 = [
             (kFp8Static128BlockSym, kFp8Dynamic128Sym),

@@ -1921,19 +1921,7 @@ def _supports_quant_scheme(
         weight_key: QuantKey | None,
         activation_key: QuantKey | None,
     ) -> bool:
-        p = current_platform
-        if p.is_rocm():
-            from vllm.platforms.rocm import on_gfx9
-
-            is_rocm_on_gfx9 = on_gfx9()
-        else:
-            is_rocm_on_gfx9 = False
-
-        device_supports_fp8 = (
-            is_rocm_on_gfx9
-            or (p.is_cuda() and p.has_device_capability((8, 9)))
-            or p.is_xpu()
-        )
+        device_supports_fp8 = current_platform.supports_fp8()
 
         if not device_supports_fp8:
             return (weight_key, activation_key) == (None, None)