Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
kNvfp4Dynamic,
kNvfp4Static,
)
from vllm.platforms import current_platform
from vllm.platforms import is_blackwell_cuda
from vllm.utils.flashinfer import (
flashinfer_cutedsl_grouped_gemm_nt_masked,
scaled_fp4_grouped_quantize,
Expand Down Expand Up @@ -54,8 +54,8 @@ def activation_format() -> mk.FusedMoEActivationFormat:

@staticmethod
def _supports_current_device() -> bool:
p = current_platform
return p.is_cuda() and p.is_device_capability_family(100)
"""Supports Blackwell-family GPUs (SM100/110/120)."""
return is_blackwell_cuda()

@staticmethod
def _supports_no_act_and_mul() -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
kFp8Static128BlockSym,
kFp8StaticTensorSym,
)
from vllm.platforms import current_platform
from vllm.platforms import is_blackwell_cuda
from vllm.utils.torch_utils import direct_register_custom_op

#
Expand All @@ -28,9 +28,8 @@


def _supports_current_device() -> bool:
"""Supports only Blackwell-family GPUs."""
p = current_platform
return p.is_cuda() and p.is_device_capability_family(100)
"""Supports Blackwell-family GPUs (SM100/110/120)."""
return is_blackwell_cuda()


def _supports_no_act_and_mul() -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
kNvfp4Dynamic,
kNvfp4Static,
)
from vllm.platforms import current_platform
from vllm.platforms import is_blackwell_cuda

if TYPE_CHECKING:
from vllm.model_executor.layers.fused_moe.oracle.nvfp4 import (
Expand All @@ -42,9 +42,8 @@


def _supports_current_device() -> bool:
"""Supports only Blackwell-family GPUs."""
p = current_platform
return p.is_cuda() and p.is_device_capability_family(100)
"""Supports Blackwell-family GPUs (SM100/110/120)."""
return is_blackwell_cuda()


def _supports_no_act_and_mul() -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from vllm import envs
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.platforms import current_platform, is_blackwell_cuda
from vllm.utils.math_utils import round_up

logger = init_logger(__name__)
Expand Down Expand Up @@ -169,10 +169,7 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend:

flashinfer_moe_backend = envs.VLLM_FLASHINFER_MOE_BACKEND
if flashinfer_moe_backend in backend_map:
if (
flashinfer_moe_backend == "latency"
and not current_platform.is_device_capability_family(100)
):
if flashinfer_moe_backend == "latency" and not is_blackwell_cuda():
logger.info_once(
"Flashinfer TRTLLM MOE backend is only supported on "
"SM100 and later, using CUTLASS backend instead",
Expand Down
25 changes: 24 additions & 1 deletion vllm/platforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,27 @@ def __setattr__(name: str, value):
raise AttributeError(f"No attribute named '{name}' exists in {__name__}.")


__all__ = ["Platform", "PlatformEnum", "current_platform", "CpuArchEnum", "_init_trace"]
def is_blackwell_cuda() -> bool:
    """Return True when running on a Blackwell-family CUDA GPU (SM100/110/120).

    This includes:
    - SM100: Data center Blackwell (B100, B200)
    - SM110: Future Blackwell variant
    - SM120: Consumer Blackwell (RTX 5090)
    - SM121: DGX Spark (GB10)
    """
    # Resolve current_platform lazily through this module's __getattr__ so
    # platform detection only happens on first use.
    platform = __getattr__("current_platform")
    if not platform.is_cuda():
        return False
    # Each family check presumably spans its minor revisions (e.g. SM121
    # falls under the 120 family) — consistent with the docstring above.
    blackwell_families = (100, 110, 120)
    return any(
        platform.is_device_capability_family(fam) for fam in blackwell_families
    )


# Public re-exports of this package, including the is_blackwell_cuda helper.
__all__ = ["Platform", "PlatformEnum", "current_platform", "CpuArchEnum",
           "_init_trace", "is_blackwell_cuda"]