Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
kNvfp4Dynamic,
kNvfp4Static,
)
from vllm.platforms import current_platform
from vllm.platforms import is_blackwell_cuda
from vllm.utils.flashinfer import (
flashinfer_cutedsl_grouped_gemm_nt_masked,
scaled_fp4_grouped_quantize,
Expand Down Expand Up @@ -54,8 +54,8 @@ def activation_format() -> mk.FusedMoEActivationFormat:

@staticmethod
def _supports_current_device() -> bool:
p = current_platform
return p.is_cuda() and p.is_device_capability_family(100)
"""Supports Blackwell-family GPUs (SM100/110/120)."""
return is_blackwell_cuda()

@staticmethod
def _supports_no_act_and_mul() -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
kFp8Static128BlockSym,
kFp8StaticTensorSym,
)
from vllm.platforms import current_platform
from vllm.platforms import is_blackwell_cuda
from vllm.utils.torch_utils import direct_register_custom_op

#
Expand All @@ -28,9 +28,8 @@


def _supports_current_device() -> bool:
"""Supports only Blackwell-family GPUs."""
p = current_platform
return p.is_cuda() and p.is_device_capability_family(100)
"""Supports Blackwell-family GPUs (SM100/110/120)."""
return is_blackwell_cuda()


def _supports_no_act_and_mul() -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
kNvfp4Dynamic,
kNvfp4Static,
)
from vllm.platforms import current_platform
from vllm.platforms import is_blackwell_cuda

if TYPE_CHECKING:
from vllm.model_executor.layers.fused_moe.oracle.nvfp4 import (
Expand All @@ -42,9 +42,8 @@


def _supports_current_device() -> bool:
"""Supports only Blackwell-family GPUs."""
p = current_platform
return p.is_cuda() and p.is_device_capability_family(100)
"""Supports Blackwell-family GPUs (SM100/110/120)."""
return is_blackwell_cuda()


def _supports_no_act_and_mul() -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from vllm import envs
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.platforms import current_platform, is_blackwell_cuda
from vllm.utils.math_utils import round_up

logger = init_logger(__name__)
Expand Down Expand Up @@ -169,10 +169,7 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend:

flashinfer_moe_backend = envs.VLLM_FLASHINFER_MOE_BACKEND
if flashinfer_moe_backend in backend_map:
if (
flashinfer_moe_backend == "latency"
and not current_platform.is_device_capability_family(100)
):
if flashinfer_moe_backend == "latency" and not is_blackwell_cuda():
logger.info_once(
"Flashinfer TRTLLM MOE backend is only supported on "
"SM100 and later, using CUTLASS backend instead",
Expand Down
25 changes: 24 additions & 1 deletion vllm/platforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,27 @@ def __setattr__(name: str, value):
raise AttributeError(f"No attribute named '{name}' exists in {__name__}.")


__all__ = ["Platform", "PlatformEnum", "current_platform", "CpuArchEnum", "_init_trace"]
def is_blackwell_cuda() -> bool:
    """Return True when running on a Blackwell-family CUDA GPU (SM100/110/120).

    This includes:
    - SM100: Data center Blackwell (B100, B200)
    - SM110: Future Blackwell variant
    - SM120: Consumer Blackwell (RTX 5090)
    - SM121: DGX Spark (GB10)
    """
    # Resolve current_platform lazily through this module's __getattr__ so
    # platform detection only happens on first use.
    platform = __getattr__("current_platform")
    if not platform.is_cuda():
        return False
    # Each family check presumably spans its minor revisions (e.g. SM121
    # falls under the 120 family) — consistent with the docstring above.
    blackwell_families = (100, 110, 120)
    return any(
        platform.is_device_capability_family(fam) for fam in blackwell_families
    )


# Public re-exports of this package, including the is_blackwell_cuda helper.
__all__ = ["Platform", "PlatformEnum", "current_platform", "CpuArchEnum",
           "_init_trace", "is_blackwell_cuda"]