diff --git a/vllm/model_executor/layers/fused_moe/experts/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/experts/deep_gemm_moe.py index b4394b5fd382..e08ffb2c8ced 100644 --- a/vllm/model_executor/layers/fused_moe/experts/deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/experts/deep_gemm_moe.py @@ -354,9 +354,9 @@ def activation_format() -> mk.FusedMoEActivationFormat: def _supports_current_device() -> bool: from vllm.platforms import current_platform - return ( - is_deep_gemm_supported() - and current_platform.is_device_capability_family(100) + return is_deep_gemm_supported() and ( + current_platform.is_device_capability_family(100) + or current_platform.is_device_capability_family(120) ) @staticmethod diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 4f9b9d7bf234..0d1c098611cf 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -545,8 +545,11 @@ def support_static_graph_mode(cls) -> bool: @classmethod def support_deep_gemm(cls) -> bool: - """Currently, only Hopper and Blackwell GPUs are supported.""" - return cls.is_device_capability(90) or cls.is_device_capability_family(100) + """Currently, Hopper, datacenter Blackwell (SM 10x) and consumer + Blackwell (SM 12x — RTX 50-series, GB10/DGX Spark) are supported.""" + return (cls.is_device_capability(90) + or cls.is_device_capability_family(100) + or cls.is_device_capability_family(120)) @classmethod def is_integrated_gpu(cls, device_id: int = 0) -> bool: