diff --git a/vllm/model_executor/layers/fused_moe/experts/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/experts/gpt_oss_triton_kernels_moe.py
index ac317ac7762c..4584dfcc511b 100644
--- a/vllm/model_executor/layers/fused_moe/experts/gpt_oss_triton_kernels_moe.py
+++ b/vllm/model_executor/layers/fused_moe/experts/gpt_oss_triton_kernels_moe.py
@@ -651,11 +651,14 @@ def _supports_current_device() -> bool:
         cap = p.get_device_capability()
         if cap is None:
             return False
-        # (9,0) <= cap < (11,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
-        # and ROCm gfx942/gfx950 (which map to 9.4/9.5).
+        # (9,0) <= cap < (13,0) covers CUDA SM90 (Hopper), SM100+ (datacenter
+        # Blackwell), SM120/SM121 (consumer Blackwell: RTX 50-series,
+        # GB10/DGX Spark) and ROCm gfx942/gfx950 (which map to 9.4/9.5).
+        # The lower bound is enforced because the Triton MXFP4 kernels rely
+        # on SM 9.0+ tensor-core / matmul instructions.
         if not has_triton_kernels():
             return False
-        return (9, 0) <= (cap.major, cap.minor) < (11, 0)
+        return (9, 0) <= (cap.major, cap.minor) < (13, 0)
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
@@ -1065,11 +1068,14 @@ def _supports_current_device() -> bool:
         cap = p.get_device_capability()
         if cap is None:
             return False
-        # (9,0) <= cap < (11,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
-        # and ROCm gfx942/gfx950 (which map to 9.4/9.5).
+        # (9,0) <= cap < (13,0) covers CUDA SM90 (Hopper), SM100+ (datacenter
+        # Blackwell), SM120/SM121 (consumer Blackwell: RTX 50-series,
+        # GB10/DGX Spark) and ROCm gfx942/gfx950 (which map to 9.4/9.5).
+        # The lower bound is enforced because the Triton MXFP4 kernels rely
+        # on SM 9.0+ tensor-core / matmul instructions.
         if not has_triton_kernels():
             return False
-        return (9, 0) <= (cap.major, cap.minor) < (11, 0)
+        return (9, 0) <= (cap.major, cap.minor) < (13, 0)
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
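
For reference, a minimal standalone sketch of the same capability gate, outside the vLLM tree. It assumes PyTorch is available; `has_triton_kernels` below is a hypothetical stand-in for vLLM's helper of the same name, approximated here with an import check:

```python
import importlib.util

import torch


def has_triton_kernels() -> bool:
    # Hypothetical stand-in for vLLM's helper: treat the Triton MXFP4
    # kernels as available if the `triton_kernels` package is importable.
    return importlib.util.find_spec("triton_kernels") is not None


def supports_current_device() -> bool:
    if not torch.cuda.is_available():
        return False
    # (major, minor) compute capability, e.g. (9, 0) on H100,
    # (12, 0) on consumer Blackwell.
    cap = torch.cuda.get_device_capability()
    if not has_triton_kernels():
        return False
    # SM90 (Hopper) through SM121 (consumer Blackwell). Anything below
    # SM 9.0 lacks the tensor-core / matmul instructions the kernels need.
    return (9, 0) <= cap < (13, 0)
```

On ROCm builds of PyTorch the same call reports the gfx942/gfx950 targets as (9, 4)/(9, 5), so they fall inside the range without a separate branch.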