diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
index 5862abe20518..f50e162fb44c 100644
--- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
+++ b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
@@ -552,9 +552,9 @@ def _supports_current_device() -> bool:
         cap = p.get_device_capability()
         if cap is None:
             return False
-        # (9,0) <= cap < (11,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
-        # and ROCm gfx942/gfx950 (which map to 9.4/9.5).
-        return (9, 0) <= (cap.major, cap.minor) < (11, 0)
+        # (9,0) <= cap < (13,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
+        # and ROCm gfx942/gfx950 (9.4/9.5) + gfx1200/gfx1201 (12.0).
+        return (9, 0) <= (cap.major, cap.minor) < (13, 0)
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
@@ -884,9 +884,9 @@ def _supports_current_device() -> bool:
         cap = p.get_device_capability()
         if cap is None:
             return False
-        # (9,0) <= cap < (11,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
-        # and ROCm gfx942/gfx950 (which map to 9.4/9.5).
-        return (9, 0) <= (cap.major, cap.minor) < (11, 0)
+        # (9,0) <= cap < (13,0) covers CUDA SM90 (Hopper), SM100+ (Blackwell)
+        # and ROCm gfx942/gfx950 (9.4/9.5) + gfx1200/gfx1201 (12.0).
+        return (9, 0) <= (cap.major, cap.minor) < (13, 0)
 
     @staticmethod
     def _supports_no_act_and_mul() -> bool:
diff --git a/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py b/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
index ddc6588dc517..39c946dcbdd7 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py
@@ -202,7 +202,7 @@ def select_mxfp4_moe_backend(
     triton_kernels_supported = has_triton_kernels() and (
         9,
         0,
-    ) <= current_platform.get_device_capability() < (11, 0)
+    ) <= current_platform.get_device_capability() < (13, 0)
 
     # LoRA: separate experts backend path
     if config.is_lora_enabled:
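
Note (not part of the patch): a minimal standalone sketch of how the widened capability gate behaves, using Python's lexicographic tuple comparison. `Cap` below is a hypothetical stand-in for the platform's device-capability object; only the `.major`/`.minor` attributes used in the patched predicate are assumed.

    # Sketch: the same range predicate as in the patch, exercised on a few
    # representative capability values. Not vLLM code; Cap is a stand-in.
    from collections import namedtuple

    Cap = namedtuple("Cap", ["major", "minor"])

    def in_supported_range(cap: Cap) -> bool:
        # Lexicographic tuple comparison: accepts 9.x through 12.x,
        # rejects anything below 9.0 or at/above 13.0.
        return (9, 0) <= (cap.major, cap.minor) < (13, 0)

    assert in_supported_range(Cap(9, 0))       # CUDA SM90, Hopper
    assert in_supported_range(Cap(10, 0))      # CUDA SM100, Blackwell
    assert in_supported_range(Cap(9, 4))       # ROCm gfx942
    assert in_supported_range(Cap(12, 0))      # ROCm gfx1200/gfx1201, newly covered
    assert not in_supported_range(Cap(8, 0))   # below the range: rejected
    assert not in_supported_range(Cap(13, 0))  # at the upper bound: rejected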