Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions vllm/platforms/rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,10 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
compilation_config = vllm_config.compilation_config
parallel_config = vllm_config.parallel_config
is_eager_execution = compilation_config == CUDAGraphMode.NONE
use_aiter_fused_moe = rocm_aiter_ops.is_fused_moe_enabled()
use_aiter_rms_norm = rocm_aiter_ops.is_rmsnorm_enabled()
use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enabled()
use_aiter_fused_se = rocm_aiter_ops.is_fusion_moe_shared_experts_enabled()

if compilation_config.cudagraph_mode.has_full_cudagraphs():
# decode context parallel does not support full cudagraphs
Expand Down Expand Up @@ -458,6 +460,22 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
if use_aiter_fp8_linear and "-quant_fp8" not in compilation_config.custom_ops:
compilation_config.custom_ops.append("+quant_fp8")

if use_aiter_fused_se and "-grouped_topk" in compilation_config.custom_ops:
logger.warning_once(
"VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS is enabled, which "
"requires the 'grouped_topk' custom op. Overriding the "
"user-provided '-grouped_topk'."
)
compilation_config.custom_ops.remove("-grouped_topk")
# Ensure grouped_topk is enabled whenever AITER fused MoE is in use,
# unless the user has explicitly enabled or disabled it already
if (
use_aiter_fused_moe
and "+grouped_topk" not in compilation_config.custom_ops
and "-grouped_topk" not in compilation_config.custom_ops
):
compilation_config.custom_ops.append("+grouped_topk")

@classmethod
def verify_model_arch(cls, model_arch: str) -> None:
if model_arch in _ROCM_UNSUPPORTED_MODELS:
Expand Down
Loading