From 81eb54d66c3f815c7629d2e284ad0561ddd216a8 Mon Sep 17 00:00:00 2001 From: Hubert Lu Date: Thu, 9 Oct 2025 00:34:05 +0000 Subject: [PATCH 1/2] Clean up vllm dependencies for AMD in moe_runner/triton.py --- python/sglang/srt/layers/moe/moe_runner/triton.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/python/sglang/srt/layers/moe/moe_runner/triton.py b/python/sglang/srt/layers/moe/moe_runner/triton.py index 116fdcaa0198..2687cb68615c 100644 --- a/python/sglang/srt/layers/moe/moe_runner/triton.py +++ b/python/sglang/srt/layers/moe/moe_runner/triton.py @@ -32,23 +32,22 @@ _is_cuda = is_cuda() _is_cpu_amx_available = cpu_has_amx_support() _is_cpu = is_cpu() -_use_aiter = bool(int(os.getenv("SGLANG_MOE_USE_AITER", "0"))) +_use_aiter = bool(int(os.getenv("SGLANG_USE_AITER", "0"))) _MOE_PADDING_SIZE = 128 if bool(int(os.getenv("SGLANG_MOE_PADDING", "0"))) else 0 -if _is_cuda: +if _is_cuda or _is_hip: from sgl_kernel import gelu_and_mul, silu_and_mul elif _is_cpu and _is_cpu_amx_available: pass elif _is_hip: - from vllm import _custom_ops as vllm_ops # gelu_and_mul, silu_and_mul - if _use_aiter: try: from aiter import moe_sum except ImportError: raise ImportError("aiter is required when SGLANG_USE_AITER is set to True") - + else: + from vllm import _custom_ops as vllm_ops # moe_sum if _is_cuda or _is_hip: from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size @@ -202,7 +201,7 @@ def run( gemm1_alpha, gemm1_limit, ) - elif _is_cuda: + elif _is_cuda or _is_hip: silu_and_mul(intermediate_cache1.view(-1, N), intermediate_cache2) else: vllm_ops.silu_and_mul( @@ -211,7 +210,7 @@ def run( elif activation == "gelu": assert gemm1_alpha is None, "gemm1_alpha is not supported for gelu" assert gemm1_limit is None, "gemm1_limit is not supported for gelu" - if _is_cuda: + if _is_cuda or _is_hip: gelu_and_mul(intermediate_cache1.view(-1, N), intermediate_cache2) else: vllm_ops.gelu_and_mul( From 9a39143abe6679b0d97deae938f955ced0fa1bc3 Mon Sep 17 00:00:00 2001 From: Hubert Lu Date: Wed, 7 Jan 2026 17:49:59 +0000 Subject: [PATCH 2/2] Refactor the logic of import for hip --- .../srt/layers/moe/moe_runner/triton.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/python/sglang/srt/layers/moe/moe_runner/triton.py b/python/sglang/srt/layers/moe/moe_runner/triton.py index 19a33c603330..cdf3e9a471f3 100644 --- a/python/sglang/srt/layers/moe/moe_runner/triton.py +++ b/python/sglang/srt/layers/moe/moe_runner/triton.py @@ -38,16 +38,19 @@ if _is_cuda or _is_hip: from sgl_kernel import gelu_and_mul, silu_and_mul + + if _is_hip: + if _use_aiter: + try: + from aiter import moe_sum + except ImportError: + raise ImportError( + "aiter is required when SGLANG_USE_AITER is set to True" + ) + else: + from vllm import _custom_ops as vllm_ops # moe_sum elif _is_cpu and _is_cpu_amx_available: pass -elif _is_hip: - if _use_aiter: - try: - from aiter import moe_sum - except ImportError: - raise ImportError("aiter is required when SGLANG_USE_AITER is set to True") - else: - from vllm import _custom_ops as vllm_ops # moe_sum if _is_cuda or _is_hip: from sgl_kernel import ( # noqa: F401