From a681b599fc8ce3c374903b601ef20d1ae49aba17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E9=99=B6=E5=AE=87=EF=BC=88=E5=AE=9E=E4=B9=A0?= =?UTF-8?q?=EF=BC=89?= Date: Thu, 27 Nov 2025 13:01:03 +0800 Subject: [PATCH] Use the original aiter kernel for MI300X --- python/sglang/srt/layers/quantization/fp8_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py index 4dddd407f296..88a1e1df43c7 100644 --- a/python/sglang/srt/layers/quantization/fp8_utils.py +++ b/python/sglang/srt/layers/quantization/fp8_utils.py @@ -34,6 +34,7 @@ get_device_capability, is_cuda, is_flashinfer_available, + is_gfx95_supported, is_hip, ) @@ -45,10 +46,11 @@ if _use_aiter: import aiter + from aiter import gemm_a8w8_blockscale, gemm_a8w8_bpreshuffle, get_hip_quant - # from aiter import gemm_a8w8_blockscale, gemm_a8w8_bpreshuffle, get_hip_quant - from aiter import gemm_a8w8_bpreshuffle, get_hip_quant - from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale + if is_gfx95_supported(): + from aiter import gemm_a8w8_bpreshuffle, get_hip_quant + from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale aiter_per1x128_quant = get_hip_quant(aiter.QuantType.per_1x128)