From a681b599fc8ce3c374903b601ef20d1ae49aba17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9C=B1=E9=99=B6=E5=AE=87=EF=BC=88=E5=AE=9E=E4=B9=A0?=
 =?UTF-8?q?=EF=BC=89?= <zhutaoyu@xiaohongshu.com>
Date: Thu, 27 Nov 2025 13:01:03 +0800
Subject: [PATCH] Use original aiter kernel for MI300X

---
 python/sglang/srt/layers/quantization/fp8_utils.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/layers/quantization/fp8_utils.py b/python/sglang/srt/layers/quantization/fp8_utils.py
index 4dddd407f296..88a1e1df43c7 100644
--- a/python/sglang/srt/layers/quantization/fp8_utils.py
+++ b/python/sglang/srt/layers/quantization/fp8_utils.py
@@ -34,6 +34,7 @@
     get_device_capability,
     is_cuda,
     is_flashinfer_available,
+    is_gfx95_supported,
     is_hip,
 )
 
@@ -45,10 +46,11 @@
 
 if _use_aiter:
     import aiter
+    from aiter import gemm_a8w8_blockscale, gemm_a8w8_bpreshuffle, get_hip_quant
 
-    # from aiter import gemm_a8w8_blockscale, gemm_a8w8_bpreshuffle, get_hip_quant
-    from aiter import gemm_a8w8_bpreshuffle, get_hip_quant
-    from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale
+    if is_gfx95_supported():
+        from aiter import gemm_a8w8_bpreshuffle, get_hip_quant
+        from aiter.ops.triton.gemm_a8w8_blockscale import gemm_a8w8_blockscale
 
     aiter_per1x128_quant = get_hip_quant(aiter.QuantType.per_1x128)