From b5db221f23fcb312befe06fbd85843d15ac924da Mon Sep 17 00:00:00 2001 From: HAI Date: Tue, 17 Feb 2026 01:06:43 -0800 Subject: [PATCH] =?UTF-8?q?Revert=20"[AMD]=20Fix=20RotaryEmbedding=20crash?= =?UTF-8?q?=20on=20AMD/ROCm=20(regression=20from=20#17934)=20=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5e3103a7872c645012ce316bc87da41a7cbaae52. --- python/sglang/srt/layers/rotary_embedding.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/python/sglang/srt/layers/rotary_embedding.py b/python/sglang/srt/layers/rotary_embedding.py index 5baa93154d31..4a881200f25e 100644 --- a/python/sglang/srt/layers/rotary_embedding.py +++ b/python/sglang/srt/layers/rotary_embedding.py @@ -389,19 +389,6 @@ def forward_cuda( ) return query, key - def forward_hip(self, *args, **kwargs): - """HIP/ROCm implementation. - - The JIT kernels (sglang.jit_kernel.pos_enc) used in forward_cuda's - fallback path depend on tvm_ffi which invokes nvidia-smi to detect - CUDA compute capability. This fails on AMD GPUs, so we use the - pure-PyTorch native implementation instead. - - Uses *args/**kwargs because subclasses (MRotaryEmbedding, etc.) - have different forward_native() signatures. - """ - return self.forward_native(*args, **kwargs) - def extra_repr(self) -> str: s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}" s += f", max_position_embeddings={self.max_position_embeddings}"