From 83443ab9535d8745a74d012bc1ae746eeb76bfd1 Mon Sep 17 00:00:00 2001 From: michaelzhang-ai Date: Mon, 16 Feb 2026 10:50:13 -0600 Subject: [PATCH 1/4] Add pull request triggers for nightly test workflows and implement HIP/ROCm support in RotaryEmbedding class --- .../workflows/nightly-test-amd-rocm720.yml | 3 +++ .github/workflows/nightly-test-amd.yml | 3 +++ python/sglang/srt/layers/rotary_embedding.py | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+) diff --git a/.github/workflows/nightly-test-amd-rocm720.yml b/.github/workflows/nightly-test-amd-rocm720.yml index f0bac0328b79..155ac4df8b2c 100644 --- a/.github/workflows/nightly-test-amd-rocm720.yml +++ b/.github/workflows/nightly-test-amd-rocm720.yml @@ -1,6 +1,9 @@ name: Nightly Test (AMD ROCm 7.2) on: + pull_request: + branches: + - main schedule: - cron: '0 2 * * *' push: diff --git a/.github/workflows/nightly-test-amd.yml b/.github/workflows/nightly-test-amd.yml index 505cf1908cdd..ae731c217109 100644 --- a/.github/workflows/nightly-test-amd.yml +++ b/.github/workflows/nightly-test-amd.yml @@ -1,6 +1,9 @@ name: Nightly Test (AMD) on: + pull_request: + branches: + - main schedule: - cron: '0 0 * * *' push: diff --git a/python/sglang/srt/layers/rotary_embedding.py b/python/sglang/srt/layers/rotary_embedding.py index 4a881200f25e..b32439773b9e 100644 --- a/python/sglang/srt/layers/rotary_embedding.py +++ b/python/sglang/srt/layers/rotary_embedding.py @@ -389,6 +389,25 @@ def forward_cuda( ) return query, key + def forward_hip( + self, + positions: torch.Tensor, + query: torch.Tensor, + key: torch.Tensor, + offsets: Optional[torch.Tensor] = None, + fused_set_kv_buffer_arg: Optional[FusedSetKVBufferArg] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """HIP/ROCm implementation. + + The JIT kernels (sglang.jit_kernel.pos_enc) used in forward_cuda's + fallback path depend on tvm_ffi which invokes nvidia-smi to detect + CUDA compute capability. This fails on AMD GPUs, so we use the + pure-PyTorch native implementation instead. + """ + return self.forward_native( + positions, query, key, offsets, fused_set_kv_buffer_arg + ) + def extra_repr(self) -> str: s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}" s += f", max_position_embeddings={self.max_position_embeddings}" From 83328bcb0d9c0053257081060986f5d711a4b46c Mon Sep 17 00:00:00 2001 From: michaelzhang-ai Date: Mon, 16 Feb 2026 11:04:01 -0600 Subject: [PATCH 2/4] Remove pull request triggers from nightly test workflows for AMD and AMD ROCm 7.2 --- .github/workflows/nightly-test-amd-rocm720.yml | 3 --- .github/workflows/nightly-test-amd.yml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/nightly-test-amd-rocm720.yml b/.github/workflows/nightly-test-amd-rocm720.yml index 155ac4df8b2c..f0bac0328b79 100644 --- a/.github/workflows/nightly-test-amd-rocm720.yml +++ b/.github/workflows/nightly-test-amd-rocm720.yml @@ -1,9 +1,6 @@ name: Nightly Test (AMD ROCm 7.2) on: - pull_request: - branches: - - main schedule: - cron: '0 2 * * *' push: diff --git a/.github/workflows/nightly-test-amd.yml b/.github/workflows/nightly-test-amd.yml index ae731c217109..505cf1908cdd 100644 --- a/.github/workflows/nightly-test-amd.yml +++ b/.github/workflows/nightly-test-amd.yml @@ -1,9 +1,6 @@ name: Nightly Test (AMD) on: - pull_request: - branches: - - main schedule: - cron: '0 0 * * *' push: From e19d2f53dd6cb2d4d38fe2568d862e8b4f4d98bb Mon Sep 17 00:00:00 2001 From: michaelzhang-ai Date: Mon, 16 Feb 2026 18:47:43 -0600 Subject: [PATCH 3/4] Add pull request triggers for nightly test workflows for AMD and AMD ROCm 7.2; refactor forward_hip method in RotaryEmbedding class to accept *args/**kwargs for better subclass compatibility. --- .github/workflows/nightly-test-amd-rocm720.yml | 3 +++ .github/workflows/nightly-test-amd.yml | 3 +++ python/sglang/srt/layers/rotary_embedding.py | 16 +++++----------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/nightly-test-amd-rocm720.yml b/.github/workflows/nightly-test-amd-rocm720.yml index f0bac0328b79..155ac4df8b2c 100644 --- a/.github/workflows/nightly-test-amd-rocm720.yml +++ b/.github/workflows/nightly-test-amd-rocm720.yml @@ -1,6 +1,9 @@ name: Nightly Test (AMD ROCm 7.2) on: + pull_request: + branches: + - main schedule: - cron: '0 2 * * *' push: diff --git a/.github/workflows/nightly-test-amd.yml b/.github/workflows/nightly-test-amd.yml index 505cf1908cdd..ae731c217109 100644 --- a/.github/workflows/nightly-test-amd.yml +++ b/.github/workflows/nightly-test-amd.yml @@ -1,6 +1,9 @@ name: Nightly Test (AMD) on: + pull_request: + branches: + - main schedule: - cron: '0 0 * * *' push: diff --git a/python/sglang/srt/layers/rotary_embedding.py b/python/sglang/srt/layers/rotary_embedding.py index b32439773b9e..5baa93154d31 100644 --- a/python/sglang/srt/layers/rotary_embedding.py +++ b/python/sglang/srt/layers/rotary_embedding.py @@ -389,24 +389,18 @@ def forward_cuda( ) return query, key - def forward_hip( - self, - positions: torch.Tensor, - query: torch.Tensor, - key: torch.Tensor, - offsets: Optional[torch.Tensor] = None, - fused_set_kv_buffer_arg: Optional[FusedSetKVBufferArg] = None, - ) -> Tuple[torch.Tensor, torch.Tensor]: + def forward_hip(self, *args, **kwargs): """HIP/ROCm implementation. The JIT kernels (sglang.jit_kernel.pos_enc) used in forward_cuda's fallback path depend on tvm_ffi which invokes nvidia-smi to detect CUDA compute capability. This fails on AMD GPUs, so we use the pure-PyTorch native implementation instead. + + Uses *args/**kwargs because subclasses (MRotaryEmbedding, etc.) + have different forward_native() signatures. """ - return self.forward_native( - positions, query, key, offsets, fused_set_kv_buffer_arg - ) + return self.forward_native(*args, **kwargs) def extra_repr(self) -> str: s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}" From 6714e4a48df0deef8b547935bc18b5ca10d27a21 Mon Sep 17 00:00:00 2001 From: michaelzhang-ai Date: Mon, 16 Feb 2026 18:54:58 -0600 Subject: [PATCH 4/4] Remove pull request triggers from nightly test workflows for AMD and AMD ROCm 7.2 --- .github/workflows/nightly-test-amd-rocm720.yml | 3 --- .github/workflows/nightly-test-amd.yml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/nightly-test-amd-rocm720.yml b/.github/workflows/nightly-test-amd-rocm720.yml index 155ac4df8b2c..f0bac0328b79 100644 --- a/.github/workflows/nightly-test-amd-rocm720.yml +++ b/.github/workflows/nightly-test-amd-rocm720.yml @@ -1,9 +1,6 @@ name: Nightly Test (AMD ROCm 7.2) on: - pull_request: - branches: - - main schedule: - cron: '0 2 * * *' push: diff --git a/.github/workflows/nightly-test-amd.yml b/.github/workflows/nightly-test-amd.yml index ae731c217109..505cf1908cdd 100644 --- a/.github/workflows/nightly-test-amd.yml +++ b/.github/workflows/nightly-test-amd.yml @@ -1,9 +1,6 @@ name: Nightly Test (AMD) on: - pull_request: - branches: - - main schedule: - cron: '0 0 * * *' push: