Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/pr-test-amd-rocm720.yml
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,45 @@ jobs:
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 13 --timeout-per-file 1800 --continue-on-error

# Non-deterministic small test suite on a single AMD GPU (ROCm 7.2.0 container).
# Runs when explicitly targeted via inputs.target_stage, or on normal runs when
# the main package or sgl-kernel changed. `always()` keeps the job schedulable
# for explicit targeting even if earlier needs were skipped; the inner
# `!failure() && !cancelled()` guard still blocks it after upstream failures.
stage-b-test-small-1-gpu-amd-nondeterministic:
  needs: [check-changes]
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-b-test-small-1-gpu-amd-nondeterministic') ||
      (
        !inputs.target_stage &&
        (!failure() && !cancelled()) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  strategy:
    # Single-runner matrix kept for easy future expansion to more SKUs.
    fail-fast: false
    matrix:
      runner: [linux-mi325-gpu-1]
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Prefer the PR head SHA, then an explicit ref, then the triggering SHA.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ensure_vram_clear.sh rocm

    - name: Start CI container
      run: bash scripts/ci/amd/amd_ci_start_container.sh --rocm-version rocm720
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh --skip-aiter-build

    - name: Run test
      # NOTE(review): step timeout (30 min) equals --timeout-per-file (1800 s);
      # a single near-limit file can hit the step timeout first — confirm intended.
      timeout-minutes: 30
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd-nondeterministic --timeout-per-file 1800 --continue-on-error

stage-b-test-small-1-gpu-amd-mi35x:
needs: [check-changes]
if: |
Expand Down Expand Up @@ -801,6 +840,7 @@ jobs:
stage-a-test-1-amd,
jit-kernel-unit-test-amd,
stage-b-test-small-1-gpu-amd,
stage-b-test-small-1-gpu-amd-nondeterministic,
stage-b-test-small-1-gpu-amd-mi35x,
stage-b-test-large-1-gpu-amd,
stage-b-test-large-2-gpu-amd,
Expand Down
40 changes: 40 additions & 0 deletions .github/workflows/pr-test-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,45 @@ jobs:
run: |
bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 14 --timeout-per-file 1800

# Non-deterministic small test suite on a single AMD GPU (default ROCm container).
# Gated on check-changes outputs (main package or sgl-kernel changed) unless the
# stage is explicitly targeted via inputs.target_stage. Depends on
# stage-a-test-1-amd so stage-B only runs after stage-A succeeds.
stage-b-test-small-1-gpu-amd-nondeterministic:
  needs: [check-changes, stage-a-test-1-amd]
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-b-test-small-1-gpu-amd-nondeterministic') ||
      (
        !inputs.target_stage &&
        (!failure() && !cancelled()) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  strategy:
    # Single-runner matrix kept for easy future expansion to more SKUs.
    fail-fast: false
    matrix:
      runner: [linux-mi325-gpu-1]
  runs-on: ${{ matrix.runner }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        # Prefer the PR head SHA, then an explicit ref, then the triggering SHA.
        ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}

    - name: Ensure VRAM is clear
      run: bash scripts/ensure_vram_clear.sh rocm

    - name: Start CI container
      run: bash scripts/ci/amd/amd_ci_start_container.sh
      env:
        GITHUB_WORKSPACE: ${{ github.workspace }}

    - name: Install dependencies
      run: bash scripts/ci/amd/amd_ci_install_dependency.sh

    - name: Run test
      # NOTE(review): step timeout (30 min) equals --timeout-per-file (1800 s);
      # a single near-limit file can hit the step timeout first — confirm intended.
      timeout-minutes: 30
      run: |
        bash scripts/ci/amd/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-small-1-gpu-amd-nondeterministic --timeout-per-file 1800 --continue-on-error

stage-b-test-small-1-gpu-amd-mi35x:
needs: [check-changes, stage-a-test-1-amd]
if: |
Expand Down Expand Up @@ -890,6 +929,7 @@ jobs:
stage-a-test-1-amd,
jit-kernel-unit-test-amd,
stage-b-test-small-1-gpu-amd,
stage-b-test-small-1-gpu-amd-nondeterministic,
stage-b-test-small-1-gpu-amd-mi35x,
stage-b-test-large-1-gpu-amd,
stage-b-test-large-2-gpu-amd,
Expand Down
8 changes: 3 additions & 5 deletions python/sglang/srt/layers/attention/aiter_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,9 @@ def __init__(
if self.use_mla:
# For MLA models, get v_head_dim from model config
self.v_head_dim = model_runner.model_config.v_head_dim
elif (
model_runner.hybrid_gdn_config is not None
or model_runner.kimi_linear_config is not None
):
# For hybrid linear models, layer_id = 0 may not be full attention
elif hasattr(model_runner.token_to_kv_pool, "get_v_head_dim"):
# For hybrid models (Mamba+attention, GDN, Kimi linear),
# layer_id=0 may not be a full attention layer
self.v_head_dim = model_runner.token_to_kv_pool.get_v_head_dim()
else:
self.v_head_dim = model_runner.token_to_kv_pool.get_value_buffer(0).shape[
Expand Down
46 changes: 44 additions & 2 deletions python/sglang/srt/layers/attention/mamba/causal_conv1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,23 @@
from typing import Optional

import torch
from sgl_kernel import causal_conv1d_fwd
from sgl_kernel import causal_conv1d_update as causal_conv1d_update_kernel

from .causal_conv1d_triton import PAD_SLOT_ID

try:
from sgl_kernel import causal_conv1d_fwd
from sgl_kernel import causal_conv1d_update as causal_conv1d_update_kernel

torch.ops.sgl_kernel.causal_conv1d_update
_USE_TRITON = False
except (ImportError, AttributeError):
from .causal_conv1d_triton import causal_conv1d_fn as _causal_conv1d_fn_triton
from .causal_conv1d_triton import (
causal_conv1d_update as _causal_conv1d_update_triton,
)

_USE_TRITON = True


def causal_conv1d_fn(
x: torch.Tensor,
Expand Down Expand Up @@ -54,6 +66,25 @@ def causal_conv1d_fn(

out: (batch, dim, seqlen)
"""
if _USE_TRITON:
seq_lens_cpu = (
(query_start_loc[1:] - query_start_loc[:-1]).cpu().tolist()
if query_start_loc is not None
else [x.shape[-1]]
)
return _causal_conv1d_fn_triton(
x,
weight,
bias,
conv_states=conv_states,
query_start_loc=query_start_loc,
seq_lens_cpu=seq_lens_cpu,
cache_indices=cache_indices,
has_initial_state=has_initial_state,
activation=activation,
pad_slot_id=pad_slot_id,
**kwargs,
)
if activation not in [None, "silu", "swish"]:
raise NotImplementedError("activation must be None, silu, or swish")
if x.stride(-1) != 1:
Expand Down Expand Up @@ -106,6 +137,17 @@ def causal_conv1d_update(
indices 0 and 3
out: (batch, dim) or (batch, dim, seqlen)
"""
if _USE_TRITON:
return _causal_conv1d_update_triton(
x,
conv_state,
weight,
bias=bias,
activation=activation,
cache_seqlens=cache_seqlens,
conv_state_indices=conv_state_indices,
pad_slot_id=pad_slot_id,
)
if activation not in [None, "silu", "swish"]:
raise NotImplementedError(
f"activation must be None, silu, or swish, actual: {activation}"
Expand Down
1 change: 1 addition & 0 deletions scripts/ci/utils/slash_command_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def handle_rerun_stage(
"sgl-kernel-unit-test-2-gpu-amd",
"stage-a-test-1-amd",
"stage-b-test-small-1-gpu-amd",
"stage-b-test-small-1-gpu-amd-nondeterministic",
"stage-b-test-small-1-gpu-amd-mi35x",
"stage-b-test-large-1-gpu-amd",
"stage-b-test-large-2-gpu-amd",
Expand Down
2 changes: 1 addition & 1 deletion test/registered/lora/test_multi_lora_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from sglang.test.test_utils import CustomTestCase, is_in_ci

register_cuda_ci(est_time=100, suite="stage-b-test-large-1-gpu")
register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd")
register_amd_ci(est_time=100, suite="stage-b-test-small-1-gpu-amd-nondeterministic")


class TestMultiLoRABackend(CustomTestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import openai

from sglang.srt.utils import is_hip, kill_process_tree
from sglang.srt.utils import kill_process_tree
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.test_utils import (
Expand Down Expand Up @@ -860,7 +860,6 @@ def test_complex_parameters_required_non_streaming(self):
# cls.tokenizer = get_tokenizer(cls.model)


@unittest.skipIf(is_hip(), "Disabled for AMD")
class TestToolChoiceLfm2(TestToolChoiceLlama32):
"""Test tool_choice functionality with LiquidAI LFM2 model"""

Expand Down
1 change: 1 addition & 0 deletions test/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
HWBackend.AMD: [
"stage-a-test-1-amd",
"stage-b-test-small-1-gpu-amd",
"stage-b-test-small-1-gpu-amd-nondeterministic",
"stage-b-test-small-1-gpu-amd-mi35x",
"stage-b-test-large-8-gpu-35x-disaggregation-amd",
"stage-b-test-large-1-gpu-amd",
Expand Down
Loading