Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion .github/workflows/nightly-test-amd-rocm720.yml
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# 8-GPU GLM-5 (Accuracy) ROCm 7.2
# 8-GPU GLM-5 (Accuracy + Performance combined) ROCm 7.2
nightly-8-gpu-glm5-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5-rocm720,'))
runs-on: linux-mi325-8gpu-sglang
Expand Down Expand Up @@ -697,6 +697,18 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

- name: Performance Test ROCm 7.2 (8-GPU GLM-5)
timeout-minutes: 120
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-glm5 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# 8-GPU MiniMax-M2.5 (Accuracy + Performance combined) ROCm 7.2
nightly-8-gpu-minimax-m25-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25-rocm720,'))
Expand Down Expand Up @@ -1276,6 +1288,7 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# MI35x 8-GPU GLM-5 (Accuracy + Performance combined) ROCm 7.2
nightly-8-gpu-mi35x-glm5-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5-rocm720,'))
runs-on: linux-mi35x-gpu-8
Expand Down Expand Up @@ -1309,6 +1322,17 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

- name: Performance Test MI35x ROCm 7.2 (8-GPU GLM-5)
timeout-minutes: 120
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-glm5 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# MI35x 8-GPU GLM-4.7-FP8 (Accuracy) ROCm 7.2
nightly-8-gpu-mi35x-glm47-fp8-rocm720:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm47-fp8-rocm720,'))
Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/nightly-test-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,7 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# 8-GPU GLM-5 (Accuracy + Performance combined)
nightly-8-gpu-glm5:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-glm5,'))
runs-on: linux-mi325-8gpu-sglang
Expand Down Expand Up @@ -699,6 +700,18 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

- name: Performance Test (8-GPU GLM-5)
timeout-minutes: 120
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e SGLANG_USE_AITER=1 \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-glm5 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# 8-GPU MiniMax-M2.5 (Accuracy + Performance combined)
nightly-8-gpu-minimax-m25:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25,'))
Expand Down Expand Up @@ -1281,6 +1294,7 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# MI35x 8-GPU GLM-5 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-glm5:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-glm5,'))
runs-on: linux-mi35x-gpu-8
Expand Down Expand Up @@ -1314,6 +1328,17 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

- name: Performance Test MI35x (8-GPU GLM-5)
timeout-minutes: 120
continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
run: |
> github_summary.md # Clear summary file
bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
-e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-glm5 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

# MI35x 8-GPU MiniMax-M2.5 (Accuracy + Performance combined)
nightly-8-gpu-mi35x-minimax-m25:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25,'))
Expand Down
8 changes: 6 additions & 2 deletions test/registered/amd/accuracy/mi30x/test_glm5_eval_amd.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,17 @@ def get_display_name(self) -> str:
GLM5_MODELS = [
# GLM-5 with NSA attention (TP=8)
ModelConfig(
model_path="zai-org/GLM-5",
model_path="zai-org/GLM-5-FP8",
tp_size=8,
accuracy_threshold=0.93,
timeout=3600,
variant="nsa",
other_args=[
"--trust-remote-code",
"--reasoning-parser",
"glm45",
"--tool-call-parser",
"glm47",
"--nsa-prefill-backend",
"tilelang",
"--nsa-decode-backend",
Expand All @@ -77,7 +81,7 @@ def get_display_name(self) -> str:
"--model-loader-extra-config",
'{"enable_multithread_load": true}',
"--watchdog-timeout",
"1200", # 20 minutes for weight loading
"1200",
],
env_vars={"SGLANG_USE_AITER": "1"},
),
Expand Down
8 changes: 6 additions & 2 deletions test/registered/amd/accuracy/mi35x/test_glm5_eval_mi35x.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,17 @@ def get_display_name(self) -> str:
MI35X_GLM5_MODELS = [
# GLM-5 with NSA attention (TP=8)
ModelConfig(
model_path="zai-org/GLM-5",
model_path="zai-org/GLM-5-FP8",
tp_size=8,
accuracy_threshold=0.93,
timeout=5400,
variant="nsa",
other_args=[
"--trust-remote-code",
"--reasoning-parser",
"glm45",
"--tool-call-parser",
"glm47",
"--nsa-prefill-backend",
"tilelang",
"--nsa-decode-backend",
Expand All @@ -82,7 +86,7 @@ def get_display_name(self) -> str:
"--model-loader-extra-config",
'{"enable_multithread_load": true}',
"--watchdog-timeout",
"1200", # 20 minutes for weight loading
"1200",
],
env_vars={},
),
Expand Down
140 changes: 140 additions & 0 deletions test/registered/amd/perf/mi30x/test_glm5_perf_amd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""Nightly performance benchmark for GLM-5 on MI30x.

Tests GLM-5 with NSA attention backend using bench_one_batch on 8 GPUs.

Model paths can be configured via environment variables:
- GLM5_MODEL_PATH: Path to GLM-5 model (default: zai-org/GLM-5-FP8)

Example usage:
python -m pytest test_glm5_perf_amd.py -v
"""

import os
import unittest
from typing import List

from sglang.test.ci.ci_register import register_amd_ci
from sglang.test.nightly_bench_utils import BenchmarkResult
from sglang.test.nightly_utils import NightlyBenchmarkRunner
from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env

# Register this module with the AMD CI harness so it is picked up by the
# nightly-perf-8-gpu-glm5 suite (est_time is in seconds: 5400 s = 90 min).
register_amd_ci(est_time=5400, suite="nightly-perf-8-gpu-glm5", nightly=True)


def generate_simple_markdown_report(results: "List[BenchmarkResult]") -> str:
    """Render benchmark results as a GitHub-flavored markdown table.

    Args:
        results: Benchmark rows. The first entry supplies the header
            (model path and run name). When the first row duplicates the
            second row's batch size it is dropped from the table (it is
            treated as a warmup measurement).

    Returns:
        Markdown string with a ``### <model>`` header and one table row
        per reported result, or an empty string when ``results`` is empty.
    """
    if not results:
        return ""  # Nothing to report; avoids IndexError on results[0].

    first = results[0]
    model_header = first.model_path
    if first.run_name and first.run_name != "default":
        model_header += f" ({first.run_name})"

    # Tag the header with the GPU configuration; defaults to MI325 when
    # the GPU_CONFIG environment variable is unset.
    gpu_config = os.getenv("GPU_CONFIG", "MI325")
    if gpu_config:
        model_header += f" [{gpu_config}]"

    lines = [
        f"### {model_header}",
        "| batch size | input len | latency (s) | input throughput (tok/s) | output throughput (tok/s) | ITL (ms) |",
        "| ---------- | --------- | ----------- | ------------------------ | ------------------------- | -------- |",
    ]

    # Drop the first result when it repeats the second result's batch
    # size (duplicated first run is a warmup).
    report_results = (
        results[1:]
        if len(results) > 1 and first.batch_size == results[1].batch_size
        else results
    )

    for result in report_results:
        # Inter-token latency in ms per token per request. Guard against a
        # zero throughput reading so a failed run cannot crash reporting.
        if result.output_throughput:
            itl = result.batch_size / result.output_throughput * 1000
        else:
            itl = 0.0
        lines.append(
            f"| {result.batch_size} | {result.input_len} | {result.latency:.2f} "
            f"| {result.input_throughput:.2f} | {result.output_throughput:.2f} | {itl:.2f} |"
        )

    return "\n".join(lines) + "\n"


# Model under test; overridable via the GLM5_MODEL_PATH environment variable.
GLM5_MODEL_PATH = os.environ.get("GLM5_MODEL_PATH", "zai-org/GLM-5-FP8")
# Output directory handed to NightlyBenchmarkRunner for profile artifacts.
PROFILE_DIR = "performance_profiles_glm5"


class TestNightlyGLM5Performance(unittest.TestCase):
    """Nightly performance benchmark for GLM-5.

    Runs GLM-5 with the NSA attention backend (tilelang prefill/decode)
    at TP=8 through bench_one_batch via NightlyBenchmarkRunner.
    """

    @classmethod
    def setUpClass(cls):
        # Sweep parameters; input/output lengths overridable via env vars.
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.batch_sizes = [1, 8, 16, 64]
        cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
        cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))

        # Server launch arguments mirroring the accuracy-test configuration.
        server_args = [
            "--trust-remote-code",
            "--reasoning-parser",
            "glm45",
            "--tool-call-parser",
            "glm47",
            "--tp",
            "8",
            "--nsa-prefill-backend",
            "tilelang",
            "--nsa-decode-backend",
            "tilelang",
            "--kv-cache-dtype",
            "fp8_e4m3",
            "--chunked-prefill-size",
            "131072",
            "--mem-fraction-static",
            "0.85",
            "--model-loader-extra-config",
            '{"enable_multithread_load": true}',
            "--watchdog-timeout",
            "1200",
        ]
        cls.model_config = {
            "name": "glm5",
            "model_path": GLM5_MODEL_PATH,
            "other_args": server_args,
            "env_vars": {
                "SGLANG_USE_AITER": "1",
            },
        }

        cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
        cls.runner.setup_profile_directory()
        cls.runner.full_report = f"## {cls.__name__}\n"

    def test_bench_glm5(self):
        """Run benchmark for GLM-5."""
        # Apply the per-model environment overrides, remembering the prior
        # values so they can be restored afterwards.
        env_overrides = self.model_config.get("env_vars", {})
        saved_env = {name: os.environ.get(name) for name in env_overrides}
        os.environ.update(env_overrides)

        try:
            results, success = self.runner.run_benchmark_for_model(
                model_path=self.model_config["model_path"],
                batch_sizes=self.batch_sizes,
                input_lens=self.input_lens,
                output_lens=self.output_lens,
                other_args=self.model_config["other_args"],
                variant=self.model_config["name"],
                extra_bench_args=["--trust-remote-code"],
                enable_profile=False,
                timeout=5400,
            )[:2]

            if results:
                self.runner.full_report += (
                    generate_simple_markdown_report(results) + "\n"
                )

            self.assertTrue(success, f"Benchmark failed for {GLM5_MODEL_PATH}")
        finally:
            # Restore the environment exactly as it was, then always emit
            # the report even when the benchmark raised.
            for name, previous in saved_env.items():
                if previous is None:
                    os.environ.pop(name, None)
                else:
                    os.environ[name] = previous
            self.runner.write_final_report()


# Allow running this benchmark directly: python test_glm5_perf_amd.py
if __name__ == "__main__":
    unittest.main()
Loading
Loading