28 changes: 26 additions & 2 deletions .github/workflows/nightly-test-amd-rocm720.yml
@@ -685,7 +685,7 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

  # 8-GPU MiniMax-M2.5 (Accuracy) ROCm 7.2
  # 8-GPU MiniMax-M2.5 (Accuracy + Performance combined) ROCm 7.2
  nightly-8-gpu-minimax-m25-rocm720:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25-rocm720,'))
    runs-on: linux-mi325-8gpu-sglang
@@ -716,6 +716,18 @@ jobs:
echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
exit ${TEST_EXIT_CODE:-0}

      - name: Performance Test ROCm 7.2 (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-minimax-m25 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x ROCm 7.2 Diffusion Tests ==============================================
  # 1-GPU Z-Image-Turbo (Diffusion T2I) ROCm 7.2
  nightly-1-gpu-zimage-turbo-rocm720:
@@ -1306,7 +1318,7 @@ jobs:
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU MiniMax-M2.5 (Accuracy) ROCm 7.2
  # MI35x 8-GPU MiniMax-M2.5 (Accuracy + Performance combined) ROCm 7.2
  nightly-8-gpu-mi35x-minimax-m25-rocm720:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25-rocm720,'))
    runs-on: linux-mi35x-gpu-8
@@ -1339,6 +1351,18 @@ jobs:
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      - name: Performance Test MI35x ROCm 7.2 (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP) ROCm 7.2
  nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp-rocm720,'))
28 changes: 26 additions & 2 deletions .github/workflows/nightly-test-amd.yml
@@ -687,7 +687,7 @@ jobs:
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # 8-GPU MiniMax-M2.5 (Accuracy)
  # 8-GPU MiniMax-M2.5 (Accuracy + Performance combined)
  nightly-8-gpu-minimax-m25:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-minimax-m25,'))
    runs-on: linux-mi325-8gpu-sglang
@@ -718,6 +718,18 @@ jobs:
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      - name: Performance Test (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-minimax-m25 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # ============================================== MI30x Diffusion Tests ==============================================
  # 1-GPU Z-Image-Turbo (Diffusion T2I)
  nightly-1-gpu-zimage-turbo:
@@ -1278,7 +1290,7 @@ jobs:
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU MiniMax-M2.5 (Accuracy)
  # MI35x 8-GPU MiniMax-M2.5 (Accuracy + Performance combined)
  nightly-8-gpu-mi35x-minimax-m25:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-8-gpu-mi35x-minimax-m25,'))
    runs-on: linux-mi35x-gpu-8
@@ -1311,6 +1323,18 @@ jobs:
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

      - name: Performance Test MI35x (8-GPU MiniMax-M2.5)
        timeout-minutes: 120
        continue-on-error: true # Perf test failure doesn't fail the job if accuracy passed
        run: |
          > github_summary.md # Clear summary file
          bash scripts/ci/amd/amd_ci_exec.sh -w /sglang-checkout/test \
            -e SGLANG_USE_AITER=1 \
            -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" \
            python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-minimax-m25 --nightly --timeout-per-file 5400 ${{ inputs.continue_on_error && '--continue-on-error' || '' }} || TEST_EXIT_CODE=$?
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY || true
          exit ${TEST_EXIT_CODE:-0}

  # MI35x 8-GPU DeepSeek-V3.2 Performance Test (MTP)
  nightly-perf-8-gpu-mi35x-deepseek-v32-mtp:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && (!(inputs.job_filter || inputs.job_select) || (inputs.job_filter || inputs.job_select) == 'all' || contains(format(',{0},', inputs.job_filter || inputs.job_select), ',nightly-perf-8-gpu-mi35x-deepseek-v32-mtp,'))
140 changes: 140 additions & 0 deletions test/registered/amd/perf/mi30x/test_minimax_m25_perf_amd.py
@@ -0,0 +1,140 @@
"""Nightly performance benchmark for MiniMax-M2.5 on MI325/MI300X (8-GPU).

This test benchmarks MiniMax-M2.5 with TP=8 + EP=8 configuration.

The model path can be configured via the MINIMAX_M25_MODEL_PATH environment variable.

Registry: nightly-perf-8-gpu-minimax-m25 suite

Example usage:
    python -m pytest test_minimax_m25_perf_amd.py -v
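
CI invokes it through the suite runner (abbreviated; see the workflow change above):
    python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-minimax-m25 --nightly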
"""

import os
import unittest
from typing import List

from sglang.test.ci.ci_register import register_amd_ci
from sglang.test.nightly_bench_utils import BenchmarkResult
from sglang.test.nightly_utils import NightlyBenchmarkRunner
from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env

register_amd_ci(est_time=5400, suite="nightly-perf-8-gpu-minimax-m25", nightly=True)


def generate_simple_markdown_report(results: List[BenchmarkResult]) -> str:
"""Generate a simplified markdown report without traces and cost columns.

Skips the first result if it's a warmup run (duplicate batch_size).
"""
    model_header = results[0].model_path
    if results[0].run_name and results[0].run_name != "default":
        model_header += f" ({results[0].run_name})"

    gpu_config = os.getenv("GPU_CONFIG", "MI325")
    if gpu_config:
        model_header += f" [{gpu_config}]"

    summary = f"### {model_header}\n"
    summary += "| batch size | input len | latency (s) | input throughput (tok/s) | output throughput (tok/s) | ITL (ms) |\n"
    summary += "| ---------- | --------- | ----------- | ------------------------ | ------------------------- | -------- |\n"

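    # A duplicated leading batch_size indicates the runner prepended a warmup
    # pass; leave that entry out of the report.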
    report_results = (
        results[1:]
        if len(results) > 1 and results[0].batch_size == results[1].batch_size
        else results
    )

    for result in report_results:
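        # Per-request inter-token latency in ms: batch_size / output_throughput * 1000.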
        itl = 1 / (result.output_throughput / result.batch_size) * 1000
        summary += f"| {result.batch_size} | {result.input_len} | {result.latency:.2f} | {result.input_throughput:.2f} | {result.output_throughput:.2f} | {itl:.2f} |\n"

    return summary


MINIMAX_M25_MODEL_PATH = os.environ.get(
"MINIMAX_M25_MODEL_PATH", "MiniMaxAI/MiniMax-M2.5"
)
PROFILE_DIR = "performance_profiles_minimax_m25"


class TestNightlyMiniMaxM25Performance(unittest.TestCase):
"""Nightly performance benchmark for MiniMax-M2.5 on MI325/MI300X.

Tests MiniMax-M2.5 with TP=8 + EP=8 configuration.
"""

    @classmethod
    def setUpClass(cls):
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.batch_sizes = [1, 8, 16, 64]
        cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
        cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))

        cls.model_config = {
            "name": "minimax-m25-tp8-ep8",
            "model_path": MINIMAX_M25_MODEL_PATH,
            "other_args": [
                "--trust-remote-code",
                "--tp",
                "8",
                "--ep-size",
                "8",
                "--attention-backend",
                "aiter",
                "--mem-fraction-static",
                "0.85",
                "--model-loader-extra-config",
                '{"enable_multithread_load": true}',
                "--watchdog-timeout",
                "1200",
            ],
            "env_vars": {
                "SGLANG_USE_AITER": "1",
            },
        }

        cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
        cls.runner.setup_profile_directory()
        cls.runner.full_report = f"## {cls.__name__}\n"

    def test_bench_minimax_m25(self):
        """Run benchmark for MiniMax-M2.5."""
        old_env = {}
        for key, value in self.model_config.get("env_vars", {}).items():
            old_env[key] = os.environ.get(key)
            os.environ[key] = value
            print(f"Setting env: {key}={value}")

        try:
            result_tuple = self.runner.run_benchmark_for_model(
                model_path=self.model_config["model_path"],
                batch_sizes=self.batch_sizes,
                input_lens=self.input_lens,
                output_lens=self.output_lens,
                other_args=self.model_config["other_args"],
                variant=self.model_config["name"],
                extra_bench_args=["--trust-remote-code"],
                enable_profile=False,
                timeout=5400,
            )
            results = result_tuple[0]
            success = result_tuple[1]

            if results:
                self.runner.full_report += (
                    generate_simple_markdown_report(results) + "\n"
                )

            self.assertTrue(success, "Benchmark failed for MiniMax-M2.5")
        finally:
            for key, value in old_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value
            self.runner.write_final_report()


if __name__ == "__main__":
    unittest.main()
146 changes: 146 additions & 0 deletions test/registered/amd/perf/mi35x/test_minimax_m25_perf_mi35x.py
@@ -0,0 +1,146 @@
"""MI35x Nightly performance benchmark for MiniMax-M2.5 (8-GPU).

This test benchmarks MiniMax-M2.5 with TP=8 + EP=8 configuration on MI35x.

The model path can be configured via the MINIMAX_M25_MODEL_PATH environment variable.

Registry: nightly-perf-8-gpu-mi35x-minimax-m25 suite

Example usage:
    python -m pytest test_minimax_m25_perf_mi35x.py -v
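
CI invokes it through the suite runner (abbreviated; see the workflow change above):
    python3 run_suite.py --hw amd --suite nightly-perf-8-gpu-mi35x-minimax-m25 --nightly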
"""

import os

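# Point the HF caches at the runner-local model store before importing sglang;
# setdefault preserves any values already set in the environment.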
os.environ.setdefault("HF_HOME", "/data2/models/huggingface")
os.environ.setdefault("HF_HUB_CACHE", "/data2/models/huggingface/hub")

import unittest
from typing import List

from sglang.test.ci.ci_register import register_amd_ci
from sglang.test.nightly_bench_utils import BenchmarkResult
from sglang.test.nightly_utils import NightlyBenchmarkRunner
from sglang.test.test_utils import DEFAULT_URL_FOR_TEST, _parse_int_list_env

register_amd_ci(
    est_time=5400, suite="nightly-perf-8-gpu-mi35x-minimax-m25", nightly=True
)


def generate_simple_markdown_report(results: List[BenchmarkResult]) -> str:
"""Generate a simplified markdown report without traces and cost columns.

Skips the first result if it's a warmup run (duplicate batch_size).
"""
    model_header = results[0].model_path
    if results[0].run_name and results[0].run_name != "default":
        model_header += f" ({results[0].run_name})"

    gpu_config = os.getenv("GPU_CONFIG", "MI35x")
    if gpu_config:
        model_header += f" [{gpu_config}]"

    summary = f"### {model_header}\n"
    summary += "| batch size | input len | latency (s) | input throughput (tok/s) | output throughput (tok/s) | ITL (ms) |\n"
    summary += "| ---------- | --------- | ----------- | ------------------------ | ------------------------- | -------- |\n"

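    # A duplicated leading batch_size indicates the runner prepended a warmup
    # pass; leave that entry out of the report.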
    report_results = (
        results[1:]
        if len(results) > 1 and results[0].batch_size == results[1].batch_size
        else results
    )

    for result in report_results:
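        # Per-request inter-token latency in ms: batch_size / output_throughput * 1000.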
        itl = 1 / (result.output_throughput / result.batch_size) * 1000
        summary += f"| {result.batch_size} | {result.input_len} | {result.latency:.2f} | {result.input_throughput:.2f} | {result.output_throughput:.2f} | {itl:.2f} |\n"

    return summary


MINIMAX_M25_MODEL_PATH = os.environ.get(
"MINIMAX_M25_MODEL_PATH", "MiniMaxAI/MiniMax-M2.5"
)
PROFILE_DIR = "performance_profiles_minimax_m25_mi35x"


class TestNightlyMiniMaxM25PerformanceMI35x(unittest.TestCase):
"""MI35x Nightly performance benchmark for MiniMax-M2.5.

Tests MiniMax-M2.5 with TP=8 + EP=8 configuration.
"""

    @classmethod
    def setUpClass(cls):
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.batch_sizes = [1, 8, 16, 64]
        cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
        cls.output_lens = tuple(_parse_int_list_env("NIGHTLY_OUTPUT_LENS", "512"))

        cls.model_config = {
            "name": "minimax-m25-tp8-ep8",
            "model_path": MINIMAX_M25_MODEL_PATH,
            "other_args": [
                "--trust-remote-code",
                "--tp",
                "8",
                "--ep-size",
                "8",
                "--attention-backend",
                "aiter",
                "--mem-fraction-static",
                "0.85",
                "--model-loader-extra-config",
                '{"enable_multithread_load": true}',
                "--watchdog-timeout",
                "1200",
            ],
            "env_vars": {
                "SGLANG_USE_AITER": "1",
            },
        }

        cls.runner = NightlyBenchmarkRunner(PROFILE_DIR, cls.__name__, cls.base_url)
        cls.runner.setup_profile_directory()
        cls.runner.full_report = f"## {cls.__name__}\n"

    def test_bench_minimax_m25(self):
        """Run benchmark for MiniMax-M2.5."""
        old_env = {}
        for key, value in self.model_config.get("env_vars", {}).items():
            old_env[key] = os.environ.get(key)
            os.environ[key] = value
            print(f"Setting env: {key}={value}")

        try:
            result_tuple = self.runner.run_benchmark_for_model(
                model_path=self.model_config["model_path"],
                batch_sizes=self.batch_sizes,
                input_lens=self.input_lens,
                output_lens=self.output_lens,
                other_args=self.model_config["other_args"],
                variant=self.model_config["name"],
                extra_bench_args=["--trust-remote-code"],
                enable_profile=False,
                timeout=5400,
            )
            results = result_tuple[0]
            success = result_tuple[1]

            if results:
                self.runner.full_report += (
                    generate_simple_markdown_report(results) + "\n"
                )

            self.assertTrue(success, "Benchmark failed for MiniMax-M2.5 on MI35x")
        finally:
            for key, value in old_env.items():
                if value is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = value
            self.runner.write_final_report()


if __name__ == "__main__":
    unittest.main()