Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 14 additions & 19 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -359,19 +359,11 @@ jobs:
fail-fast: false
max-parallel: 5
matrix:
test_file: ["test_server_perf_a.py", "test_server_perf_b.py"]
part: [0, 1]
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion
Expand All @@ -380,25 +372,25 @@ jobs:
timeout-minutes: 60
run: |
cd python
pytest -s -v --log-cli-level=INFO sglang/multimodal_gen/test/server/${{ matrix.test_file }}
# Run only this matrix partition's share of the 1-gpu suite.
python3 sglang/multimodal_gen/test/run_suite.py \
  --suite 1-gpu \
  --partition-id ${{ matrix.part }} \
  --total-partitions 2


multimodal-gen-test-2-gpu:
needs: [check-changes, sgl-kernel-build-wheels]
if: (always() && !failure() && !cancelled()) && needs.check-changes.outputs.multimodal_gen == 'true'
runs-on: 2-gpu-runner
strategy:
fail-fast: false
max-parallel: 5
matrix:
part: [0, 1]
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Download artifacts
if: needs.check-changes.outputs.sgl_kernel == 'true'
uses: actions/download-artifact@v4
with:
path: sgl-kernel/dist/
merge-multiple: true
pattern: wheel-python3.10-cuda12.9

- name: Install dependencies
run: |
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion
Expand All @@ -407,7 +399,10 @@ jobs:
timeout-minutes: 60
run: |
cd python
pytest -s -v --log-cli-level=INFO sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py
python3 sglang/multimodal_gen/test/run_suite.py \
--suite 2-gpu \
--partition-id ${{ matrix.part }} \
--total-partitions 2

unit-test-backend-1-gpu:
needs: [check-changes, stage-a-test-1]
Expand Down
128 changes: 128 additions & 0 deletions python/sglang/multimodal_gen/test/run_suite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""
Test runner for multimodal_gen that manages test suites and parallel execution.

Usage:
python3 run_suite.py --suite <suite_name> --partition-id <id> --total-partitions <num>

Example:
python3 run_suite.py --suite 1-gpu --partition-id 0 --total-partitions 2
"""

import argparse
import os
import subprocess
import sys
from pathlib import Path

from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger

logger = init_logger(__name__)

# Suite name -> test file names, resolved relative to the --base-dir directory.
# List order matters: files are dealt out to partitions by list index.
SUITES = {
    # add new 1-gpu test files to this list
    "1-gpu": ["test_server_a.py", "test_server_b.py"],
    # add new 2-gpu test files to this list
    "2-gpu": ["test_server_2_gpu_a.py", "test_server_2_gpu_b.py"],
}


def parse_args():
    """Parse and validate the command-line options of the suite runner.

    Returns:
        argparse.Namespace with ``suite``, ``partition_id``,
        ``total_partitions`` and ``base_dir`` attributes.

    Exits through ``parser.error`` (usage message, status 2) when
    ``--total-partitions`` is below 1 or ``--partition-id`` is outside
    ``[0, total_partitions)``.
    """
    parser = argparse.ArgumentParser(description="Run multimodal_gen test suite")
    parser.add_argument(
        "--suite",
        type=str,
        required=True,
        choices=list(SUITES.keys()),
        help="The test suite to run (e.g., 1-gpu, 2-gpu)",
    )
    parser.add_argument(
        "--partition-id",
        type=int,
        default=0,
        help="Index of the current partition (for parallel execution)",
    )
    parser.add_argument(
        "--total-partitions",
        type=int,
        default=1,
        help="Total number of partitions",
    )
    parser.add_argument(
        "--base-dir",
        type=str,
        default="server",
        help="Base directory for tests relative to this script's parent",
    )
    args = parser.parse_args()

    # Files are selected with `index % total_partitions == partition_id`.
    # A total of 0 would raise ZeroDivisionError there, and an id outside
    # [0, total) would match no file and let the run exit successfully
    # without executing a single test. Reject both up front.
    if args.total_partitions < 1:
        parser.error("--total-partitions must be at least 1")
    if not 0 <= args.partition_id < args.total_partitions:
        parser.error(
            f"--partition-id must be in [0, {args.total_partitions - 1}], "
            f"got {args.partition_id}"
        )

    return args


def run_pytest(files):
    """Run pytest on ``files`` in a child process and report its exit status.

    Args:
        files: list of test file paths appended to the pytest command line.

    Returns:
        0 when ``files`` is empty (pytest is not launched); otherwise the
        return code of the pytest subprocess.
    """
    if files:
        pytest_prefix = [
            sys.executable,
            "-m",
            "pytest",
            "-s",
            "-v",
            "--log-cli-level=INFO",
        ]
        full_command = pytest_prefix + files

        logger.info(f"Running command: {' '.join(full_command)}")
        completed = subprocess.run(full_command)
        return completed.returncode

    print("No files to run.")
    return 0


def main():
    """Resolve the requested suite, pick this partition's files, run pytest.

    The process always terminates through ``sys.exit``:
      * 1 when the base test directory does not exist,
      * 0 when the suite has no existing files or this partition gets none,
      * otherwise the exit code returned by ``run_pytest``.
    """
    args = parse_args()

    # Tests live in <directory containing this script>/<base_dir>.
    target_dir = Path(__file__).resolve().parent / args.base_dir
    if not target_dir.exists():
        print(f"Error: Target directory {target_dir} does not exist.")
        sys.exit(1)

    # Keep only the suite files that are present on disk; warn about the rest.
    present_files = []
    for rel_name in SUITES[args.suite]:
        candidate = target_dir / rel_name
        if candidate.exists():
            present_files.append(str(candidate))
        else:
            print(
                f"Warning: Test file {rel_name} not found in {target_dir}. Skipping."
            )

    if not present_files:
        print(f"No valid test files found for suite '{args.suite}'.")
        sys.exit(0)

    # A file belongs to this partition when its index (among the files that
    # exist) is congruent to partition_id modulo total_partitions.
    selected = []
    for index, path in enumerate(present_files):
        if index % args.total_partitions == args.partition_id:
            selected.append(path)

    print(
        f"Suite: {args.suite} | Partition: {args.partition_id}/{args.total_partitions}"
    )
    print(f"Selected {len(selected)} files:")
    for path in selected:
        print(f" - {os.path.basename(path)}")

    if selected:
        sys.exit(run_pytest(selected))

    print("No files assigned to this partition. Exiting success.")
    sys.exit(0)


if __name__ == "__main__":
    main()
115 changes: 58 additions & 57 deletions python/sglang/multimodal_gen/test/server/perf_baselines.json
Original file line number Diff line number Diff line change
Expand Up @@ -597,70 +597,71 @@
},
"wan2_1_i2v_14b_480P_2gpu": {
"stages_ms": {
"InputValidationStage": 33.57,
"TextEncodingStage": 2424.73,
"InputValidationStage": 38.23,
"TextEncodingStage": 3550.36,
"ImageEncodingStage": 3462.55,
"ConditioningStage": 0.01,
"TimestepPreparationStage": 2.69,
"TimestepPreparationStage": 2.6,
"LatentPreparationStage": 9.73,
"ImageVAEEncodingStage": 2290.98,
"DenoisingStage": 414428.85,
"DecodingStage": 3016.1
"DenoisingStage": 415021.17,
"DecodingStage": 3016.1,
"per_frame_generation": null
},
"denoise_step_ms": {
"0": 9304.67,
"1": 8218.78,
"2": 8269.27,
"3": 8291.59,
"4": 8308.29,
"5": 8300.75,
"6": 8302.76,
"7": 8297.95,
"8": 8295.26,
"9": 8296.45,
"10": 8287.48,
"11": 8275.98,
"12": 8281.9,
"13": 8283.39,
"14": 8264.96,
"15": 8275.66,
"16": 8271.89,
"17": 8273.77,
"18": 8279.34,
"19": 8271.89,
"20": 8265.83,
"21": 8259.99,
"22": 8260.36,
"23": 8270.06,
"24": 8271.58,
"25": 8272.39,
"26": 8267.87,
"27": 8277.09,
"28": 8264.49,
"29": 8266.14,
"30": 8263.67,
"31": 8273.82,
"32": 8260.5,
"33": 8268.44,
"34": 8253.2,
"35": 8244.32,
"36": 8258.15,
"37": 8256.65,
"38": 8255.48,
"39": 8260.09,
"40": 8250.99,
"41": 8253.52,
"42": 8247.39,
"43": 8252.7,
"44": 8243.67,
"45": 8251.94,
"46": 8258.73,
"47": 8240.57,
"48": 8249.64,
"49": 8248.14
"0": 10200.25,
"1": 8222.39,
"2": 8279.38,
"3": 8301.48,
"4": 8338.87,
"5": 8352.39,
"6": 8354.64,
"7": 8353.64,
"8": 8315.58,
"9": 8308.48,
"10": 8299.65,
"11": 8292.7,
"12": 8292.73,
"13": 8285.21,
"14": 8276.06,
"15": 8270.41,
"16": 8273.04,
"17": 8266.04,
"18": 8267.7,
"19": 8264.06,
"20": 8259.32,
"21": 8257.26,
"22": 8253.02,
"23": 8251.77,
"24": 8260.97,
"25": 8251.39,
"26": 8237.43,
"27": 8241.33,
"28": 8235.96,
"29": 8240.6,
"30": 8232.48,
"31": 8237.85,
"32": 8244.3,
"33": 8236.79,
"34": 8239.83,
"35": 8239.89,
"36": 8239.12,
"37": 8246.74,
"38": 8235.67,
"39": 8242.77,
"40": 8241.17,
"41": 8240.24,
"42": 8237.01,
"43": 8231.26,
"44": 8232.85,
"45": 8226.56,
"46": 8236.98,
"47": 8226.73,
"48": 8220.49,
"49": 8217.04
},
"expected_e2e_ms": 425569.98,
"expected_avg_denoise_ms": 8288.39,
"expected_e2e_ms": 426697.37,
"expected_avg_denoise_ms": 8300.19,
"expected_median_denoise_ms": 8267.01
},
"wan2_1_i2v_14b_720P_2gpu": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
"""
2 GPU Performance tests (A14B models) with --num-gpus 2 --ulysses-degree 2.
2 GPU tests
"""

from __future__ import annotations

import pytest

from sglang.multimodal_gen.test.server.test_server_perf_common import ( # noqa: F401
DiffusionPerformanceBase,
from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401
DiffusionServerBase,
diffusion_server,
)
from sglang.multimodal_gen.test.server.testcase_configs import (
TWO_GPU_CASES,
TWO_GPU_CASES_A,
DiffusionTestCase,
)


class TestDiffusionPerformanceTwoGpu(DiffusionPerformanceBase):
class TestDiffusionServerTwoGpu(DiffusionServerBase):
"""Performance tests for 2-GPU diffusion cases."""

@pytest.fixture(params=TWO_GPU_CASES, ids=lambda c: c.id)
@pytest.fixture(params=TWO_GPU_CASES_A, ids=lambda c: c.id)
def case(self, request) -> DiffusionTestCase:
"""Provide a DiffusionTestCase for each 2-GPU test."""
return request.param
25 changes: 25 additions & 0 deletions python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
2 GPU tests
"""

from __future__ import annotations

import pytest

from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401
DiffusionServerBase,
diffusion_server,
)
from sglang.multimodal_gen.test.server.testcase_configs import (
TWO_GPU_CASES_B,
DiffusionTestCase,
)


class TestDiffusionServerTwoGpu(DiffusionServerBase):
    """2-GPU diffusion server tests, parametrized over ``TWO_GPU_CASES_B``.

    NOTE(review): the test methods are presumably inherited from
    ``DiffusionServerBase`` -- confirm against test_server_common.
    """

    @pytest.fixture(params=TWO_GPU_CASES_B, ids=lambda test_case: test_case.id)
    def case(self, request) -> DiffusionTestCase:
        """Return the ``DiffusionTestCase`` pytest selected for this run."""
        selected_case = request.param
        return selected_case
Loading
Loading