diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index c54a5bb7b24a..7648abe1e797 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -359,19 +359,11 @@ jobs: fail-fast: false max-parallel: 5 matrix: - test_file: ["test_server_perf_a.py", "test_server_perf_b.py"] + part: [0, 1] steps: - name: Checkout code uses: actions/checkout@v4 - - name: Download artifacts - if: needs.check-changes.outputs.sgl_kernel == 'true' - uses: actions/download-artifact@v4 - with: - path: sgl-kernel/dist/ - merge-multiple: true - pattern: wheel-python3.10-cuda12.9 - - name: Install dependencies run: | CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion @@ -380,25 +372,25 @@ jobs: timeout-minutes: 60 run: | cd python - pytest -s -v --log-cli-level=INFO sglang/multimodal_gen/test/server/${{ matrix.test_file }} + python3 sglang/multimodal_gen/test/run_suite.py \ + --suite 1-gpu \ + --partition-id ${{ matrix.part }} \ + --total-partitions 2 \ multimodal-gen-test-2-gpu: needs: [check-changes, sgl-kernel-build-wheels] if: (always() && !failure() && !cancelled()) && needs.check-changes.outputs.multimodal_gen == 'true' runs-on: 2-gpu-runner + strategy: + fail-fast: false + max-parallel: 5 + matrix: + part: [0, 1] steps: - name: Checkout code uses: actions/checkout@v4 - - name: Download artifacts - if: needs.check-changes.outputs.sgl_kernel == 'true' - uses: actions/download-artifact@v4 - with: - path: sgl-kernel/dist/ - merge-multiple: true - pattern: wheel-python3.10-cuda12.9 - - name: Install dependencies run: | CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion @@ -407,7 +399,10 @@ jobs: timeout-minutes: 60 run: | cd python - pytest -s -v --log-cli-level=INFO sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py + python3 sglang/multimodal_gen/test/run_suite.py \ + --suite 2-gpu \ + --partition-id 
${{ matrix.part }} \ + --total-partitions 2 unit-test-backend-1-gpu: needs: [check-changes, stage-a-test-1] diff --git a/python/sglang/multimodal_gen/test/run_suite.py b/python/sglang/multimodal_gen/test/run_suite.py new file mode 100644 index 000000000000..02442882418c --- /dev/null +++ b/python/sglang/multimodal_gen/test/run_suite.py @@ -0,0 +1,128 @@ +""" +Test runner for multimodal_gen that manages test suites and parallel execution. + +Usage: + python3 run_suite.py --suite <suite> --partition-id <id> --total-partitions <n> + +Example: + python3 run_suite.py --suite 1-gpu --partition-id 0 --total-partitions 2 +""" + +import argparse +import os +import subprocess +import sys +from pathlib import Path + +from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger + +logger = init_logger(__name__) + +SUITES = { + "1-gpu": [ + "test_server_a.py", + "test_server_b.py", + # add new 1-gpu test files here + ], + "2-gpu": [ + "test_server_2_gpu_a.py", + "test_server_2_gpu_b.py", + # add new 2-gpu test files here + ], +} + + +def parse_args(): + parser = argparse.ArgumentParser(description="Run multimodal_gen test suite") + parser.add_argument( + "--suite", + type=str, + required=True, + choices=list(SUITES.keys()), + help="The test suite to run (e.g., 1-gpu, 2-gpu)", + ) + parser.add_argument( + "--partition-id", + type=int, + default=0, + help="Index of the current partition (for parallel execution)", + ) + parser.add_argument( + "--total-partitions", + type=int, + default=1, + help="Total number of partitions", + ) + parser.add_argument( + "--base-dir", + type=str, + default="server", + help="Base directory for tests relative to this script's parent", + ) + return parser.parse_args() + + +def run_pytest(files): + if not files: + print("No files to run.") + return 0 + + cmd = [sys.executable, "-m", "pytest", "-s", "-v", "--log-cli-level=INFO"] + files + + logger.info(f"Running command: {' '.join(cmd)}") + result = subprocess.run(cmd) + return result.returncode + + +def 
main(): + args = parse_args() + + # 1. resolve base path + current_file_path = Path(__file__).resolve() + test_root_dir = current_file_path.parent + target_dir = test_root_dir / args.base_dir + + if not target_dir.exists(): + print(f"Error: Target directory {target_dir} does not exist.") + sys.exit(1) + + # 2. get files from suite + suite_files_rel = SUITES[args.suite] + + suite_files_abs = [] + for f_rel in suite_files_rel: + f_abs = target_dir / f_rel + if not f_abs.exists(): + print(f"Warning: Test file {f_rel} not found in {target_dir}. Skipping.") + continue + suite_files_abs.append(str(f_abs)) + + if not suite_files_abs: + print(f"No valid test files found for suite '{args.suite}'.") + sys.exit(0) + + # 3. partitioning + my_files = [ + f + for i, f in enumerate(suite_files_abs) + if i % args.total_partitions == args.partition_id + ] + + print( + f"Suite: {args.suite} | Partition: {args.partition_id}/{args.total_partitions}" + ) + print(f"Selected {len(my_files)} files:") + for f in my_files: + print(f" - {os.path.basename(f)}") + + if not my_files: + print("No files assigned to this partition. Exiting success.") + sys.exit(0) + + # 4. 
execute + exit_code = run_pytest(my_files) + sys.exit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 0d30b1b3059b..6661f495f848 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -597,70 +597,71 @@ }, "wan2_1_i2v_14b_480P_2gpu": { "stages_ms": { - "InputValidationStage": 33.57, - "TextEncodingStage": 2424.73, + "InputValidationStage": 38.23, + "TextEncodingStage": 3550.36, "ImageEncodingStage": 3462.55, "ConditioningStage": 0.01, - "TimestepPreparationStage": 2.69, + "TimestepPreparationStage": 2.6, "LatentPreparationStage": 9.73, "ImageVAEEncodingStage": 2290.98, - "DenoisingStage": 414428.85, - "DecodingStage": 3016.1 + "DenoisingStage": 415021.17, + "DecodingStage": 3016.1, + "per_frame_generation": null }, "denoise_step_ms": { - "0": 9304.67, - "1": 8218.78, - "2": 8269.27, - "3": 8291.59, - "4": 8308.29, - "5": 8300.75, - "6": 8302.76, - "7": 8297.95, - "8": 8295.26, - "9": 8296.45, - "10": 8287.48, - "11": 8275.98, - "12": 8281.9, - "13": 8283.39, - "14": 8264.96, - "15": 8275.66, - "16": 8271.89, - "17": 8273.77, - "18": 8279.34, - "19": 8271.89, - "20": 8265.83, - "21": 8259.99, - "22": 8260.36, - "23": 8270.06, - "24": 8271.58, - "25": 8272.39, - "26": 8267.87, - "27": 8277.09, - "28": 8264.49, - "29": 8266.14, - "30": 8263.67, - "31": 8273.82, - "32": 8260.5, - "33": 8268.44, - "34": 8253.2, - "35": 8244.32, - "36": 8258.15, - "37": 8256.65, - "38": 8255.48, - "39": 8260.09, - "40": 8250.99, - "41": 8253.52, - "42": 8247.39, - "43": 8252.7, - "44": 8243.67, - "45": 8251.94, - "46": 8258.73, - "47": 8240.57, - "48": 8249.64, - "49": 8248.14 + "0": 10200.25, + "1": 8222.39, + "2": 8279.38, + "3": 8301.48, + "4": 8338.87, + "5": 8352.39, + "6": 8354.64, + "7": 8353.64, + "8": 8315.58, + "9": 8308.48, + "10": 8299.65, + 
"11": 8292.7, + "12": 8292.73, + "13": 8285.21, + "14": 8276.06, + "15": 8270.41, + "16": 8273.04, + "17": 8266.04, + "18": 8267.7, + "19": 8264.06, + "20": 8259.32, + "21": 8257.26, + "22": 8253.02, + "23": 8251.77, + "24": 8260.97, + "25": 8251.39, + "26": 8237.43, + "27": 8241.33, + "28": 8235.96, + "29": 8240.6, + "30": 8232.48, + "31": 8237.85, + "32": 8244.3, + "33": 8236.79, + "34": 8239.83, + "35": 8239.89, + "36": 8239.12, + "37": 8246.74, + "38": 8235.67, + "39": 8242.77, + "40": 8241.17, + "41": 8240.24, + "42": 8237.01, + "43": 8231.26, + "44": 8232.85, + "45": 8226.56, + "46": 8236.98, + "47": 8226.73, + "48": 8220.49, + "49": 8217.04 }, - "expected_e2e_ms": 425569.98, - "expected_avg_denoise_ms": 8288.39, + "expected_e2e_ms": 426697.37, + "expected_avg_denoise_ms": 8300.19, "expected_median_denoise_ms": 8267.01 }, "wan2_1_i2v_14b_720P_2gpu": { diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py similarity index 52% rename from python/sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py rename to python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py index 1a85e9a87523..3668f63e6334 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py +++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py @@ -1,25 +1,25 @@ """ -2 GPU Performance tests (A14B models) with --num-gpus 2 --ulysses-degree 2. 
+2 GPU tests """ from __future__ import annotations import pytest -from sglang.multimodal_gen.test.server.test_server_perf_common import ( # noqa: F401 - DiffusionPerformanceBase, +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionServerBase, diffusion_server, ) from sglang.multimodal_gen.test.server.testcase_configs import ( - TWO_GPU_CASES, + TWO_GPU_CASES_A, DiffusionTestCase, ) -class TestDiffusionPerformanceTwoGpu(DiffusionPerformanceBase): +class TestDiffusionServerTwoGpu(DiffusionServerBase): """Performance tests for 2-GPU diffusion cases.""" - @pytest.fixture(params=TWO_GPU_CASES, ids=lambda c: c.id) + @pytest.fixture(params=TWO_GPU_CASES_A, ids=lambda c: c.id) def case(self, request) -> DiffusionTestCase: """Provide a DiffusionTestCase for each 2-GPU test.""" return request.param diff --git a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py new file mode 100644 index 000000000000..2c9b5cdc7640 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py @@ -0,0 +1,25 @@ +""" +2 GPU tests +""" + +from __future__ import annotations + +import pytest + +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionServerBase, + diffusion_server, +) +from sglang.multimodal_gen.test.server.testcase_configs import ( + TWO_GPU_CASES_B, + DiffusionTestCase, +) + + +class TestDiffusionServerTwoGpu(DiffusionServerBase): + """Performance tests for 2-GPU diffusion cases.""" + + @pytest.fixture(params=TWO_GPU_CASES_B, ids=lambda c: c.id) + def case(self, request) -> DiffusionTestCase: + """Provide a DiffusionTestCase for each 2-GPU test.""" + return request.param diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_a.py b/python/sglang/multimodal_gen/test/server/test_server_a.py similarity index 80% rename from python/sglang/multimodal_gen/test/server/test_server_perf_a.py rename 
to python/sglang/multimodal_gen/test/server/test_server_a.py index f99b410bf14c..fdf072ec89e1 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_a.py +++ b/python/sglang/multimodal_gen/test/server/test_server_a.py @@ -10,8 +10,8 @@ import pytest from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger -from sglang.multimodal_gen.test.server.test_server_perf_common import ( # noqa: F401 - DiffusionPerformanceBase, +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionServerBase, diffusion_server, ) from sglang.multimodal_gen.test.server.testcase_configs import ( @@ -22,7 +22,7 @@ logger = init_logger(__name__) -class TestDiffusionPerformanceOneGpu(DiffusionPerformanceBase): +class TestDiffusionServerOneGpu(DiffusionServerBase): """Performance tests for 1-GPU diffusion cases.""" @pytest.fixture(params=ONE_GPU_CASES_A, ids=lambda c: c.id) diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_b.py b/python/sglang/multimodal_gen/test/server/test_server_b.py similarity index 80% rename from python/sglang/multimodal_gen/test/server/test_server_perf_b.py rename to python/sglang/multimodal_gen/test/server/test_server_b.py index 0faa8fc647eb..1a0432db6f3b 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_b.py +++ b/python/sglang/multimodal_gen/test/server/test_server_b.py @@ -10,8 +10,8 @@ import pytest from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger -from sglang.multimodal_gen.test.server.test_server_perf_common import ( # noqa: F401 - DiffusionPerformanceBase, +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionServerBase, diffusion_server, ) from sglang.multimodal_gen.test.server.testcase_configs import ( @@ -22,7 +22,7 @@ logger = init_logger(__name__) -class TestDiffusionPerformanceOneGpu(DiffusionPerformanceBase): +class TestDiffusionServerOneGpu(DiffusionServerBase): """Performance tests for 
1-GPU diffusion cases.""" @pytest.fixture(params=ONE_GPU_CASES_B, ids=lambda c: c.id) diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_common.py b/python/sglang/multimodal_gen/test/server/test_server_common.py similarity index 99% rename from python/sglang/multimodal_gen/test/server/test_server_perf_common.py rename to python/sglang/multimodal_gen/test/server/test_server_common.py index 4da017f1e84f..c7bf6fbadba3 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_common.py +++ b/python/sglang/multimodal_gen/test/server/test_server_common.py @@ -94,7 +94,7 @@ def diffusion_server(case: DiffusionTestCase) -> ServerContext: ctx.cleanup() -class DiffusionPerformanceBase: +class DiffusionServerBase: """Performance tests for all diffusion models/scenarios. This single test class runs against all cases defined in ONE_GPU_CASES. diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index 83cf91ef8c78..d02b4dd1d858 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -3,14 +3,14 @@ Usage: -pytest python/sglang/multimodal_gen/test/server/test_server_performance.py +pytest python/sglang/multimodal_gen/test/server/test_server_a.py # for a single testcase, look for the name of the testcases in DIFFUSION_CASES -pytest python/sglang/multimodal_gen/test/server/test_server_performance.py -k qwen_image_t2i +pytest python/sglang/multimodal_gen/test/server/test_server_a.py -k qwen_image_t2i To add a new testcase: 1. add your testcase with case-id: `my_new_test_case_id` to DIFFUSION_CASES -2. run `SGLANG_GEN_BASELINE=1 pytest -s python/sglang/multimodal_gen/test/server/test_server_performance.py -k my_new_test_case_id` +2. run `SGLANG_GEN_BASELINE=1 pytest -s python/sglang/multimodal_gen/test/server/test_server_a.py -k my_new_test_case_id` 3. 
insert or override the corresponding scenario in `scenarios` section of perf_baselines.json with the output baseline of step-2 @@ -306,7 +306,7 @@ def from_req_perf_record( ), ] -TWO_GPU_CASES = [ +TWO_GPU_CASES_A = [ DiffusionTestCase( id="wan2_2_i2v_a14b_2gpu", model_path="Wan-AI/Wan2.2-I2V-A14B-Diffusers", @@ -343,6 +343,9 @@ def from_req_perf_record( custom_validator="video", num_gpus=2, ), +] + +TWO_GPU_CASES_B = [ DiffusionTestCase( id="wan2_1_i2v_14b_480P_2gpu", model_path="Wan-AI/Wan2.1-I2V-14B-480P-Diffusers",