From eeed06a982806f165837a7ae4dfef86987e7b95d Mon Sep 17 00:00:00 2001 From: Mick Date: Thu, 20 Nov 2025 14:39:36 +0800 Subject: [PATCH 01/11] upd --- .../sglang/multimodal_gen/test/server/test_server_perf_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_common.py b/python/sglang/multimodal_gen/test/server/test_server_perf_common.py index 4da017f1e84f..db29775f523e 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_common.py +++ b/python/sglang/multimodal_gen/test/server/test_server_perf_common.py @@ -542,6 +542,7 @@ def _dump_baseline_for_testcase( """ logger.error(output) + logger.error(output) def test_diffusion_perf( self, From 297ee09f46c40faca6a7a93146811d27e35f00b4 Mon Sep 17 00:00:00 2001 From: Mick Date: Sun, 23 Nov 2025 18:13:10 +0800 Subject: [PATCH 02/11] [diffusion] CI: add run_suite to multimodal_gen CI --- .github/workflows/pr-test.yml | 37 +++--- .../sglang/multimodal_gen/test/run_suite.py | 113 ++++++++++++++++++ .../test/server/test_server_2_gpu_b.py | 25 ++++ .../test/server/testcase_configs.py | 25 ++-- 4 files changed, 164 insertions(+), 36 deletions(-) create mode 100644 python/sglang/multimodal_gen/test/run_suite.py create mode 100644 python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index c54a5bb7b24a..e387c96a9336 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -359,19 +359,11 @@ jobs: fail-fast: false max-parallel: 5 matrix: - test_file: ["test_server_perf_a.py", "test_server_perf_b.py"] + part: [0, 1] steps: - name: Checkout code uses: actions/checkout@v4 - - name: Download artifacts - if: needs.check-changes.outputs.sgl_kernel == 'true' - uses: actions/download-artifact@v4 - with: - path: sgl-kernel/dist/ - merge-multiple: true - pattern: wheel-python3.10-cuda12.9 - - name: Install dependencies run: | CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion @@ -380,34 +372,39 @@ jobs: timeout-minutes: 60 run: | cd python - pytest -s -v --log-cli-level=INFO sglang/multimodal_gen/test/server/${{ matrix.test_file }} + python3 sglang/multimodal_gen/test/run_suite.py \ + --partition-id ${{ matrix.part }} \ + --total-partitions 2 \ + --target-dir server \ + --pattern "test_server_([A-Za-z].*)\.p" multimodal-gen-test-2-gpu: needs: [check-changes, sgl-kernel-build-wheels] if: (always() && !failure() && !cancelled()) && needs.check-changes.outputs.multimodal_gen == 'true' runs-on: 2-gpu-runner + strategy: + fail-fast: false + max-parallel: 5 + matrix: + part: [0, 1] steps: - name: Checkout code uses: actions/checkout@v4 - - name: Download artifacts - if: needs.check-changes.outputs.sgl_kernel == 'true' - uses: actions/download-artifact@v4 - with: - path: sgl-kernel/dist/ - merge-multiple: true - pattern: wheel-python3.10-cuda12.9 - - name: Install dependencies run: | CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion - - name: Run diffusion server tests + - name: Run diffusion server tests (2 GPU) timeout-minutes: 60 run: | cd python - pytest -s -v --log-cli-level=INFO sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py + python3 sglang/multimodal_gen/test/run_suite.py \ + --partition-id ${{ matrix.part }} \ + --total-partitions 1 \ + --target-dir server \ + --pattern "test_server_2_gpu*.py" unit-test-backend-1-gpu: needs: 
[check-changes, stage-a-test-1] diff --git a/python/sglang/multimodal_gen/test/run_suite.py b/python/sglang/multimodal_gen/test/run_suite.py new file mode 100644 index 000000000000..3eb03db398b5 --- /dev/null +++ b/python/sglang/multimodal_gen/test/run_suite.py @@ -0,0 +1,113 @@ +""" +Helper script to discover and run multimodal generation tests + +This script scans the `multimodal_gen/test/server/` directory for test files matching a pattern and executes a subset of them based on the partition ID + +How to add a new test: +1. Create a new test file in `python/sglang/multimodal_gen/test/server/`. +2. Name it matching the pattern `test_server_*.py` (e.g., `test_server_c.py`). +3. The CI will automatically pick it up and distribute it to one of the runners. +""" + +import argparse +import glob +import os +import subprocess +import sys +from pathlib import Path + + +def parse_args(): + parser = argparse.ArgumentParser(description="Run multimodal_gen test suite") + parser.add_argument( + "--partition-id", + type=int, + default=0, + help="Index of the current partition (for parallel execution)", + ) + parser.add_argument( + "--total-partitions", + type=int, + default=1, + help="Total number of partitions", + ) + parser.add_argument( + "--target-dir", + type=str, + default="server", + help="Sub-directory under multimodal_gen/test to look for tests", + ) + parser.add_argument( + "--pattern", + type=str, + default="test_server_*.py", + help="Glob pattern to match test files", + ) + return parser.parse_args() + + +def get_test_files(base_dir, sub_dir, pattern): + """Find all test files matching the pattern.""" + search_path = os.path.join(base_dir, sub_dir, pattern) + files = sorted(glob.glob(search_path)) + return files + + +def main(): + args = parse_args() + + # Determine the absolute path of the test directory + # Assuming this script is located at python/sglang/multimodal_gen/test/run_suite.py + current_file_path = Path(__file__).resolve() + test_root_dir = current_file_path.parent + + # 1. Discover Test Files + all_files = get_test_files(str(test_root_dir), args.target_dir, args.pattern) + + if not all_files: + print( + f"No test files found in {os.path.join(test_root_dir, args.target_dir)} matching {args.pattern}" + ) + sys.exit(0) # Exit gracefully if no files found + + print(f"Found {len(all_files)} test files total.") + + # 2. Partitioning (Distribute files across runners) + # Using simple interleaving: file 0 -> runner 0, file 1 -> runner 1, file 2 -> runner 0... + my_files = [ + f + for i, f in enumerate(all_files) + if i % args.total_partitions == args.partition_id + ] + + if not my_files: + print( + f"Partition {args.partition_id}/{args.total_partitions} has no files to run. Skipping." + ) + sys.exit(0) + + print(f"Running {len(my_files)} files on this partition:") + for f in my_files: + print(f" - {os.path.basename(f)}") + + # 3. 
Execute Pytest + # Construct the pytest command + # -s: show stdout + # -v: verbose + cmd = [ + sys.executable, + "-m", + "pytest", + "-s", + "-v", + "--log-cli-level=INFO", + ] + my_files + + print(f"Executing command: {' '.join(cmd)}") + + result = subprocess.run(cmd) + sys.exit(result.returncode) + + +if __name__ == "__main__": + main() diff --git a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py new file mode 100644 index 000000000000..cc4e18cae1f5 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py @@ -0,0 +1,25 @@ +""" +2 GPU Performance tests (A14B models) with --num-gpus 2 --ulysses-degree 2. +""" + +from __future__ import annotations + +import pytest + +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionPerformanceBase, + diffusion_server, +) +from sglang.multimodal_gen.test.server.testcase_configs import ( + TWO_GPU_CASES_B, + DiffusionTestCase, +) + + +class TestDiffusionPerformanceTwoGpu(DiffusionPerformanceBase): + """Performance tests for 2-GPU diffusion cases.""" + + @pytest.fixture(params=TWO_GPU_CASES_B, ids=lambda c: c.id) + def case(self, request) -> DiffusionTestCase: + """Provide a DiffusionTestCase for each 2-GPU test.""" + return request.param diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index 83cf91ef8c78..872bc0236845 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -3,14 +3,14 @@ Usage: -pytest python/sglang/multimodal_gen/test/server/test_server_performance.py +pytest python/sglang/multimodal_gen/test/server/test_server_a.py # for a single testcase, look for the name of the testcases in DIFFUSION_CASES -pytest python/sglang/multimodal_gen/test/server/test_server_performance.py -k qwen_image_t2i +pytest python/sglang/multimodal_gen/test/server/test_server_a.py -k qwen_image_t2i To add a new testcase: 1. add your testcase with case-id: `my_new_test_case_id` to DIFFUSION_CASES -2. run `SGLANG_GEN_BASELINE=1 pytest -s python/sglang/multimodal_gen/test/server/test_server_performance.py -k my_new_test_case_id` +2. run `SGLANG_GEN_BASELINE=1 pytest -s python/sglang/multimodal_gen/test/server/test_server_a.py -k my_new_test_case_id` 3. 
insert or override the corresponding scenario in `scenarios` section of perf_baselines.json with the output baseline of step-2 @@ -245,7 +245,6 @@ def from_req_perf_record( ), ] - ONE_GPU_CASES_B: list[DiffusionTestCase] = [ # === Text to Video (T2V) === DiffusionTestCase( @@ -258,17 +257,9 @@ def from_req_perf_record( warmup_edit=0, custom_validator="video", ), - # NOTE(mick): flaky - # DiffusionTestCase( - # id="hunyuan_video", - # model_path="hunyuanvideo-community/HunyuanVideo", - # modality="video", - # prompt="A curious raccoon", - # output_size="720x480", - # warmup_text=0, - # warmup_edit=0, - # custom_validator="video", - # ), +] + +TWO_GPU_CASES_A = [ DiffusionTestCase( id="fast_hunyuan_video", model_path="FastVideo/FastHunyuan-diffusers", @@ -279,7 +270,9 @@ def from_req_perf_record( warmup_edit=0, custom_validator="video", ), - # === Text and Image to Video (TI2V) === +] + +TWO_GPU_CASES_B = [ DiffusionTestCase( id="wan2_2_ti2v_5b", model_path="Wan-AI/Wan2.2-TI2V-5B-Diffusers", From 39d881e505a3dd73c19def34d4f899db2da606e3 Mon Sep 17 00:00:00 2001 From: Mick Date: Sun, 23 Nov 2025 18:14:39 +0800 Subject: [PATCH 03/11] upd --- .../test/server/test_server_perf_common.py | 565 ------------------ .../test/server/testcase_configs.py | 36 +- 2 files changed, 18 insertions(+), 583 deletions(-) diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_common.py b/python/sglang/multimodal_gen/test/server/test_server_perf_common.py index db29775f523e..e69de29bb2d1 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_common.py +++ b/python/sglang/multimodal_gen/test/server/test_server_perf_common.py @@ -1,565 +0,0 @@ -""" -Config-driven diffusion performance test with pytest parametrization. - - -If the actual run is significantly better than the baseline, the improved cases with their updated baseline will be printed -""" - -from __future__ import annotations - -import os -import time -from pathlib import Path -from typing import Any, Callable - -import pytest -from openai import OpenAI - -from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger -from sglang.multimodal_gen.runtime.utils.perf_logger import RequestPerfRecord -from sglang.multimodal_gen.test.server.conftest import _GLOBAL_PERF_RESULTS -from sglang.multimodal_gen.test.server.test_server_utils import ( - VALIDATOR_REGISTRY, - PerformanceValidator, - ServerContext, - ServerManager, - WarmupRunner, - download_image_from_url, -) -from sglang.multimodal_gen.test.server.testcase_configs import ( - BASELINE_CONFIG, - DiffusionTestCase, - PerformanceSummary, - ScenarioConfig, -) -from sglang.multimodal_gen.test.test_utils import ( - get_dynamic_server_port, - read_perf_logs, - validate_image, - validate_openai_video, - wait_for_req_perf_record, -) - -logger = init_logger(__name__) - - -@pytest.fixture -def diffusion_server(case: DiffusionTestCase) -> ServerContext: - """Start a diffusion server for a single case and tear it down afterwards.""" - default_port = get_dynamic_server_port() - port = int(os.environ.get("SGLANG_TEST_SERVER_PORT", default_port)) - - extra_args = os.environ.get("SGLANG_TEST_SERVE_ARGS", "") - extra_args += f" --num-gpus {case.num_gpus} --ulysses-degree {case.num_gpus}" - - # start server - manager = ServerManager( - model=case.model_path, - port=port, - wait_deadline=float(os.environ.get("SGLANG_TEST_WAIT_SECS", "1200")), - extra_args=extra_args, - ) - ctx = manager.start() - - try: - warmup = WarmupRunner( - port=ctx.port, - model=case.model_path, - 
prompt=case.prompt or "A colorful raccoon icon", - output_size=case.output_size, - ) - warmup.run_text_warmups(case.warmup_text) - - if case.warmup_edit > 0 and case.edit_prompt and case.image_path: - # Handle URL or local path - image_path = case.image_path - if case.is_image_url(): - image_path = download_image_from_url(str(case.image_path)) - else: - image_path = Path(case.image_path) - - warmup.run_edit_warmups( - count=case.warmup_edit, - edit_prompt=case.edit_prompt, - image_path=image_path, - ) - except Exception as exc: - logger.error("Warm-up failed for %s: %s", case.id, exc) - ctx.cleanup() - raise - - try: - yield ctx - finally: - ctx.cleanup() - - -class DiffusionPerformanceBase: - """Performance tests for all diffusion models/scenarios. - - This single test class runs against all cases defined in ONE_GPU_CASES. - Each case gets its own server instance via the parametrized fixture. - """ - - _perf_results: list[dict[str, Any]] = [] - _improved_baselines: list[dict[str, Any]] = [] - - @classmethod - def setup_class(cls): - cls._perf_results = [] - cls._improved_baselines = [] - - @classmethod - def teardown_class(cls): - for result in cls._perf_results: - result["class_name"] = cls.__name__ - _GLOBAL_PERF_RESULTS.append(result) - - if cls._improved_baselines: - import json - - output = """ ---- POTENTIAL BASELINE IMPROVEMENTS DETECTED --- -The following test cases performed significantly better than their baselines. -Consider updating perf_baselines.json with the snippets below: -""" - for item in cls._improved_baselines: - output += ( - f'\n"{item["id"]}": {json.dumps(item["baseline"], indent=4)},\n' - ) - print(output) - - def _client(self, ctx: ServerContext) -> OpenAI: - """Get OpenAI client for the server.""" - return OpenAI( - api_key="sglang-anything", - base_url=f"http://localhost:{ctx.port}/v1", - ) - - def run_and_collect( - self, - ctx: ServerContext, - generate_fn: Callable[[], str], - ) -> RequestPerfRecord: - """Run generation and collect performance records.""" - log_path = ctx.perf_log_path - prev_len = len(read_perf_logs(log_path)) - log_wait_timeout = 30 - - rid = generate_fn() - - req_perf_record, _ = wait_for_req_perf_record( - rid, - prev_len, - log_path, - timeout=log_wait_timeout, - ) - - return req_perf_record - - def get_generate_fn( - self, - ctx: ServerContext, - case: DiffusionTestCase, - ) -> Callable[[], str]: - """Return appropriate generation function for the case.""" - client = self._client(ctx) - - def _create_and_download_video( - *, - model: str, - size: str, - prompt: str | None = None, - seconds: int | None = None, - input_reference: Any | None = None, - ) -> str: - """ - Create a video job via /v1/videos, poll until completion, - then download the binary content and validate it. 
- """ - create_kwargs: dict[str, Any] = { - "model": model, - "size": size, - } - if prompt is not None: - create_kwargs["prompt"] = prompt - if seconds is not None: - create_kwargs["seconds"] = seconds - if input_reference is not None: - create_kwargs["input_reference"] = input_reference # triggers multipart - - # create video job - job = client.videos.create(**create_kwargs) # type: ignore[attr-defined] - video_id = job.id - - job_completed = False - is_baseline_generation_mode = ( - os.environ.get("SGLANG_GEN_BASELINE", "0") == "1" - ) - timeout = 3600.0 if is_baseline_generation_mode else 1200.0 - deadline = time.time() + timeout - while True: - page = client.videos.list() # type: ignore[attr-defined] - item = next((v for v in page.data if v.id == video_id), None) - - if item and getattr(item, "status", None) == "completed": - job_completed = True - break - - if time.time() > deadline: - break - - time.sleep(1) - - if not job_completed: - if is_baseline_generation_mode: - logger.warning( - f"{case.id}: video job {video_id} timed out during baseline generation. " - "Attempting to collect performance data anyway." - ) - return video_id - - pytest.fail(f"{case.id}: video job {video_id} did not complete in time") - - # download video - resp = client.videos.download_content(video_id=video_id) # type: ignore[attr-defined] - content = resp.read() - validate_openai_video(content) - return video_id - - # for all tests, seconds = case.seconds or fallback 4 seconds - video_seconds = case.seconds or 4 - - # ------------------------- - # IMAGE MODE - # ------------------------- - - def generate_image() -> str: - """T2I: Text to Image generation.""" - if not case.prompt: - pytest.skip(f"{case.id}: no text prompt configured") - - response = client.images.with_raw_response.generate( - model=case.model_path, - prompt=case.prompt, - n=1, - size=case.output_size, - response_format="b64_json", - ) - result = response.parse() - validate_image(result.data[0].b64_json) - return str(result.created) - - def generate_image_edit() -> str: - """TI2I: Text + Image ? Image edit.""" - if not case.edit_prompt or not case.image_path: - pytest.skip(f"{case.id}: no edit config") - - # Handle URL or local path - if case.is_image_url(): - image_path = download_image_from_url(str(case.image_path)) - else: - image_path = Path(case.image_path) - if not image_path.exists(): - pytest.skip(f"{case.id}: file missing: {image_path}") - - with image_path.open("rb") as fh: - response = client.images.with_raw_response.edit( - model=case.model_path, - image=fh, - prompt=case.edit_prompt, - n=1, - size=case.output_size, - response_format="b64_json", - ) - rid = response.headers.get("x-request-id", "") - - result = response.parse() - validate_image(result.data[0].b64_json) - return rid - - # ------------------------- - # VIDEO MODE - # ------------------------- - - def generate_video() -> str: - """T2V: Text ? Video.""" - if not case.prompt: - pytest.skip(f"{case.id}: no text prompt configured") - - return _create_and_download_video( - model=case.model_path, - prompt=case.prompt, - size=case.output_size, - seconds=video_seconds, - ) - - def generate_image_to_video() -> str: - """I2V: Image ? 
Video (optional prompt).""" - if not case.image_path: - pytest.skip(f"{case.id}: no input image configured") - - # Handle URL or local path - if case.is_image_url(): - image_path = download_image_from_url(str(case.image_path)) - else: - image_path = Path(case.image_path) - if not image_path.exists(): - pytest.skip(f"{case.id}: file missing: {image_path}") - - with image_path.open("rb") as fh: - return _create_and_download_video( - model=case.model_path, - prompt=case.edit_prompt, - size=case.output_size, - seconds=video_seconds, - input_reference=fh, - ) - - def generate_text_image_to_video() -> str: - """TI2V: Text + Image ? Video.""" - if not case.edit_prompt or not case.image_path: - pytest.skip(f"{case.id}: no edit config") - - # Handle URL or local path - if case.is_image_url(): - image_path = download_image_from_url(str(case.image_path)) - else: - image_path = Path(case.image_path) - if not image_path.exists(): - pytest.skip(f"{case.id}: file missing: {image_path}") - - with image_path.open("rb") as fh: - return _create_and_download_video( - model=case.model_path, - prompt=case.edit_prompt, - size=case.output_size, - seconds=video_seconds, - input_reference=fh, - ) - - if case.modality == "video": - if case.image_path and case.edit_prompt: - return generate_text_image_to_video - elif case.image_path: - return generate_image_to_video - else: - return generate_video - - # Image modality - if case.edit_prompt and case.image_path: - return generate_image_edit - - return generate_image - - def _validate_and_record( - self, - case: DiffusionTestCase, - perf_record: RequestPerfRecord, - ) -> None: - """Validate metrics and record results.""" - is_baseline_generation_mode = os.environ.get("SGLANG_GEN_BASELINE", "0") == "1" - - scenario = BASELINE_CONFIG.scenarios.get(case.id) - missing_scenario = False - if scenario is None: - # Create dummy scenario to allow metric collection - scenario = type( - "DummyScenario", - (), - { - "expected_e2e_ms": 0, - "expected_avg_denoise_ms": 0, - "expected_median_denoise_ms": 0, - "stages_ms": {}, - "denoise_step_ms": {}, - }, - )() - if not is_baseline_generation_mode: - missing_scenario = True - - validator_name = case.custom_validator or "default" - validator_class = VALIDATOR_REGISTRY.get(validator_name, PerformanceValidator) - - validator = validator_class( - scenario=scenario, - tolerances=BASELINE_CONFIG.tolerances, - step_fractions=BASELINE_CONFIG.step_fractions, - ) - - summary = validator.collect_metrics(perf_record) - - if is_baseline_generation_mode or missing_scenario: - self._dump_baseline_for_testcase(case, summary, missing_scenario) - if missing_scenario: - pytest.fail(f"Testcase '{case.id}' not found in perf_baselines.json") - return - - self._check_for_improvement(case, summary, scenario) - - try: - validator.validate(perf_record, case.num_frames) - except AssertionError as e: - logger.error(f"Performance validation failed for {case.id}:\n{e}") - self._dump_baseline_for_testcase(case, summary, missing_scenario) - raise - - result = { - "test_name": case.id, - "modality": case.modality, - "e2e_ms": summary.e2e_ms, - "avg_denoise_ms": summary.avg_denoise_ms, - "median_denoise_ms": summary.median_denoise_ms, - "stage_metrics": summary.stage_metrics, - "sampled_steps": summary.sampled_steps, - } - - # video-specific metrics - if summary.frames_per_second: - result.update( - { - "frames_per_second": summary.frames_per_second, - "total_frames": summary.total_frames, - "avg_frame_time_ms": summary.avg_frame_time_ms, - } - ) - - 
self.__class__._perf_results.append(result) - - def _check_for_improvement( - self, - case: DiffusionTestCase, - summary: PerformanceSummary, - scenario: "ScenarioConfig", - ) -> None: - """Check for potential significant performance improvements and record them.""" - is_improved = False - threshold = BASELINE_CONFIG.improvement_threshold - - def is_sig_faster(actual, expected): - if expected == 0 or expected is None: - return False - return actual < expected * (1 - threshold) - - def safe_get_metric(metric_dict, key): - val = metric_dict.get(key) - return val if val is not None else float("inf") - - # Check for any significant improvement - if ( - is_sig_faster(summary.e2e_ms, scenario.expected_e2e_ms) - or is_sig_faster(summary.avg_denoise_ms, scenario.expected_avg_denoise_ms) - or is_sig_faster( - summary.median_denoise_ms, scenario.expected_median_denoise_ms - ) - ): - is_improved = True - # Combine metrics, always taking the better (lower) value - new_stages = { - stage: min( - safe_get_metric(summary.stage_metrics, stage), - safe_get_metric(scenario.stages_ms, stage), - ) - for stage in set(summary.stage_metrics) | set(scenario.stages_ms) - } - new_denoise_steps = { - step: min( - safe_get_metric(summary.all_denoise_steps, step), - safe_get_metric(scenario.denoise_step_ms, step), - ) - for step in set(summary.all_denoise_steps.keys()) - | set(scenario.denoise_step_ms) - } - - # Check for stage-level improvements - if not is_improved: - for stage, new_val in new_stages.items(): - if is_sig_faster(new_val, scenario.stages_ms.get(stage, float("inf"))): - is_improved = True - break - if not is_improved: - for step, new_val in new_denoise_steps.items(): - if is_sig_faster( - new_val, scenario.denoise_step_ms.get(step, float("inf")) - ): - is_improved = True - break - - if is_improved: - new_baseline = { - "stages_ms": {k: round(v, 2) for k, v in new_stages.items()}, - "denoise_step_ms": { - str(k): round(v, 2) for k, v in new_denoise_steps.items() - }, - "expected_e2e_ms": round( - min(summary.e2e_ms, scenario.expected_e2e_ms), 2 - ), - "expected_avg_denoise_ms": round( - min(summary.avg_denoise_ms, scenario.expected_avg_denoise_ms), 2 - ), - "expected_median_denoise_ms": round( - min(summary.median_denoise_ms, scenario.expected_median_denoise_ms), - 2, - ), - } - self._improved_baselines.append({"id": case.id, "baseline": new_baseline}) - - def _dump_baseline_for_testcase( - self, - case: DiffusionTestCase, - summary: "PerformanceSummary", - missing_scenario: bool = False, - ) -> None: - """Dump performance metrics as a JSON scenario for baselines.""" - import json - - denoise_steps_formatted = { - str(k): round(v, 2) for k, v in summary.all_denoise_steps.items() - } - stages_formatted = {k: round(v, 2) for k, v in summary.stage_metrics.items()} - - baseline = { - "stages_ms": stages_formatted, - "denoise_step_ms": denoise_steps_formatted, - "expected_e2e_ms": round(summary.e2e_ms, 2), - "expected_avg_denoise_ms": round(summary.avg_denoise_ms, 2), - "expected_median_denoise_ms": round(summary.median_denoise_ms, 2), - } - - # Video-specific metrics - if case.modality == "video": - if "per_frame_generation" not in baseline["stages_ms"]: - baseline["stages_ms"]["per_frame_generation"] = ( - round(summary.avg_frame_time_ms, 2) - if summary.avg_frame_time_ms - else None - ) - action = "add" if missing_scenario else "update" - output = f""" -{action} this baseline in the "scenarios" section of perf_baselines.json: - -"{case.id}": {json.dumps(baseline, indent=4)} - -""" - logger.error(output) - 
logger.error(output) - - def test_diffusion_perf( - self, - case: DiffusionTestCase, - diffusion_server: ServerContext, - ): - """Single parametrized test that runs for all cases. - - Pytest will execute this test once per case in ONE_GPU_CASES, - with test IDs like: - - test_diffusion_perf[qwen_image_text] - - test_diffusion_perf[qwen_image_edit] - - etc. - """ - generate_fn = self.get_generate_fn(diffusion_server, case) - perf_record = self.run_and_collect( - diffusion_server, - generate_fn, - ) - self._validate_and_record(case, perf_record) diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index 872bc0236845..d0089a44366c 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -33,8 +33,7 @@ class ToleranceConfig: """Tolerance ratios for performance validation.""" e2e: float - denoise_stage: float - non_denoise_stage: float + stage: float denoise_step: float denoise_agg: float @@ -69,15 +68,7 @@ def load(cls, path: Path) -> BaselineConfig: tol_data = data["tolerances"] tolerances = ToleranceConfig( e2e=float(os.getenv("SGLANG_E2E_TOLERANCE", tol_data["e2e"])), - denoise_stage=float( - os.getenv("SGLANG_STAGE_TIME_TOLERANCE", tol_data["denoise_stage"]) - ), - non_denoise_stage=float( - os.getenv( - "SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE", - tol_data["non_denoise_stage"], - ) - ), + stage=float(os.getenv("SGLANG_STAGE_TIME_TOLERANCE", tol_data["stage"])), denoise_step=float( os.getenv("SGLANG_DENOISE_STEP_TOLERANCE", tol_data["denoise_step"]) ), @@ -257,9 +248,17 @@ def from_req_perf_record( warmup_edit=0, custom_validator="video", ), -] - -TWO_GPU_CASES_A = [ + # NOTE(mick): flaky + # DiffusionTestCase( + # id="hunyuan_video", + # model_path="hunyuanvideo-community/HunyuanVideo", + # modality="video", + # prompt="A curious raccoon", + # output_size="720x480", + # warmup_text=0, + # warmup_edit=0, + # custom_validator="video", + # ), DiffusionTestCase( id="fast_hunyuan_video", model_path="FastVideo/FastHunyuan-diffusers", @@ -270,9 +269,7 @@ def from_req_perf_record( warmup_edit=0, custom_validator="video", ), -] - -TWO_GPU_CASES_B = [ + # === Text and Image to Video (TI2V) === DiffusionTestCase( id="wan2_2_ti2v_5b", model_path="Wan-AI/Wan2.2-TI2V-5B-Diffusers", @@ -299,7 +296,7 @@ def from_req_perf_record( ), ] -TWO_GPU_CASES = [ +TWO_GPU_CASES_A = [ DiffusionTestCase( id="wan2_2_i2v_a14b_2gpu", model_path="Wan-AI/Wan2.2-I2V-A14B-Diffusers", @@ -336,6 +333,9 @@ def from_req_perf_record( custom_validator="video", num_gpus=2, ), +] + +TWO_GPU_CASES_B = [ DiffusionTestCase( id="wan2_1_i2v_14b_480P_2gpu", model_path="Wan-AI/Wan2.1-I2V-14B-480P-Diffusers", From 32c809a8daa4725829efe485396f17b208276731 Mon Sep 17 00:00:00 2001 From: Mick Date: Sun, 23 Nov 2025 18:15:33 +0800 Subject: [PATCH 04/11] upd --- .../test/server/test_server_2_gpu_a.py | 25 + .../test/server/test_server_2_gpu_b.py | 4 +- .../test/server/test_server_a.py | 31 + .../test/server/test_server_b.py | 31 + .../test/server/test_server_common.py | 564 ++++++++++++++++++ 5 files changed, 653 insertions(+), 2 deletions(-) create mode 100644 python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py create mode 100644 python/sglang/multimodal_gen/test/server/test_server_a.py create mode 100644 python/sglang/multimodal_gen/test/server/test_server_b.py create mode 100644 python/sglang/multimodal_gen/test/server/test_server_common.py diff --git 
a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py new file mode 100644 index 000000000000..4dac4dd49302 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py @@ -0,0 +1,25 @@ +""" +2 GPU Performance tests (A14B models) with --num-gpus 2 --ulysses-degree 2. +""" + +from __future__ import annotations + +import pytest + +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionServerBase, + diffusion_server, +) +from sglang.multimodal_gen.test.server.testcase_configs import ( + TWO_GPU_CASES_A, + DiffusionTestCase, +) + + +class TestDiffusionServerTwoGpu(DiffusionServerBase): + """Performance tests for 2-GPU diffusion cases.""" + + @pytest.fixture(params=TWO_GPU_CASES_A, ids=lambda c: c.id) + def case(self, request) -> DiffusionTestCase: + """Provide a DiffusionTestCase for each 2-GPU test.""" + return request.param diff --git a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py index cc4e18cae1f5..6c6e691325fb 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py +++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py @@ -7,7 +7,7 @@ import pytest from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 - DiffusionPerformanceBase, + DiffusionServerBase, diffusion_server, ) from sglang.multimodal_gen.test.server.testcase_configs import ( @@ -16,7 +16,7 @@ ) -class TestDiffusionPerformanceTwoGpu(DiffusionPerformanceBase): +class TestDiffusionServerTwoGpu(DiffusionServerBase): """Performance tests for 2-GPU diffusion cases.""" @pytest.fixture(params=TWO_GPU_CASES_B, ids=lambda c: c.id) diff --git a/python/sglang/multimodal_gen/test/server/test_server_a.py b/python/sglang/multimodal_gen/test/server/test_server_a.py new file mode 100644 index 000000000000..fdf072ec89e1 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/test_server_a.py @@ -0,0 +1,31 @@ +""" +Config-driven diffusion performance test with pytest parametrization. + + +If the actual run is significantly better than the baseline, the improved cases with their updated baseline will be printed +""" + +from __future__ import annotations + +import pytest + +from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionServerBase, + diffusion_server, +) +from sglang.multimodal_gen.test.server.testcase_configs import ( + ONE_GPU_CASES_A, + DiffusionTestCase, +) + +logger = init_logger(__name__) + + +class TestDiffusionServerOneGpu(DiffusionServerBase): + """Performance tests for 1-GPU diffusion cases.""" + + @pytest.fixture(params=ONE_GPU_CASES_A, ids=lambda c: c.id) + def case(self, request) -> DiffusionTestCase: + """Provide a DiffusionTestCase for each 1-GPU test.""" + return request.param diff --git a/python/sglang/multimodal_gen/test/server/test_server_b.py b/python/sglang/multimodal_gen/test/server/test_server_b.py new file mode 100644 index 000000000000..1a0432db6f3b --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/test_server_b.py @@ -0,0 +1,31 @@ +""" +Config-driven diffusion performance test with pytest parametrization. 
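+
+Cases for this shard come from ONE_GPU_CASES_B; a single case can be selected
+by its id with pytest's -k filter, e.g.:
+    pytest sglang/multimodal_gen/test/server/test_server_b.py -k wan2_2_ti2v_5b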
+ + +If the actual run is significantly better than the baseline, the improved cases with their updated baseline will be printed +""" + +from __future__ import annotations + +import pytest + +from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger +from sglang.multimodal_gen.test.server.test_server_common import ( # noqa: F401 + DiffusionServerBase, + diffusion_server, +) +from sglang.multimodal_gen.test.server.testcase_configs import ( + ONE_GPU_CASES_B, + DiffusionTestCase, +) + +logger = init_logger(__name__) + + +class TestDiffusionServerOneGpu(DiffusionServerBase): + """Performance tests for 1-GPU diffusion cases.""" + + @pytest.fixture(params=ONE_GPU_CASES_B, ids=lambda c: c.id) + def case(self, request) -> DiffusionTestCase: + """Provide a DiffusionTestCase for each 1-GPU test.""" + return request.param diff --git a/python/sglang/multimodal_gen/test/server/test_server_common.py b/python/sglang/multimodal_gen/test/server/test_server_common.py new file mode 100644 index 000000000000..c7bf6fbadba3 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/test_server_common.py @@ -0,0 +1,564 @@ +""" +Config-driven diffusion performance test with pytest parametrization. + + +If the actual run is significantly better than the baseline, the improved cases with their updated baseline will be printed +""" + +from __future__ import annotations + +import os +import time +from pathlib import Path +from typing import Any, Callable + +import pytest +from openai import OpenAI + +from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger +from sglang.multimodal_gen.runtime.utils.perf_logger import RequestPerfRecord +from sglang.multimodal_gen.test.server.conftest import _GLOBAL_PERF_RESULTS +from sglang.multimodal_gen.test.server.test_server_utils import ( + VALIDATOR_REGISTRY, + PerformanceValidator, + ServerContext, + ServerManager, + WarmupRunner, + download_image_from_url, +) +from sglang.multimodal_gen.test.server.testcase_configs import ( + BASELINE_CONFIG, + DiffusionTestCase, + PerformanceSummary, + ScenarioConfig, +) +from sglang.multimodal_gen.test.test_utils import ( + get_dynamic_server_port, + read_perf_logs, + validate_image, + validate_openai_video, + wait_for_req_perf_record, +) + +logger = init_logger(__name__) + + +@pytest.fixture +def diffusion_server(case: DiffusionTestCase) -> ServerContext: + """Start a diffusion server for a single case and tear it down afterwards.""" + default_port = get_dynamic_server_port() + port = int(os.environ.get("SGLANG_TEST_SERVER_PORT", default_port)) + + extra_args = os.environ.get("SGLANG_TEST_SERVE_ARGS", "") + extra_args += f" --num-gpus {case.num_gpus} --ulysses-degree {case.num_gpus}" + + # start server + manager = ServerManager( + model=case.model_path, + port=port, + wait_deadline=float(os.environ.get("SGLANG_TEST_WAIT_SECS", "1200")), + extra_args=extra_args, + ) + ctx = manager.start() + + try: + warmup = WarmupRunner( + port=ctx.port, + model=case.model_path, + prompt=case.prompt or "A colorful raccoon icon", + output_size=case.output_size, + ) + warmup.run_text_warmups(case.warmup_text) + + if case.warmup_edit > 0 and case.edit_prompt and case.image_path: + # Handle URL or local path + image_path = case.image_path + if case.is_image_url(): + image_path = download_image_from_url(str(case.image_path)) + else: + image_path = Path(case.image_path) + + warmup.run_edit_warmups( + count=case.warmup_edit, + edit_prompt=case.edit_prompt, + image_path=image_path, + ) + except Exception as exc: + 
logger.error("Warm-up failed for %s: %s", case.id, exc) + ctx.cleanup() + raise + + try: + yield ctx + finally: + ctx.cleanup() + + +class DiffusionServerBase: + """Performance tests for all diffusion models/scenarios. + + This single test class runs against all cases defined in ONE_GPU_CASES. + Each case gets its own server instance via the parametrized fixture. + """ + + _perf_results: list[dict[str, Any]] = [] + _improved_baselines: list[dict[str, Any]] = [] + + @classmethod + def setup_class(cls): + cls._perf_results = [] + cls._improved_baselines = [] + + @classmethod + def teardown_class(cls): + for result in cls._perf_results: + result["class_name"] = cls.__name__ + _GLOBAL_PERF_RESULTS.append(result) + + if cls._improved_baselines: + import json + + output = """ +--- POTENTIAL BASELINE IMPROVEMENTS DETECTED --- +The following test cases performed significantly better than their baselines. +Consider updating perf_baselines.json with the snippets below: +""" + for item in cls._improved_baselines: + output += ( + f'\n"{item["id"]}": {json.dumps(item["baseline"], indent=4)},\n' + ) + print(output) + + def _client(self, ctx: ServerContext) -> OpenAI: + """Get OpenAI client for the server.""" + return OpenAI( + api_key="sglang-anything", + base_url=f"http://localhost:{ctx.port}/v1", + ) + + def run_and_collect( + self, + ctx: ServerContext, + generate_fn: Callable[[], str], + ) -> RequestPerfRecord: + """Run generation and collect performance records.""" + log_path = ctx.perf_log_path + prev_len = len(read_perf_logs(log_path)) + log_wait_timeout = 30 + + rid = generate_fn() + + req_perf_record, _ = wait_for_req_perf_record( + rid, + prev_len, + log_path, + timeout=log_wait_timeout, + ) + + return req_perf_record + + def get_generate_fn( + self, + ctx: ServerContext, + case: DiffusionTestCase, + ) -> Callable[[], str]: + """Return appropriate generation function for the case.""" + client = self._client(ctx) + + def _create_and_download_video( + *, + model: str, + size: str, + prompt: str | None = None, + seconds: int | None = None, + input_reference: Any | None = None, + ) -> str: + """ + Create a video job via /v1/videos, poll until completion, + then download the binary content and validate it. + """ + create_kwargs: dict[str, Any] = { + "model": model, + "size": size, + } + if prompt is not None: + create_kwargs["prompt"] = prompt + if seconds is not None: + create_kwargs["seconds"] = seconds + if input_reference is not None: + create_kwargs["input_reference"] = input_reference # triggers multipart + + # create video job + job = client.videos.create(**create_kwargs) # type: ignore[attr-defined] + video_id = job.id + + job_completed = False + is_baseline_generation_mode = ( + os.environ.get("SGLANG_GEN_BASELINE", "0") == "1" + ) + timeout = 3600.0 if is_baseline_generation_mode else 1200.0 + deadline = time.time() + timeout + while True: + page = client.videos.list() # type: ignore[attr-defined] + item = next((v for v in page.data if v.id == video_id), None) + + if item and getattr(item, "status", None) == "completed": + job_completed = True + break + + if time.time() > deadline: + break + + time.sleep(1) + + if not job_completed: + if is_baseline_generation_mode: + logger.warning( + f"{case.id}: video job {video_id} timed out during baseline generation. " + "Attempting to collect performance data anyway." 
+                    )
+                    return video_id
+
+                pytest.fail(f"{case.id}: video job {video_id} did not complete in time")
+
+            # download video
+            resp = client.videos.download_content(video_id=video_id)  # type: ignore[attr-defined]
+            content = resp.read()
+            validate_openai_video(content)
+            return video_id
+
+        # for all tests, seconds = case.seconds or fallback 4 seconds
+        video_seconds = case.seconds or 4
+
+        # -------------------------
+        # IMAGE MODE
+        # -------------------------
+
+        def generate_image() -> str:
+            """T2I: Text to Image generation."""
+            if not case.prompt:
+                pytest.skip(f"{case.id}: no text prompt configured")
+
+            response = client.images.with_raw_response.generate(
+                model=case.model_path,
+                prompt=case.prompt,
+                n=1,
+                size=case.output_size,
+                response_format="b64_json",
+            )
+            result = response.parse()
+            validate_image(result.data[0].b64_json)
+            return str(result.created)
+
+        def generate_image_edit() -> str:
+            """TI2I: Text + Image → Image edit."""
+            if not case.edit_prompt or not case.image_path:
+                pytest.skip(f"{case.id}: no edit config")
+
+            # Handle URL or local path
+            if case.is_image_url():
+                image_path = download_image_from_url(str(case.image_path))
+            else:
+                image_path = Path(case.image_path)
+                if not image_path.exists():
+                    pytest.skip(f"{case.id}: file missing: {image_path}")
+
+            with image_path.open("rb") as fh:
+                response = client.images.with_raw_response.edit(
+                    model=case.model_path,
+                    image=fh,
+                    prompt=case.edit_prompt,
+                    n=1,
+                    size=case.output_size,
+                    response_format="b64_json",
+                )
+                rid = response.headers.get("x-request-id", "")
+
+            result = response.parse()
+            validate_image(result.data[0].b64_json)
+            return rid
+
+        # -------------------------
+        # VIDEO MODE
+        # -------------------------
+
+        def generate_video() -> str:
+            """T2V: Text → Video."""
+            if not case.prompt:
+                pytest.skip(f"{case.id}: no text prompt configured")
+
+            return _create_and_download_video(
+                model=case.model_path,
+                prompt=case.prompt,
+                size=case.output_size,
+                seconds=video_seconds,
+            )
+
+        def generate_image_to_video() -> str:
+            """I2V: Image → Video (optional prompt)."""
+            if not case.image_path:
+                pytest.skip(f"{case.id}: no input image configured")
+
+            # Handle URL or local path
+            if case.is_image_url():
+                image_path = download_image_from_url(str(case.image_path))
+            else:
+                image_path = Path(case.image_path)
+                if not image_path.exists():
+                    pytest.skip(f"{case.id}: file missing: {image_path}")
+
+            with image_path.open("rb") as fh:
+                return _create_and_download_video(
+                    model=case.model_path,
+                    prompt=case.edit_prompt,
+                    size=case.output_size,
+                    seconds=video_seconds,
+                    input_reference=fh,
+                )
+
+        def generate_text_image_to_video() -> str:
+            """TI2V: Text + Image → 
Video.""" + if not case.edit_prompt or not case.image_path: + pytest.skip(f"{case.id}: no edit config") + + # Handle URL or local path + if case.is_image_url(): + image_path = download_image_from_url(str(case.image_path)) + else: + image_path = Path(case.image_path) + if not image_path.exists(): + pytest.skip(f"{case.id}: file missing: {image_path}") + + with image_path.open("rb") as fh: + return _create_and_download_video( + model=case.model_path, + prompt=case.edit_prompt, + size=case.output_size, + seconds=video_seconds, + input_reference=fh, + ) + + if case.modality == "video": + if case.image_path and case.edit_prompt: + return generate_text_image_to_video + elif case.image_path: + return generate_image_to_video + else: + return generate_video + + # Image modality + if case.edit_prompt and case.image_path: + return generate_image_edit + + return generate_image + + def _validate_and_record( + self, + case: DiffusionTestCase, + perf_record: RequestPerfRecord, + ) -> None: + """Validate metrics and record results.""" + is_baseline_generation_mode = os.environ.get("SGLANG_GEN_BASELINE", "0") == "1" + + scenario = BASELINE_CONFIG.scenarios.get(case.id) + missing_scenario = False + if scenario is None: + # Create dummy scenario to allow metric collection + scenario = type( + "DummyScenario", + (), + { + "expected_e2e_ms": 0, + "expected_avg_denoise_ms": 0, + "expected_median_denoise_ms": 0, + "stages_ms": {}, + "denoise_step_ms": {}, + }, + )() + if not is_baseline_generation_mode: + missing_scenario = True + + validator_name = case.custom_validator or "default" + validator_class = VALIDATOR_REGISTRY.get(validator_name, PerformanceValidator) + + validator = validator_class( + scenario=scenario, + tolerances=BASELINE_CONFIG.tolerances, + step_fractions=BASELINE_CONFIG.step_fractions, + ) + + summary = validator.collect_metrics(perf_record) + + if is_baseline_generation_mode or missing_scenario: + self._dump_baseline_for_testcase(case, summary, missing_scenario) + if missing_scenario: + pytest.fail(f"Testcase '{case.id}' not found in perf_baselines.json") + return + + self._check_for_improvement(case, summary, scenario) + + try: + validator.validate(perf_record, case.num_frames) + except AssertionError as e: + logger.error(f"Performance validation failed for {case.id}:\n{e}") + self._dump_baseline_for_testcase(case, summary, missing_scenario) + raise + + result = { + "test_name": case.id, + "modality": case.modality, + "e2e_ms": summary.e2e_ms, + "avg_denoise_ms": summary.avg_denoise_ms, + "median_denoise_ms": summary.median_denoise_ms, + "stage_metrics": summary.stage_metrics, + "sampled_steps": summary.sampled_steps, + } + + # video-specific metrics + if summary.frames_per_second: + result.update( + { + "frames_per_second": summary.frames_per_second, + "total_frames": summary.total_frames, + "avg_frame_time_ms": summary.avg_frame_time_ms, + } + ) + + self.__class__._perf_results.append(result) + + def _check_for_improvement( + self, + case: DiffusionTestCase, + summary: PerformanceSummary, + scenario: "ScenarioConfig", + ) -> None: + """Check for potential significant performance improvements and record them.""" + is_improved = False + threshold = BASELINE_CONFIG.improvement_threshold + + def is_sig_faster(actual, expected): + if expected == 0 or expected is None: + return False + return actual < expected * (1 - threshold) + + def safe_get_metric(metric_dict, key): + val = metric_dict.get(key) + return val if val is not None else float("inf") + + # Check for any significant improvement + if 
( + is_sig_faster(summary.e2e_ms, scenario.expected_e2e_ms) + or is_sig_faster(summary.avg_denoise_ms, scenario.expected_avg_denoise_ms) + or is_sig_faster( + summary.median_denoise_ms, scenario.expected_median_denoise_ms + ) + ): + is_improved = True + # Combine metrics, always taking the better (lower) value + new_stages = { + stage: min( + safe_get_metric(summary.stage_metrics, stage), + safe_get_metric(scenario.stages_ms, stage), + ) + for stage in set(summary.stage_metrics) | set(scenario.stages_ms) + } + new_denoise_steps = { + step: min( + safe_get_metric(summary.all_denoise_steps, step), + safe_get_metric(scenario.denoise_step_ms, step), + ) + for step in set(summary.all_denoise_steps.keys()) + | set(scenario.denoise_step_ms) + } + + # Check for stage-level improvements + if not is_improved: + for stage, new_val in new_stages.items(): + if is_sig_faster(new_val, scenario.stages_ms.get(stage, float("inf"))): + is_improved = True + break + if not is_improved: + for step, new_val in new_denoise_steps.items(): + if is_sig_faster( + new_val, scenario.denoise_step_ms.get(step, float("inf")) + ): + is_improved = True + break + + if is_improved: + new_baseline = { + "stages_ms": {k: round(v, 2) for k, v in new_stages.items()}, + "denoise_step_ms": { + str(k): round(v, 2) for k, v in new_denoise_steps.items() + }, + "expected_e2e_ms": round( + min(summary.e2e_ms, scenario.expected_e2e_ms), 2 + ), + "expected_avg_denoise_ms": round( + min(summary.avg_denoise_ms, scenario.expected_avg_denoise_ms), 2 + ), + "expected_median_denoise_ms": round( + min(summary.median_denoise_ms, scenario.expected_median_denoise_ms), + 2, + ), + } + self._improved_baselines.append({"id": case.id, "baseline": new_baseline}) + + def _dump_baseline_for_testcase( + self, + case: DiffusionTestCase, + summary: "PerformanceSummary", + missing_scenario: bool = False, + ) -> None: + """Dump performance metrics as a JSON scenario for baselines.""" + import json + + denoise_steps_formatted = { + str(k): round(v, 2) for k, v in summary.all_denoise_steps.items() + } + stages_formatted = {k: round(v, 2) for k, v in summary.stage_metrics.items()} + + baseline = { + "stages_ms": stages_formatted, + "denoise_step_ms": denoise_steps_formatted, + "expected_e2e_ms": round(summary.e2e_ms, 2), + "expected_avg_denoise_ms": round(summary.avg_denoise_ms, 2), + "expected_median_denoise_ms": round(summary.median_denoise_ms, 2), + } + + # Video-specific metrics + if case.modality == "video": + if "per_frame_generation" not in baseline["stages_ms"]: + baseline["stages_ms"]["per_frame_generation"] = ( + round(summary.avg_frame_time_ms, 2) + if summary.avg_frame_time_ms + else None + ) + action = "add" if missing_scenario else "update" + output = f""" +{action} this baseline in the "scenarios" section of perf_baselines.json: + +"{case.id}": {json.dumps(baseline, indent=4)} + +""" + logger.error(output) + + def test_diffusion_perf( + self, + case: DiffusionTestCase, + diffusion_server: ServerContext, + ): + """Single parametrized test that runs for all cases. + + Pytest will execute this test once per case in ONE_GPU_CASES, + with test IDs like: + - test_diffusion_perf[qwen_image_text] + - test_diffusion_perf[qwen_image_edit] + - etc. 
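+
+        A single case can be selected with pytest's -k filter, e.g.:
+            pytest sglang/multimodal_gen/test/server/test_server_a.py -k qwen_image_text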
+ """ + generate_fn = self.get_generate_fn(diffusion_server, case) + perf_record = self.run_and_collect( + diffusion_server, + generate_fn, + ) + self._validate_and_record(case, perf_record) From 6686edb49fb7139549fa3917d7c0272e011633eb Mon Sep 17 00:00:00 2001 From: Mick Date: Sun, 23 Nov 2025 18:24:54 +0800 Subject: [PATCH 05/11] upd --- .../multimodal_gen/test/server/testcase_configs.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index d0089a44366c..d02b4dd1d858 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -33,7 +33,8 @@ class ToleranceConfig: """Tolerance ratios for performance validation.""" e2e: float - stage: float + denoise_stage: float + non_denoise_stage: float denoise_step: float denoise_agg: float @@ -68,7 +69,15 @@ def load(cls, path: Path) -> BaselineConfig: tol_data = data["tolerances"] tolerances = ToleranceConfig( e2e=float(os.getenv("SGLANG_E2E_TOLERANCE", tol_data["e2e"])), - stage=float(os.getenv("SGLANG_STAGE_TIME_TOLERANCE", tol_data["stage"])), + denoise_stage=float( + os.getenv("SGLANG_STAGE_TIME_TOLERANCE", tol_data["denoise_stage"]) + ), + non_denoise_stage=float( + os.getenv( + "SGLANG_NON_DENOISE_STAGE_TIME_TOLERANCE", + tol_data["non_denoise_stage"], + ) + ), denoise_step=float( os.getenv("SGLANG_DENOISE_STEP_TOLERANCE", tol_data["denoise_step"]) ), @@ -236,6 +245,7 @@ def from_req_perf_record( ), ] + ONE_GPU_CASES_B: list[DiffusionTestCase] = [ # === Text to Video (T2V) === DiffusionTestCase( From 134f98c8eb9b688c8a9ead51aa6d2161b1b10714 Mon Sep 17 00:00:00 2001 From: Mick Date: Sun, 23 Nov 2025 18:33:22 +0800 Subject: [PATCH 06/11] upd --- .github/workflows/pr-test.yml | 2 +- .../sglang/multimodal_gen/test/run_suite.py | 23 ++++++------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index e387c96a9336..9f2f33d86086 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -402,7 +402,7 @@ jobs: cd python python3 sglang/multimodal_gen/test/run_suite.py \ --partition-id ${{ matrix.part }} \ - --total-partitions 1 \ + --total-partitions 2 \ --target-dir server \ --pattern "test_server_2_gpu*.py" diff --git a/python/sglang/multimodal_gen/test/run_suite.py b/python/sglang/multimodal_gen/test/run_suite.py index 3eb03db398b5..0f4c9c3a3ace 100644 --- a/python/sglang/multimodal_gen/test/run_suite.py +++ b/python/sglang/multimodal_gen/test/run_suite.py @@ -4,8 +4,8 @@ This script scans the `multimodal_gen/test/server/` directory for test files matching a pattern and executes a subset of them based on the partition ID How to add a new test: -1. Create a new test file in `python/sglang/multimodal_gen/test/server/`. -2. Name it matching the pattern `test_server_*.py` (e.g., `test_server_c.py`). +1. Create a new test file in `python/sglang/multimodal_gen/test/server/` +2. Name it matching the pattern it belongs to 3. The CI will automatically pick it up and distribute it to one of the runners. 
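+
+Example (assuming two parallel runners; this invocation runs shard 0):
+    python3 sglang/multimodal_gen/test/run_suite.py \
+        --partition-id 0 --total-partitions 2 \
+        --target-dir server --pattern "test_server_*.py"
+
+Files are interleaved across partitions: the file at index i runs on the
+partition where i % total_partitions == partition_id.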
""" @@ -47,7 +47,7 @@ def parse_args(): def get_test_files(base_dir, sub_dir, pattern): - """Find all test files matching the pattern.""" + """find all test files matching the pattern""" search_path = os.path.join(base_dir, sub_dir, pattern) files = sorted(glob.glob(search_path)) return files @@ -56,12 +56,10 @@ def get_test_files(base_dir, sub_dir, pattern): def main(): args = parse_args() - # Determine the absolute path of the test directory - # Assuming this script is located at python/sglang/multimodal_gen/test/run_suite.py current_file_path = Path(__file__).resolve() test_root_dir = current_file_path.parent - # 1. Discover Test Files + # 1. get all test files all_files = get_test_files(str(test_root_dir), args.target_dir, args.pattern) if not all_files: @@ -70,10 +68,7 @@ def main(): ) sys.exit(0) # Exit gracefully if no files found - print(f"Found {len(all_files)} test files total.") - - # 2. Partitioning (Distribute files across runners) - # Using simple interleaving: file 0 -> runner 0, file 1 -> runner 1, file 2 -> runner 0... + # 2. partitioning the test files my_files = [ f for i, f in enumerate(all_files) @@ -86,14 +81,10 @@ def main(): ) sys.exit(0) - print(f"Running {len(my_files)} files on this partition:") for f in my_files: print(f" - {os.path.basename(f)}") - # 3. Execute Pytest - # Construct the pytest command - # -s: show stdout - # -v: verbose + # 3. execute the tests, one file at a time cmd = [ sys.executable, "-m", @@ -103,7 +94,7 @@ def main(): "--log-cli-level=INFO", ] + my_files - print(f"Executing command: {' '.join(cmd)}") + print(f"Running command: {' '.join(cmd)}") result = subprocess.run(cmd) sys.exit(result.returncode) From c6c7a4664c6a0eddc48b7034f6a255b4e0b5e76f Mon Sep 17 00:00:00 2001 From: Mick Date: Sun, 23 Nov 2025 18:37:18 +0800 Subject: [PATCH 07/11] upd --- .github/workflows/pr-test.yml | 9 +- .../sglang/multimodal_gen/test/run_suite.py | 115 ++++++++++-------- 2 files changed, 71 insertions(+), 53 deletions(-) diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 9f2f33d86086..9744da045426 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -387,7 +387,7 @@ jobs: fail-fast: false max-parallel: 5 matrix: - part: [0, 1] + part: [0] steps: - name: Checkout code uses: actions/checkout@v4 @@ -396,15 +396,14 @@ jobs: run: | CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh diffusion - - name: Run diffusion server tests (2 GPU) + - name: Run diffusion server tests timeout-minutes: 60 run: | cd python python3 sglang/multimodal_gen/test/run_suite.py \ + --suite 2-gpu \ --partition-id ${{ matrix.part }} \ - --total-partitions 2 \ - --target-dir server \ - --pattern "test_server_2_gpu*.py" + --total-partitions 1 unit-test-backend-1-gpu: needs: [check-changes, stage-a-test-1] diff --git a/python/sglang/multimodal_gen/test/run_suite.py b/python/sglang/multimodal_gen/test/run_suite.py index 0f4c9c3a3ace..b705b1109407 100644 --- a/python/sglang/multimodal_gen/test/run_suite.py +++ b/python/sglang/multimodal_gen/test/run_suite.py @@ -1,24 +1,41 @@ """ -Helper script to discover and run multimodal generation tests +Test runner for multimodal_gen that manages test suites and parallel execution. -This script scans the `multimodal_gen/test/server/` directory for test files matching a pattern and executes a subset of them based on the partition ID +Usage: + python3 run_suite.py --suite --partition-id --total-partitions -How to add a new test: -1. 
Create a new test file in `python/sglang/multimodal_gen/test/server/` -2. Name it matching the pattern it belongs to -3. The CI will automatically pick it up and distribute it to one of the runners. +Example: + python3 run_suite.py --suite 1-gpu --partition-id 0 --total-partitions 2 """ import argparse -import glob import os import subprocess import sys from pathlib import Path +SUITES = { + "1-gpu": [ + "test_server_perf_a.py", + "test_server_perf_b.py", + # add new 1-gpu test files here + ], + "2-gpu": [ + "test_server_perf_2_gpu.py", + # add new 2-gpu test files here + ], +} + def parse_args(): parser = argparse.ArgumentParser(description="Run multimodal_gen test suite") + parser.add_argument( + "--suite", + type=str, + required=True, + choices=list(SUITES.keys()), + help="The test suite to run (e.g., 1-gpu, 2-gpu)", + ) parser.add_argument( "--partition-id", type=int, @@ -32,72 +49,74 @@ def parse_args(): help="Total number of partitions", ) parser.add_argument( - "--target-dir", + "--base-dir", type=str, default="server", - help="Sub-directory under multimodal_gen/test to look for tests", - ) - parser.add_argument( - "--pattern", - type=str, - default="test_server_*.py", - help="Glob pattern to match test files", + help="Base directory for tests relative to this script's parent", ) return parser.parse_args() -def get_test_files(base_dir, sub_dir, pattern): - """find all test files matching the pattern""" - search_path = os.path.join(base_dir, sub_dir, pattern) - files = sorted(glob.glob(search_path)) - return files +def run_pytest(files): + if not files: + print("No files to run.") + return 0 + + cmd = [sys.executable, "-m", "pytest", "-s", "-v", "--log-cli-level=INFO"] + files + + print(f"Executing command: {' '.join(cmd)}") + result = subprocess.run(cmd) + return result.returncode def main(): args = parse_args() + # 1. resolve base path current_file_path = Path(__file__).resolve() test_root_dir = current_file_path.parent + target_dir = test_root_dir / args.base_dir + + if not target_dir.exists(): + print(f"Error: Target directory {target_dir} does not exist.") + sys.exit(1) - # 1. get all test files - all_files = get_test_files(str(test_root_dir), args.target_dir, args.pattern) + # 2. get files from suite + suite_files_rel = SUITES[args.suite] - if not all_files: - print( - f"No test files found in {os.path.join(test_root_dir, args.target_dir)} matching {args.pattern}" - ) - sys.exit(0) # Exit gracefully if no files found + suite_files_abs = [] + for f_rel in suite_files_rel: + f_abs = target_dir / f_rel + if not f_abs.exists(): + print(f"Warning: Test file {f_rel} not found in {target_dir}. Skipping.") + continue + suite_files_abs.append(str(f_abs)) - # 2. partitioning the test files + if not suite_files_abs: + print(f"No valid test files found for suite '{args.suite}'.") + sys.exit(0) + + # 3. partitioning my_files = [ f - for i, f in enumerate(all_files) + for i, f in enumerate(suite_files_abs) if i % args.total_partitions == args.partition_id ] - if not my_files: - print( - f"Partition {args.partition_id}/{args.total_partitions} has no files to run. Skipping." - ) - sys.exit(0) - + print( + f"Suite: {args.suite} | Partition: {args.partition_id}/{args.total_partitions}" + ) + print(f"Selected {len(my_files)} files:") for f in my_files: print(f" - {os.path.basename(f)}") - # 3. 
execute the tests, one file at a time - cmd = [ - sys.executable, - "-m", - "pytest", - "-s", - "-v", - "--log-cli-level=INFO", - ] + my_files - - print(f"Running command: {' '.join(cmd)}") + if not my_files: + print("No files assigned to this partition. Exiting success.") + sys.exit(0) - result = subprocess.run(cmd) - sys.exit(result.returncode) + # 4. execute + exit_code = run_pytest(my_files) + sys.exit(exit_code) if __name__ == "__main__": From 77907d4b32a77b8fa9ef007283e5a22427682d0e Mon Sep 17 00:00:00 2001 From: Mick Date: Sun, 23 Nov 2025 18:40:28 +0800 Subject: [PATCH 08/11] upd --- .../test/server/test_server_2_gpu_a.py | 2 +- .../test/server/test_server_2_gpu_b.py | 2 +- .../test/server/test_server_perf_2_gpu.py | 25 --------------- .../test/server/test_server_perf_a.py | 31 ------------------- .../test/server/test_server_perf_b.py | 31 ------------------- .../test/server/test_server_perf_common.py | 0 6 files changed, 2 insertions(+), 89 deletions(-) delete mode 100644 python/sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py delete mode 100644 python/sglang/multimodal_gen/test/server/test_server_perf_a.py delete mode 100644 python/sglang/multimodal_gen/test/server/test_server_perf_b.py delete mode 100644 python/sglang/multimodal_gen/test/server/test_server_perf_common.py diff --git a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py index 4dac4dd49302..3668f63e6334 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py +++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_a.py @@ -1,5 +1,5 @@ """ -2 GPU Performance tests (A14B models) with --num-gpus 2 --ulysses-degree 2. +2 GPU tests """ from __future__ import annotations diff --git a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py index 6c6e691325fb..2c9b5cdc7640 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py +++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_b.py @@ -1,5 +1,5 @@ """ -2 GPU Performance tests (A14B models) with --num-gpus 2 --ulysses-degree 2. +2 GPU tests """ from __future__ import annotations diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py b/python/sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py deleted file mode 100644 index 1a85e9a87523..000000000000 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_2_gpu.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -2 GPU Performance tests (A14B models) with --num-gpus 2 --ulysses-degree 2. 
-""" - -from __future__ import annotations - -import pytest - -from sglang.multimodal_gen.test.server.test_server_perf_common import ( # noqa: F401 - DiffusionPerformanceBase, - diffusion_server, -) -from sglang.multimodal_gen.test.server.testcase_configs import ( - TWO_GPU_CASES, - DiffusionTestCase, -) - - -class TestDiffusionPerformanceTwoGpu(DiffusionPerformanceBase): - """Performance tests for 2-GPU diffusion cases.""" - - @pytest.fixture(params=TWO_GPU_CASES, ids=lambda c: c.id) - def case(self, request) -> DiffusionTestCase: - """Provide a DiffusionTestCase for each 2-GPU test.""" - return request.param diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_a.py b/python/sglang/multimodal_gen/test/server/test_server_perf_a.py deleted file mode 100644 index f99b410bf14c..000000000000 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_a.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Config-driven diffusion performance test with pytest parametrization. - - -If the actual run is significantly better than the baseline, the improved cases with their updated baseline will be printed -""" - -from __future__ import annotations - -import pytest - -from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger -from sglang.multimodal_gen.test.server.test_server_perf_common import ( # noqa: F401 - DiffusionPerformanceBase, - diffusion_server, -) -from sglang.multimodal_gen.test.server.testcase_configs import ( - ONE_GPU_CASES_A, - DiffusionTestCase, -) - -logger = init_logger(__name__) - - -class TestDiffusionPerformanceOneGpu(DiffusionPerformanceBase): - """Performance tests for 1-GPU diffusion cases.""" - - @pytest.fixture(params=ONE_GPU_CASES_A, ids=lambda c: c.id) - def case(self, request) -> DiffusionTestCase: - """Provide a DiffusionTestCase for each 1-GPU test.""" - return request.param diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_b.py b/python/sglang/multimodal_gen/test/server/test_server_perf_b.py deleted file mode 100644 index 0faa8fc647eb..000000000000 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_b.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Config-driven diffusion performance test with pytest parametrization. 
-
-
-If the actual run is significantly better than the baseline, the improved cases with their updated baseline will be printed
-"""
-
-from __future__ import annotations
-
-import pytest
-
-from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger
-from sglang.multimodal_gen.test.server.test_server_perf_common import (  # noqa: F401
-    DiffusionPerformanceBase,
-    diffusion_server,
-)
-from sglang.multimodal_gen.test.server.testcase_configs import (
-    ONE_GPU_CASES_B,
-    DiffusionTestCase,
-)
-
-logger = init_logger(__name__)
-
-
-class TestDiffusionPerformanceOneGpu(DiffusionPerformanceBase):
-    """Performance tests for 1-GPU diffusion cases."""
-
-    @pytest.fixture(params=ONE_GPU_CASES_B, ids=lambda c: c.id)
-    def case(self, request) -> DiffusionTestCase:
-        """Provide a DiffusionTestCase for each 1-GPU test."""
-        return request.param
diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_common.py b/python/sglang/multimodal_gen/test/server/test_server_perf_common.py
deleted file mode 100644
index e69de29bb2d1..000000000000

From cfe1a1522ced371978a96ffd4ce2da40c9e7a31f Mon Sep 17 00:00:00 2001
From: Mick
Date: Sun, 23 Nov 2025 18:42:29 +0800
Subject: [PATCH 09/11] upd

---
 python/sglang/multimodal_gen/test/run_suite.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/sglang/multimodal_gen/test/run_suite.py b/python/sglang/multimodal_gen/test/run_suite.py
index b705b1109407..6000fb4951f7 100644
--- a/python/sglang/multimodal_gen/test/run_suite.py
+++ b/python/sglang/multimodal_gen/test/run_suite.py
@@ -16,12 +16,13 @@
 SUITES = {
     "1-gpu": [
-        "test_server_perf_a.py",
-        "test_server_perf_b.py",
+        "test_server_a.py",
+        "test_server_b.py",
         # add new 1-gpu test files here
     ],
     "2-gpu": [
-        "test_server_perf_2_gpu.py",
+        "test_server_2_gpu_a.py",
+        "test_server_2_gpu_b.py",
         # add new 2-gpu test files here
     ],
 }

From d17f4e9145538a57477582aaef60358bae5a0a48 Mon Sep 17 00:00:00 2001
From: Mick
Date: Sun, 23 Nov 2025 18:43:16 +0800
Subject: [PATCH 10/11] upd

---
 .github/workflows/pr-test.yml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index 9744da045426..7648abe1e797 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -373,10 +373,9 @@ jobs:
         run: |
           cd python
           python3 sglang/multimodal_gen/test/run_suite.py \
+            --suite 1-gpu \
            --partition-id ${{ matrix.part }} \
            --total-partitions 2 \
-            --target-dir server \
-            --pattern "test_server_([A-Za-z].*)\.p"

   multimodal-gen-test-2-gpu:
@@ -386,7 +386,7 @@ jobs:
       fail-fast: false
       max-parallel: 5
       matrix:
-        part: [0]
+        part: [0, 1]
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -400,7 +399,7 @@ jobs:
         timeout-minutes: 60
         run: |
           cd python
           python3 sglang/multimodal_gen/test/run_suite.py \
             --suite 2-gpu \
             --partition-id ${{ matrix.part }} \
-            --total-partitions 1
+            --total-partitions 2

From b1b9f3b7e9248e0cbbb5af14a6f88398677b51df Mon Sep 17 00:00:00 2001
From: Mick
Date: Sun, 23 Nov 2025 19:04:57 +0800
Subject: [PATCH 11/11] update baseline

---
 .../sglang/multimodal_gen/test/run_suite.py   |   6 +-
 .../test/server/perf_baselines.json           | 115 +++++++++---------
 2 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/python/sglang/multimodal_gen/test/run_suite.py b/python/sglang/multimodal_gen/test/run_suite.py
index 6000fb4951f7..02442882418c 100644
--- a/python/sglang/multimodal_gen/test/run_suite.py
+++ b/python/sglang/multimodal_gen/test/run_suite.py
@@ 
-14,6 +14,10 @@ import sys from pathlib import Path +from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger + +logger = init_logger(__name__) + SUITES = { "1-gpu": [ "test_server_a.py", @@ -65,7 +69,7 @@ def run_pytest(files): cmd = [sys.executable, "-m", "pytest", "-s", "-v", "--log-cli-level=INFO"] + files - print(f"Executing command: {' '.join(cmd)}") + logger.info(f"Running command: {' '.join(cmd)}") result = subprocess.run(cmd) return result.returncode diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 0d30b1b3059b..6661f495f848 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -597,70 +597,71 @@ }, "wan2_1_i2v_14b_480P_2gpu": { "stages_ms": { - "InputValidationStage": 33.57, - "TextEncodingStage": 2424.73, + "InputValidationStage": 38.23, + "TextEncodingStage": 3550.36, "ImageEncodingStage": 3462.55, "ConditioningStage": 0.01, - "TimestepPreparationStage": 2.69, + "TimestepPreparationStage": 2.6, "LatentPreparationStage": 9.73, "ImageVAEEncodingStage": 2290.98, - "DenoisingStage": 414428.85, - "DecodingStage": 3016.1 + "DenoisingStage": 415021.17, + "DecodingStage": 3016.1, + "per_frame_generation": null }, "denoise_step_ms": { - "0": 9304.67, - "1": 8218.78, - "2": 8269.27, - "3": 8291.59, - "4": 8308.29, - "5": 8300.75, - "6": 8302.76, - "7": 8297.95, - "8": 8295.26, - "9": 8296.45, - "10": 8287.48, - "11": 8275.98, - "12": 8281.9, - "13": 8283.39, - "14": 8264.96, - "15": 8275.66, - "16": 8271.89, - "17": 8273.77, - "18": 8279.34, - "19": 8271.89, - "20": 8265.83, - "21": 8259.99, - "22": 8260.36, - "23": 8270.06, - "24": 8271.58, - "25": 8272.39, - "26": 8267.87, - "27": 8277.09, - "28": 8264.49, - "29": 8266.14, - "30": 8263.67, - "31": 8273.82, - "32": 8260.5, - "33": 8268.44, - "34": 8253.2, - "35": 8244.32, - "36": 8258.15, - "37": 8256.65, - "38": 8255.48, - "39": 8260.09, - "40": 8250.99, - "41": 8253.52, - "42": 8247.39, - "43": 8252.7, - "44": 8243.67, - "45": 8251.94, - "46": 8258.73, - "47": 8240.57, - "48": 8249.64, - "49": 8248.14 + "0": 10200.25, + "1": 8222.39, + "2": 8279.38, + "3": 8301.48, + "4": 8338.87, + "5": 8352.39, + "6": 8354.64, + "7": 8353.64, + "8": 8315.58, + "9": 8308.48, + "10": 8299.65, + "11": 8292.7, + "12": 8292.73, + "13": 8285.21, + "14": 8276.06, + "15": 8270.41, + "16": 8273.04, + "17": 8266.04, + "18": 8267.7, + "19": 8264.06, + "20": 8259.32, + "21": 8257.26, + "22": 8253.02, + "23": 8251.77, + "24": 8260.97, + "25": 8251.39, + "26": 8237.43, + "27": 8241.33, + "28": 8235.96, + "29": 8240.6, + "30": 8232.48, + "31": 8237.85, + "32": 8244.3, + "33": 8236.79, + "34": 8239.83, + "35": 8239.89, + "36": 8239.12, + "37": 8246.74, + "38": 8235.67, + "39": 8242.77, + "40": 8241.17, + "41": 8240.24, + "42": 8237.01, + "43": 8231.26, + "44": 8232.85, + "45": 8226.56, + "46": 8236.98, + "47": 8226.73, + "48": 8220.49, + "49": 8217.04 }, - "expected_e2e_ms": 425569.98, - "expected_avg_denoise_ms": 8288.39, + "expected_e2e_ms": 426697.37, + "expected_avg_denoise_ms": 8300.19, "expected_median_denoise_ms": 8267.01 }, "wan2_1_i2v_14b_720P_2gpu": {