diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 2158b5b6a567..7fd88fd678b8 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -5,9 +5,9 @@ "description": "Reference numbers captured from the CI diffusion server baseline run" }, "tolerances": { - "e2e": 0.05, + "e2e": 0.1, "denoise_stage": 0.05, - "non_denoise_stage": 0.3, + "non_denoise_stage": 0.4, "denoise_step": 0.2, "denoise_agg": 0.08 }, @@ -41,7 +41,7 @@ "TimestepPreparationStage": 10.6, "LatentPreparationStage": 11.8, "DenoisingStage": 21202.6, - "DecodingStage": 512.6 + "DecodingStage": 751.1 }, "denoise_step_ms": { "0": 1077.77, @@ -297,7 +297,6 @@ "expected_avg_denoise_ms": 1820.02, "expected_median_denoise_ms": 1798.65 }, - "wan2_2_ti2v_5b": { "stages_ms": { "InputValidationStage": 96.27, @@ -387,26 +386,25 @@ }, "fast_hunyuan_video": { "stages_ms": { - "InputValidationStage": 0.07, - "TextEncodingStage": 598.66, - "ConditioningStage": 0.01, - "TimestepPreparationStage": 57.26, - "LatentPreparationStage": 15.38, - "DenoisingStage": 3367.01, - "DecodingStage": 2457.57, - "per_frame_generation": null + "InputValidationStage": 0.09, + "TextEncodingStage": 845.64, + "ConditioningStage": 0.04, + "TimestepPreparationStage": 125.22, + "LatentPreparationStage": 29.34, + "DenoisingStage": 3860.64, + "DecodingStage": 2580.55 }, "denoise_step_ms": { - "0": 1577.86, - "1": 161.04, - "2": 406.81, - "3": 408.67, - "4": 403.55, - "5": 405.31 + "0": 2063.08, + "1": 164.02, + "2": 406.99, + "3": 407.95, + "4": 407.51, + "5": 404.2 }, - "expected_e2e_ms": 6521.03, - "expected_avg_denoise_ms": 560.54, - "expected_median_denoise_ms": 406.06 + "expected_e2e_ms": 7487.87, + "expected_avg_denoise_ms": 642.29, + "expected_median_denoise_ms": 407.25 }, "wan2_2_i2v_a14b_2gpu": { "stages_ms": { @@ -415,7 +413,7 @@ "ConditioningStage": 0.02, "TimestepPreparationStage": 2.2, "LatentPreparationStage": 8.93, - "ImageVAEEncodingStage": 2075.47, + "ImageVAEEncodingStage": 2075.47, "DenoisingStage": 382628.41, "DecodingStage": 2820.89 }, diff --git a/python/sglang/multimodal_gen/test/server/test_server_perf_b.py b/python/sglang/multimodal_gen/test/server/test_server_perf_b.py index f99b410bf14c..0faa8fc647eb 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_perf_b.py +++ b/python/sglang/multimodal_gen/test/server/test_server_perf_b.py @@ -15,7 +15,7 @@ diffusion_server, ) from sglang.multimodal_gen.test.server.testcase_configs import ( - ONE_GPU_CASES_A, + ONE_GPU_CASES_B, DiffusionTestCase, ) @@ -25,7 +25,7 @@ class TestDiffusionPerformanceOneGpu(DiffusionPerformanceBase): """Performance tests for 1-GPU diffusion cases.""" - @pytest.fixture(params=ONE_GPU_CASES_A, ids=lambda c: c.id) + @pytest.fixture(params=ONE_GPU_CASES_B, ids=lambda c: c.id) def case(self, request) -> DiffusionTestCase: """Provide a DiffusionTestCase for each 1-GPU test.""" return request.param