From 0593cedf8728bc9bd1bf6d08b313956d60ab040b Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Mon, 23 Feb 2026 21:13:00 -0500 Subject: [PATCH 01/10] add --- .../test/server/cache_dit_config_1gpu.yaml | 10 ++++++++++ .../test/server/cache_dit_config_2gpu.yaml | 15 +++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 python/sglang/multimodal_gen/test/server/cache_dit_config_1gpu.yaml create mode 100644 python/sglang/multimodal_gen/test/server/cache_dit_config_2gpu.yaml diff --git a/python/sglang/multimodal_gen/test/server/cache_dit_config_1gpu.yaml b/python/sglang/multimodal_gen/test/server/cache_dit_config_1gpu.yaml new file mode 100644 index 000000000000..371a9e6dfab3 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/cache_dit_config_1gpu.yaml @@ -0,0 +1,10 @@ +cache_config: + max_warmup_steps: 8 + warmup_interval: 2 + max_cached_steps: -1 + max_continuous_cached_steps: 2 + Fn_compute_blocks: 1 + Bn_compute_blocks: 0 + residual_diff_threshold: 0.12 + enable_taylorseer: true + taylorseer_order: 1 diff --git a/python/sglang/multimodal_gen/test/server/cache_dit_config_2gpu.yaml b/python/sglang/multimodal_gen/test/server/cache_dit_config_2gpu.yaml new file mode 100644 index 000000000000..590137703fb8 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/cache_dit_config_2gpu.yaml @@ -0,0 +1,15 @@ +cache_config: + max_warmup_steps: 8 + warmup_interval: 2 + max_cached_steps: -1 + max_continuous_cached_steps: 2 + Fn_compute_blocks: 1 + Bn_compute_blocks: 0 + residual_diff_threshold: 0.12 + enable_taylorseer: true + taylorseer_order: 1 +parallelism_config: + ulysses_size: 2 + parallel_kwargs: + attention_backend: native + extra_parallel_modules: ["text_encoder", "vae"] From 5c45e52ac548bd50644c08c96b0e2cc670e4efdb Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Mon, 23 Feb 2026 21:48:49 -0500 Subject: [PATCH 02/10] fix --- python/sglang/multimodal_gen/test/server/test_server_utils.py | 2 ++ python/sglang/multimodal_gen/test/server/testcase_configs.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/python/sglang/multimodal_gen/test/server/test_server_utils.py b/python/sglang/multimodal_gen/test/server/test_server_utils.py index aedaaf1e6dbd..915da2a5cbfb 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_utils.py +++ b/python/sglang/multimodal_gen/test/server/test_server_utils.py @@ -943,6 +943,8 @@ def generate_image(case_id, client) -> tuple[str, bytes]: # Build extra_body for optional features extra_body = dict(sampling_params.extras) + if sampling_params.diffusers_kwargs: + extra_body["diffusers_kwargs"] = sampling_params.diffusers_kwargs response = client.images.with_raw_response.generate( model=model_path, diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index a140b3003e84..48be5e912771 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -247,6 +247,8 @@ class DiffusionSamplingParams: # merged directly into the OpenAI extra_body dict. extras: dict = field(default_factory=dict) + diffusers_kwargs: dict | None = None + @dataclass(frozen=True) class DiffusionTestCase: From 10ffede7a24e18b19f9392ec7bceb324fdf0f614 Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Mon, 23 Feb 2026 22:30:55 -0500 Subject: [PATCH 03/10] add --- python/sglang/multimodal_gen/test/server/test_server_utils.py | 1 - python/sglang/multimodal_gen/test/server/testcase_configs.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/sglang/multimodal_gen/test/server/test_server_utils.py b/python/sglang/multimodal_gen/test/server/test_server_utils.py index 915da2a5cbfb..b5fcf59ba3c5 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_utils.py +++ b/python/sglang/multimodal_gen/test/server/test_server_utils.py @@ -945,7 +945,6 @@ def generate_image(case_id, client) -> tuple[str, bytes]: extra_body = dict(sampling_params.extras) if sampling_params.diffusers_kwargs: extra_body["diffusers_kwargs"] = sampling_params.diffusers_kwargs - response = client.images.with_raw_response.generate( model=model_path, prompt=sampling_params.prompt, diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index 48be5e912771..c50a78cf3ef4 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -249,6 +249,8 @@ class DiffusionSamplingParams: diffusers_kwargs: dict | None = None + diffusers_kwargs: dict | None = None + @dataclass(frozen=True) class DiffusionTestCase: From 2ff693b022ff82bc483e5350c8b657468de8c58b Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Tue, 24 Feb 2026 03:45:20 +0000 Subject: [PATCH 04/10] fix --- .../test/server/perf_baselines.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 08685b75c6c8..0aafeed61f85 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -2042,6 +2042,24 @@ "expected_median_denoise_ms": 268.51, "estimated_full_test_time_s": 122.7 }, + "qwen_image_t2i_cache_dit_config_diffusers_1gpu": { + "stages_ms": { + "DiffusersExecutionStage": 5070.86 + }, + "denoise_step_ms": {}, + "expected_e2e_ms": 5079.29, + "expected_avg_denoise_ms": 0.0, + "expected_median_denoise_ms": 0.0 + }, + "qwen_image_t2i_cache_dit_config_diffusers_2gpu": { + "stages_ms": { + "DiffusersExecutionStage": 6981.05 + }, + "denoise_step_ms": {}, + "expected_e2e_ms": 6983.64, + "expected_avg_denoise_ms": 0.0, + "expected_median_denoise_ms": 0.0 + }, "hunyuan3d_shape_gen": { "stages_ms": { "Hunyuan3DShapeBeforeDenoisingStage": 544.59, From 018ba0bb7f3800de2259d68ae3ad6a6a7b5cc97e Mon Sep 17 00:00:00 2001 From: Chi Date: Wed, 25 Feb 2026 00:39:27 -0500 Subject: [PATCH 05/10] scm --- .../test/server/cache_dit_scm_config.yaml | 13 +++++++++++++ .../multimodal_gen/test/server/perf_baselines.json | 9 +++++++++ 2 files changed, 22 insertions(+) create mode 100644 python/sglang/multimodal_gen/test/server/cache_dit_scm_config.yaml diff --git a/python/sglang/multimodal_gen/test/server/cache_dit_scm_config.yaml b/python/sglang/multimodal_gen/test/server/cache_dit_scm_config.yaml new file mode 100644 index 000000000000..d5252d78c993 --- /dev/null +++ b/python/sglang/multimodal_gen/test/server/cache_dit_scm_config.yaml @@ -0,0 +1,13 @@ +cache_config: + max_warmup_steps: 8 + warmup_interval: 2 + max_cached_steps: -1 + max_continuous_cached_steps: 2 + Fn_compute_blocks: 1 + Bn_compute_blocks: 0 + residual_diff_threshold: 0.12 + enable_taylorseer: true + taylorseer_order: 1 + num_inference_steps: 50 + steps_computation_mask: "medium" + steps_computation_policy: dynamic diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 0aafeed61f85..3529cbca1fc6 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -2051,6 +2051,15 @@ "expected_avg_denoise_ms": 0.0, "expected_median_denoise_ms": 0.0 }, + "qwen_image_t2i_cache_dit_scm_config_diffusers_1gpu": { + "stages_ms": { + "DiffusersExecutionStage": 5070.86 + }, + "denoise_step_ms": {}, + "expected_e2e_ms": 5079.29, + "expected_avg_denoise_ms": 0.0, + "expected_median_denoise_ms": 0.0 + }, "qwen_image_t2i_cache_dit_config_diffusers_2gpu": { "stages_ms": { "DiffusersExecutionStage": 6981.05 From 57bbc8bb3ab553291e7c9b4edc3fcf171614b287 Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Wed, 25 Feb 2026 06:14:22 +0000 Subject: [PATCH 06/10] fix baseline --- python/sglang/multimodal_gen/test/server/perf_baselines.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 3529cbca1fc6..2c670ed564ac 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -2053,10 +2053,10 @@ }, "qwen_image_t2i_cache_dit_scm_config_diffusers_1gpu": { "stages_ms": { - "DiffusersExecutionStage": 5070.86 + "DiffusersExecutionStage": 6160.17 }, "denoise_step_ms": {}, - "expected_e2e_ms": 5079.29, + "expected_e2e_ms": 6170.56, "expected_avg_denoise_ms": 0.0, "expected_median_denoise_ms": 0.0 }, From 6bad7e2d151d8c9bb03374e7f20b2240cc0fba95 Mon Sep 17 00:00:00 2001 From: Chi Date: Tue, 10 Mar 2026 01:08:42 -0400 Subject: [PATCH 07/10] configs --- .../test/server/{ => configs}/cache_dit_config_1gpu.yaml | 0 .../test/server/{ => configs}/cache_dit_config_2gpu.yaml | 0 .../test/server/{ => configs}/cache_dit_scm_config.yaml | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename python/sglang/multimodal_gen/test/server/{ => configs}/cache_dit_config_1gpu.yaml (100%) rename python/sglang/multimodal_gen/test/server/{ => configs}/cache_dit_config_2gpu.yaml (100%) rename python/sglang/multimodal_gen/test/server/{ => configs}/cache_dit_scm_config.yaml (100%) diff --git a/python/sglang/multimodal_gen/test/server/cache_dit_config_1gpu.yaml b/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_1gpu.yaml similarity index 100% rename from python/sglang/multimodal_gen/test/server/cache_dit_config_1gpu.yaml rename to python/sglang/multimodal_gen/test/server/configs/cache_dit_config_1gpu.yaml diff --git a/python/sglang/multimodal_gen/test/server/cache_dit_config_2gpu.yaml b/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_2gpu.yaml similarity index 100% rename from python/sglang/multimodal_gen/test/server/cache_dit_config_2gpu.yaml rename to python/sglang/multimodal_gen/test/server/configs/cache_dit_config_2gpu.yaml diff --git a/python/sglang/multimodal_gen/test/server/cache_dit_scm_config.yaml b/python/sglang/multimodal_gen/test/server/configs/cache_dit_scm_config.yaml similarity index 100% rename from python/sglang/multimodal_gen/test/server/cache_dit_scm_config.yaml rename to python/sglang/multimodal_gen/test/server/configs/cache_dit_scm_config.yaml From 0c272f8fc01510fa68139ae7746d7bbc8d449cb6 Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Sat, 9 May 2026 05:21:21 +0000 Subject: [PATCH 08/10] add --- .../server/configs/cache_dit_config_2gpu.yaml | 15 ---- .../multimodal_gen/test/server/gpu_cases.py | 43 +++++++++ .../test/server/perf_baselines.json | 90 +++++++++++++++---- .../test/server/testcase_configs.py | 2 - 4 files changed, 118 insertions(+), 32 deletions(-) delete mode 100644 python/sglang/multimodal_gen/test/server/configs/cache_dit_config_2gpu.yaml diff --git a/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_2gpu.yaml b/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_2gpu.yaml deleted file mode 100644 index 590137703fb8..000000000000 --- a/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_2gpu.yaml +++ /dev/null @@ -1,15 +0,0 @@ -cache_config: - max_warmup_steps: 8 - warmup_interval: 2 - max_cached_steps: -1 - max_continuous_cached_steps: 2 - Fn_compute_blocks: 1 - Bn_compute_blocks: 0 - residual_diff_threshold: 0.12 - enable_taylorseer: true - taylorseer_order: 1 -parallelism_config: - ulysses_size: 2 - parallel_kwargs: - attention_backend: native - extra_parallel_modules: ["text_encoder", "vae"] diff --git a/python/sglang/multimodal_gen/test/server/gpu_cases.py b/python/sglang/multimodal_gen/test/server/gpu_cases.py index 9407625a5ee9..9a940a110d24 100644 --- a/python/sglang/multimodal_gen/test/server/gpu_cases.py +++ b/python/sglang/multimodal_gen/test/server/gpu_cases.py @@ -1,3 +1,5 @@ +from pathlib import Path + from sglang.multimodal_gen.runtime.platforms import current_platform from sglang.multimodal_gen.test.server.testcase_configs import ( MODELOPT_FLUX1_FP8_TRANSFORMER, @@ -68,6 +70,36 @@ ), T2I_sampling_params, ), + DiffusionTestCase( + "qwen_image_t2i_cache_dit_config_diffusers_1gpu", + DiffusionServerArgs( + model_path=DEFAULT_QWEN_IMAGE_MODEL_NAME_FOR_TEST, + extras=[ + "--backend", + "diffusers", + "--cache-dit-config", + str(Path(__file__).parent / "configs" / "cache_dit_config_1gpu.yaml"), + ], + ), + T2I_sampling_params, + run_consistency_check=False, + run_component_accuracy_check=False, + ), + DiffusionTestCase( + "qwen_image_t2i_cache_dit_scm_config_diffusers_1gpu", + DiffusionServerArgs( + model_path=DEFAULT_QWEN_IMAGE_MODEL_NAME_FOR_TEST, + extras=[ + "--backend", + "diffusers", + "--cache-dit-config", + str(Path(__file__).parent / "configs" / "cache_dit_scm_config.yaml"), + ], + ), + T2I_sampling_params, + run_consistency_check=False, + run_component_accuracy_check=False, + ), DiffusionTestCase( "flux_image_t2i", DiffusionServerArgs(model_path=DEFAULT_FLUX_1_DEV_MODEL_NAME_FOR_TEST), @@ -521,6 +553,17 @@ ), T2V_sampling_params, ), + DiffusionTestCase( + "wan2_1_t2v_1_3b_cache_dit_sp_only_2gpu", + DiffusionServerArgs( + model_path=DEFAULT_WAN_2_1_T2V_1_3B_MODEL_NAME_FOR_TEST, + ulysses_degree=2, + enable_cache_dit=True, + ), + T2V_sampling_params, + run_consistency_check=False, + run_component_accuracy_check=False, + ), DiffusionTestCase( "fsdp-inference", DiffusionServerArgs( diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 2c670ed564ac..48d7401ecb88 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -1073,6 +1073,73 @@ "expected_median_denoise_ms": 100.05, "estimated_full_test_time_s": 127.6 }, + "wan2_1_t2v_1_3b_cache_dit_sp_only_2gpu": { + "stages_ms": { + "InputValidationStage": 0.08, + "TextEncodingStage": 853.63, + "LatentPreparationStage": 0.09, + "TimestepPreparationStage": 1.51, + "DenoisingStage": 2102.64, + "DecodingStage": 438.71, + "per_frame_generation": null + }, + "denoise_step_ms": { + "0": 107.59, + "1": 103.11, + "2": 98.13, + "3": 94.12, + "4": 45.26, + "5": 8.66, + "6": 8.87, + "7": 105.03, + "8": 17.53, + "9": 9.26, + "10": 15.2, + "11": 93.17, + "12": 27.27, + "13": 7.78, + "14": 8.8, + "15": 96.54, + "16": 24.72, + "17": 9.27, + "18": 8.53, + "19": 106.58, + "20": 13.47, + "21": 10.13, + "22": 10.23, + "23": 96.49, + "24": 21.93, + "25": 8.73, + "26": 9.72, + "27": 103.29, + "28": 14.4, + "29": 9.07, + "30": 8.71, + "31": 105.88, + "32": 16.93, + "33": 9.15, + "34": 9.83, + "35": 100.03, + "36": 21.5, + "37": 9.41, + "38": 9.18, + "39": 105.14, + "40": 12.36, + "41": 10.1, + "42": 8.29, + "43": 102.88, + "44": 18.36, + "45": 7.67, + "46": 10.27, + "47": 103.19, + "48": 25.53, + "49": 106.23 + }, + "expected_e2e_ms": 3402.6, + "expected_avg_denoise_ms": 41.87, + "expected_median_denoise_ms": 16.07, + "estimated_full_test_time_s": 94.0 + }, "turbo_wan2_1_t2v_1.3b": { "stages_ms": { "InputValidationStage": 0.06, @@ -2044,30 +2111,23 @@ }, "qwen_image_t2i_cache_dit_config_diffusers_1gpu": { "stages_ms": { - "DiffusersExecutionStage": 5070.86 + "DiffusersExecutionStage": 64520.18 }, "denoise_step_ms": {}, - "expected_e2e_ms": 5079.29, + "expected_e2e_ms": 64523.93, "expected_avg_denoise_ms": 0.0, - "expected_median_denoise_ms": 0.0 + "expected_median_denoise_ms": 0.0, + "estimated_full_test_time_s": 142.2 }, "qwen_image_t2i_cache_dit_scm_config_diffusers_1gpu": { "stages_ms": { - "DiffusersExecutionStage": 6160.17 + "DiffusersExecutionStage": 61106.66 }, "denoise_step_ms": {}, - "expected_e2e_ms": 6170.56, + "expected_e2e_ms": 61110.0, "expected_avg_denoise_ms": 0.0, - "expected_median_denoise_ms": 0.0 - }, - "qwen_image_t2i_cache_dit_config_diffusers_2gpu": { - "stages_ms": { - "DiffusersExecutionStage": 6981.05 - }, - "denoise_step_ms": {}, - "expected_e2e_ms": 6983.64, - "expected_avg_denoise_ms": 0.0, - "expected_median_denoise_ms": 0.0 + "expected_median_denoise_ms": 0.0, + "estimated_full_test_time_s": 135.3 }, "hunyuan3d_shape_gen": { "stages_ms": { diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index c50a78cf3ef4..48be5e912771 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -249,8 +249,6 @@ class DiffusionSamplingParams: diffusers_kwargs: dict | None = None - diffusers_kwargs: dict | None = None - @dataclass(frozen=True) class DiffusionTestCase: From 1819297e8c4bdc26ad01e177e73fb6d72d39cc79 Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Sat, 9 May 2026 15:32:43 +0000 Subject: [PATCH 09/10] fix --- .../server/configs/cache_dit_config_1gpu.yaml | 10 --- .../server/configs/cache_dit_scm_config.yaml | 4 +- .../multimodal_gen/test/server/gpu_cases.py | 44 ++++++---- .../test/server/perf_baselines.json | 87 ------------------- .../test/server/test_server_utils.py | 3 +- .../test/server/testcase_configs.py | 2 - 6 files changed, 29 insertions(+), 121 deletions(-) delete mode 100644 python/sglang/multimodal_gen/test/server/configs/cache_dit_config_1gpu.yaml diff --git a/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_1gpu.yaml b/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_1gpu.yaml deleted file mode 100644 index 371a9e6dfab3..000000000000 --- a/python/sglang/multimodal_gen/test/server/configs/cache_dit_config_1gpu.yaml +++ /dev/null @@ -1,10 +0,0 @@ -cache_config: - max_warmup_steps: 8 - warmup_interval: 2 - max_cached_steps: -1 - max_continuous_cached_steps: 2 - Fn_compute_blocks: 1 - Bn_compute_blocks: 0 - residual_diff_threshold: 0.12 - enable_taylorseer: true - taylorseer_order: 1 diff --git a/python/sglang/multimodal_gen/test/server/configs/cache_dit_scm_config.yaml b/python/sglang/multimodal_gen/test/server/configs/cache_dit_scm_config.yaml index d5252d78c993..3f23615393ac 100644 --- a/python/sglang/multimodal_gen/test/server/configs/cache_dit_scm_config.yaml +++ b/python/sglang/multimodal_gen/test/server/configs/cache_dit_scm_config.yaml @@ -1,5 +1,5 @@ cache_config: - max_warmup_steps: 8 + max_warmup_steps: 2 warmup_interval: 2 max_cached_steps: -1 max_continuous_cached_steps: 2 @@ -8,6 +8,6 @@ cache_config: residual_diff_threshold: 0.12 enable_taylorseer: true taylorseer_order: 1 - num_inference_steps: 50 + num_inference_steps: 8 steps_computation_mask: "medium" steps_computation_policy: dynamic diff --git a/python/sglang/multimodal_gen/test/server/gpu_cases.py b/python/sglang/multimodal_gen/test/server/gpu_cases.py index 9a940a110d24..c3e37cffde3a 100644 --- a/python/sglang/multimodal_gen/test/server/gpu_cases.py +++ b/python/sglang/multimodal_gen/test/server/gpu_cases.py @@ -1,3 +1,4 @@ +from dataclasses import replace from pathlib import Path from sglang.multimodal_gen.runtime.platforms import current_platform @@ -51,6 +52,21 @@ DEFAULT_WAN_2_2_TI2V_5B_MODEL_NAME_FOR_TEST, ) +_CACHE_DIT_CONFIG_DIR = Path(__file__).parent / "configs" + +CACHE_DIT_T2I_SMOKE_sampling_params = replace( + T2I_sampling_params, + output_size="512x512", + extras={"num_inference_steps": 8, "seed": 0}, +) + +CACHE_DIT_T2V_SP_SMOKE_sampling_params = replace( + T2V_sampling_params, + output_size="832x480", + num_frames=5, + extras={"num_inference_steps": 8, "seed": 0}, +) + # All test cases with clean default values # To test different models, simply add more DiffusionCase entries ONE_GPU_CASES: list[DiffusionTestCase] = [ @@ -70,21 +86,6 @@ ), T2I_sampling_params, ), - DiffusionTestCase( - "qwen_image_t2i_cache_dit_config_diffusers_1gpu", - DiffusionServerArgs( - model_path=DEFAULT_QWEN_IMAGE_MODEL_NAME_FOR_TEST, - extras=[ - "--backend", - "diffusers", - "--cache-dit-config", - str(Path(__file__).parent / "configs" / "cache_dit_config_1gpu.yaml"), - ], - ), - T2I_sampling_params, - run_consistency_check=False, - run_component_accuracy_check=False, - ), DiffusionTestCase( "qwen_image_t2i_cache_dit_scm_config_diffusers_1gpu", DiffusionServerArgs( @@ -93,12 +94,15 @@ "--backend", "diffusers", "--cache-dit-config", - str(Path(__file__).parent / "configs" / "cache_dit_scm_config.yaml"), + str(_CACHE_DIT_CONFIG_DIR / "cache_dit_scm_config.yaml"), ], ), - T2I_sampling_params, + CACHE_DIT_T2I_SMOKE_sampling_params, + run_perf_check=False, run_consistency_check=False, run_component_accuracy_check=False, + run_models_api_check=False, + run_t2v_input_reference_check=False, ), DiffusionTestCase( "flux_image_t2i", @@ -559,10 +563,14 @@ model_path=DEFAULT_WAN_2_1_T2V_1_3B_MODEL_NAME_FOR_TEST, ulysses_degree=2, enable_cache_dit=True, + env_vars={"SGLANG_CACHE_DIT_WARMUP": "2"}, ), - T2V_sampling_params, + CACHE_DIT_T2V_SP_SMOKE_sampling_params, + run_perf_check=False, run_consistency_check=False, run_component_accuracy_check=False, + run_models_api_check=False, + run_t2v_input_reference_check=False, ), DiffusionTestCase( "fsdp-inference", diff --git a/python/sglang/multimodal_gen/test/server/perf_baselines.json b/python/sglang/multimodal_gen/test/server/perf_baselines.json index 48d7401ecb88..08685b75c6c8 100644 --- a/python/sglang/multimodal_gen/test/server/perf_baselines.json +++ b/python/sglang/multimodal_gen/test/server/perf_baselines.json @@ -1073,73 +1073,6 @@ "expected_median_denoise_ms": 100.05, "estimated_full_test_time_s": 127.6 }, - "wan2_1_t2v_1_3b_cache_dit_sp_only_2gpu": { - "stages_ms": { - "InputValidationStage": 0.08, - "TextEncodingStage": 853.63, - "LatentPreparationStage": 0.09, - "TimestepPreparationStage": 1.51, - "DenoisingStage": 2102.64, - "DecodingStage": 438.71, - "per_frame_generation": null - }, - "denoise_step_ms": { - "0": 107.59, - "1": 103.11, - "2": 98.13, - "3": 94.12, - "4": 45.26, - "5": 8.66, - "6": 8.87, - "7": 105.03, - "8": 17.53, - "9": 9.26, - "10": 15.2, - "11": 93.17, - "12": 27.27, - "13": 7.78, - "14": 8.8, - "15": 96.54, - "16": 24.72, - "17": 9.27, - "18": 8.53, - "19": 106.58, - "20": 13.47, - "21": 10.13, - "22": 10.23, - "23": 96.49, - "24": 21.93, - "25": 8.73, - "26": 9.72, - "27": 103.29, - "28": 14.4, - "29": 9.07, - "30": 8.71, - "31": 105.88, - "32": 16.93, - "33": 9.15, - "34": 9.83, - "35": 100.03, - "36": 21.5, - "37": 9.41, - "38": 9.18, - "39": 105.14, - "40": 12.36, - "41": 10.1, - "42": 8.29, - "43": 102.88, - "44": 18.36, - "45": 7.67, - "46": 10.27, - "47": 103.19, - "48": 25.53, - "49": 106.23 - }, - "expected_e2e_ms": 3402.6, - "expected_avg_denoise_ms": 41.87, - "expected_median_denoise_ms": 16.07, - "estimated_full_test_time_s": 94.0 - }, "turbo_wan2_1_t2v_1.3b": { "stages_ms": { "InputValidationStage": 0.06, @@ -2109,26 +2042,6 @@ "expected_median_denoise_ms": 268.51, "estimated_full_test_time_s": 122.7 }, - "qwen_image_t2i_cache_dit_config_diffusers_1gpu": { - "stages_ms": { - "DiffusersExecutionStage": 64520.18 - }, - "denoise_step_ms": {}, - "expected_e2e_ms": 64523.93, - "expected_avg_denoise_ms": 0.0, - "expected_median_denoise_ms": 0.0, - "estimated_full_test_time_s": 142.2 - }, - "qwen_image_t2i_cache_dit_scm_config_diffusers_1gpu": { - "stages_ms": { - "DiffusersExecutionStage": 61106.66 - }, - "denoise_step_ms": {}, - "expected_e2e_ms": 61110.0, - "expected_avg_denoise_ms": 0.0, - "expected_median_denoise_ms": 0.0, - "estimated_full_test_time_s": 135.3 - }, "hunyuan3d_shape_gen": { "stages_ms": { "Hunyuan3DShapeBeforeDenoisingStage": 544.59, diff --git a/python/sglang/multimodal_gen/test/server/test_server_utils.py b/python/sglang/multimodal_gen/test/server/test_server_utils.py index b5fcf59ba3c5..aedaaf1e6dbd 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_utils.py +++ b/python/sglang/multimodal_gen/test/server/test_server_utils.py @@ -943,8 +943,7 @@ def generate_image(case_id, client) -> tuple[str, bytes]: # Build extra_body for optional features extra_body = dict(sampling_params.extras) - if sampling_params.diffusers_kwargs: - extra_body["diffusers_kwargs"] = sampling_params.diffusers_kwargs + response = client.images.with_raw_response.generate( model=model_path, prompt=sampling_params.prompt, diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index 48be5e912771..a140b3003e84 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -247,8 +247,6 @@ class DiffusionSamplingParams: # merged directly into the OpenAI extra_body dict. extras: dict = field(default_factory=dict) - diffusers_kwargs: dict | None = None - @dataclass(frozen=True) class DiffusionTestCase: From 17b1be2d7e3ed884b48ff1857194ec3bd3666b75 Mon Sep 17 00:00:00 2001 From: Chi McIsaac Date: Sat, 9 May 2026 16:37:36 +0000 Subject: [PATCH 10/10] fix --- .../multimodal_gen/test/server/gpu_cases.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/python/sglang/multimodal_gen/test/server/gpu_cases.py b/python/sglang/multimodal_gen/test/server/gpu_cases.py index c3e37cffde3a..2f5c000c3207 100644 --- a/python/sglang/multimodal_gen/test/server/gpu_cases.py +++ b/python/sglang/multimodal_gen/test/server/gpu_cases.py @@ -54,19 +54,6 @@ _CACHE_DIT_CONFIG_DIR = Path(__file__).parent / "configs" -CACHE_DIT_T2I_SMOKE_sampling_params = replace( - T2I_sampling_params, - output_size="512x512", - extras={"num_inference_steps": 8, "seed": 0}, -) - -CACHE_DIT_T2V_SP_SMOKE_sampling_params = replace( - T2V_sampling_params, - output_size="832x480", - num_frames=5, - extras={"num_inference_steps": 8, "seed": 0}, -) - # All test cases with clean default values # To test different models, simply add more DiffusionCase entries ONE_GPU_CASES: list[DiffusionTestCase] = [ @@ -97,7 +84,11 @@ str(_CACHE_DIT_CONFIG_DIR / "cache_dit_scm_config.yaml"), ], ), - CACHE_DIT_T2I_SMOKE_sampling_params, + replace( + T2I_sampling_params, + output_size="512x512", + extras={"num_inference_steps": 8, "seed": 0}, + ), run_perf_check=False, run_consistency_check=False, run_component_accuracy_check=False, @@ -565,7 +556,12 @@ enable_cache_dit=True, env_vars={"SGLANG_CACHE_DIT_WARMUP": "2"}, ), - CACHE_DIT_T2V_SP_SMOKE_sampling_params, + replace( + T2V_sampling_params, + output_size="832x480", + num_frames=5, + extras={"num_inference_steps": 8, "seed": 0}, + ), run_perf_check=False, run_consistency_check=False, run_component_accuracy_check=False,