diff --git a/python/sglang/multimodal_gen/configs/sample/sampling_params.py b/python/sglang/multimodal_gen/configs/sample/sampling_params.py index bd8256de5e07..0eb36b0166c3 100644 --- a/python/sglang/multimodal_gen/configs/sample/sampling_params.py +++ b/python/sglang/multimodal_gen/configs/sample/sampling_params.py @@ -156,6 +156,9 @@ class SamplingParams: # TeaCache parameters enable_teacache: bool = False + teacache_params: Any = ( + None # TeaCacheParams or WanTeaCacheParams, set by model-specific subclass + ) # Profiling profile: bool = False diff --git a/python/sglang/multimodal_gen/configs/sample/wan.py b/python/sglang/multimodal_gen/configs/sample/wan.py index 2c405b2f050b..a5faf50214f0 100644 --- a/python/sglang/multimodal_gen/configs/sample/wan.py +++ b/python/sglang/multimodal_gen/configs/sample/wan.py @@ -212,6 +212,11 @@ class Wan2_2_Base_SamplingParams(SamplingParams): "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" ) + # TODO(Wan2.2): TeaCache coefficients need to be calibrated for Wan2.2 by + # profiling L1 distances across timesteps. Until then, teacache_params is None + # and enable_teacache will be accepted but silently no-op. + # Consider using Cache-DiT (SGLANG_CACHE_DIT_ENABLED=1) as an alternative. + @dataclass class Wan2_2_TI2V_5B_SamplingParam(Wan2_2_Base_SamplingParams): diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index fe9dac4e82a3..f213b9f2a4e4 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -768,6 +768,23 @@ def from_req_perf_record( prompt=T2V_PROMPT, ), ), + # TeaCache smoke test for Wan2.2 T2V A14B — verifies enable_teacache=True + # doesn't crash. Perf check disabled because Wan2.2-specific TeaCache + # coefficients are not yet calibrated (teacache_params=None, so no speedup). + DiffusionTestCase( + "wan2_2_t2v_a14b_teacache_2gpu", + DiffusionServerArgs( + model_path=DEFAULT_WAN_2_2_T2V_A14B_MODEL_NAME_FOR_TEST, + modality="video", + custom_validator="video", + num_gpus=2, + ), + DiffusionSamplingParams( + prompt=T2V_PROMPT, + extras={"enable_teacache": True}, + ), + run_perf_check=False, + ), # LoRA test case for transformer_2 support DiffusionTestCase( "wan2_2_t2v_a14b_lora_2gpu",