diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py index 75208a56e6d6..250ef6c9a6c2 100644 --- a/python/sglang/multimodal_gen/test/server/testcase_configs.py +++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py @@ -214,6 +214,24 @@ def from_req_perf_record( ) +T2I_sampling_params = DiffusionSamplingParams( + prompt="Doraemon is eating dorayaki", + output_size="1024x1024", +) + +TI2I_sampling_params = DiffusionSamplingParams( + prompt="Convert 2D style to 3D style", + image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", +) + +T2V_PROMPT = "A curious raccoon" + +TI2V_sampling_params = DiffusionSamplingParams( + output_size="832x1104", + prompt="The man in the picture slowly turns his head, his expression enigmatic and otherworldly. The camera performs a slow, cinematic dolly out, focusing on his face. Moody lighting, neon signs glowing in the background, shallow depth of field.", + image_path="https://is1-ssl.mzstatic.com/image/thumb/Music114/v4/5f/fa/56/5ffa56c2-ea1f-7a17-6bad-192ff9b6476d/825646124206.jpg/600x600bb.jpg", +) + # All test cases with clean default values # To test different models, simply add more DiffusionCase entries ONE_GPU_CASES_A: list[DiffusionTestCase] = [ @@ -226,10 +244,7 @@ def from_req_perf_record( warmup_text=1, warmup_edit=0, ), - DiffusionSamplingParams( - prompt="A futuristic cityscape at sunset with flying cars", - output_size="1024x1024", - ), + T2I_sampling_params, ), DiffusionTestCase( "flux_image_t2i", @@ -239,10 +254,7 @@ def from_req_perf_record( warmup_text=1, warmup_edit=0, ), - DiffusionSamplingParams( - prompt="A futuristic cityscape at sunset with flying cars", - output_size="1024x1024", - ), + T2I_sampling_params, ), DiffusionTestCase( "flux_2_image_t2i", @@ -252,10 +264,7 @@ def from_req_perf_record( warmup_text=1, warmup_edit=0, ), - DiffusionSamplingParams( - 
prompt="A futuristic cityscape at sunset with flying cars", - output_size="1024x1024", - ), + T2I_sampling_params, ), DiffusionTestCase( "zimage_image_t2i", @@ -265,10 +274,7 @@ def from_req_perf_record( warmup_text=1, warmup_edit=0, ), - DiffusionSamplingParams( - prompt="Doraemon is eating dorayaki.", - output_size="1024x1024", - ), + T2I_sampling_params, ), # === Text and Image to Image (TI2I) === DiffusionTestCase( @@ -279,10 +285,7 @@ def from_req_perf_record( warmup_text=0, warmup_edit=1, ), - DiffusionSamplingParams( - prompt="Convert 2D style to 3D style", - image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", - ), + TI2I_sampling_params, ), ] @@ -298,7 +301,7 @@ def from_req_perf_record( custom_validator="video", ), DiffusionSamplingParams( - prompt="A curious raccoon", + prompt=T2V_PROMPT, output_size="848x480", ), ), @@ -339,10 +342,7 @@ def from_req_perf_record( warmup_text=0, warmup_edit=1, ), - DiffusionSamplingParams( - prompt="Convert 2D style to 3D style", - image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", - ), + TI2I_sampling_params, ), DiffusionTestCase( "fast_hunyuan_video", @@ -354,7 +354,7 @@ def from_req_perf_record( custom_validator="video", ), DiffusionSamplingParams( - prompt="A curious raccoon", + prompt=T2V_PROMPT, output_size="720x480", ), ), @@ -368,11 +368,7 @@ def from_req_perf_record( warmup_edit=0, custom_validator="video", ), - DiffusionSamplingParams( - output_size="832x1104", - prompt="Add dynamic motion to the scene", - image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", - ), + TI2V_sampling_params, ), DiffusionTestCase( "fastwan2_2_ti2v_5b", @@ -383,30 +379,23 @@ def from_req_perf_record( warmup_edit=0, custom_validator="video", ), - DiffusionSamplingParams( - output_size="832x1104", - prompt="Add dynamic motion to the scene", - 
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", - ), + TI2V_sampling_params, ), ] TWO_GPU_CASES_A = [ - # TODO: Timeout with Torch2.9. Add back when it can pass CI - # DiffusionTestCase( - # id="wan2_2_i2v_a14b_2gpu", - # model_path="Wan-AI/Wan2.2-I2V-A14B-Diffusers", - # modality="video", - # prompt="generate", - # warmup_text=0, - # warmup_edit=0, - # output_size="832x1104", - # edit_prompt="generate", - # image_path="https://github.com/Wan-Video/Wan2.2/blob/990af50de458c19590c245151197326e208d7191/examples/i2v_input.JPG?raw=true", - # custom_validator="video", - # num_gpus=2, - # num_frames=1, - # ), + DiffusionTestCase( + "wan2_2_i2v_a14b_2gpu", + DiffusionServerArgs( + model_path="Wan-AI/Wan2.2-I2V-A14B-Diffusers", + modality="video", + warmup_text=0, + warmup_edit=0, + custom_validator="video", + num_gpus=2, + ), + TI2V_sampling_params, + ), DiffusionTestCase( "wan2_2_t2v_a14b_2gpu", DiffusionServerArgs( @@ -418,7 +407,7 @@ def from_req_perf_record( num_gpus=2, ), DiffusionSamplingParams( - prompt="A curious raccoon", + prompt=T2V_PROMPT, output_size="720x480", ), ), @@ -450,7 +439,7 @@ def from_req_perf_record( custom_validator="video", ), DiffusionSamplingParams( - prompt="A curious raccoon", + prompt=T2V_PROMPT, output_size="720x480", ), ), @@ -467,11 +456,7 @@ def from_req_perf_record( custom_validator="video", num_gpus=2, ), - DiffusionSamplingParams( - output_size="832x1104", - prompt="Add dynamic motion to the scene", - image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", - ), + TI2V_sampling_params, ), # I2V LoRA test case DiffusionTestCase( "wan2_1_i2v_14b_720p_lora_2gpu", DiffusionServerArgs( model_path="Wan-AI/Wan2.1-I2V-14B-720P-Diffusers", modality="video", warmup_text=0, warmup_edit=1, custom_validator="video", num_gpus=2, lora_path="starsfriday/Wan2.1-Divine-Power-LoRA", ), - DiffusionSamplingParams( - prompt="faxiang, the person raises hands, a giant translucent golden figure appears behind", - 
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", - output_size="832x1104", - ), + TI2V_sampling_params, ), DiffusionTestCase( "wan2_1_i2v_14b_720P_2gpu", @@ -501,11 +482,7 @@ def from_req_perf_record( custom_validator="video", num_gpus=2, ), - DiffusionSamplingParams( - prompt="Add dynamic motion to the scene", - image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg", - output_size="832x1104", - ), + TI2V_sampling_params, ), DiffusionTestCase( "qwen_image_t2i_2_gpus", @@ -516,10 +493,7 @@ def from_req_perf_record( warmup_edit=0, num_gpus=2, ), - DiffusionSamplingParams( - prompt="A futuristic cityscape at sunset with flying cars", - output_size="1024x1024", - ), + T2I_sampling_params, ), DiffusionTestCase( "flux_image_t2i_2_gpus", @@ -529,10 +503,7 @@ def from_req_perf_record( warmup_text=1, warmup_edit=0, ), - DiffusionSamplingParams( - prompt="A futuristic cityscape at sunset with flying cars", - output_size="1024x1024", - ), + T2I_sampling_params, ), ]