Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 46 additions & 75 deletions python/sglang/multimodal_gen/test/server/testcase_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,24 @@ def from_req_perf_record(
)


# Sampling parameters reused by the text-to-image (T2I) test cases below, so
# that they all render the same prompt at the same resolution.
T2I_sampling_params = DiffusionSamplingParams(
    output_size="1024x1024",
    prompt="Doraemon is eating dorayaki",
)

# Sampling parameters reused by the text+image-to-image (TI2I) edit test cases:
# an edit instruction plus the remote input image it is applied to.
TI2I_sampling_params = DiffusionSamplingParams(
    image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
    prompt="Convert 2D style to 3D style",
)

# Prompt reused by the text-to-video (T2V) test cases; each case still builds
# its own DiffusionSamplingParams because the output sizes differ.
T2V_PROMPT: str = "A curious raccoon"

# Sampling parameters reused by the text+image-to-video (TI2V / I2V) test
# cases: a motion prompt, the remote input image, and the output resolution.
TI2V_sampling_params = DiffusionSamplingParams(
    image_path="https://is1-ssl.mzstatic.com/image/thumb/Music114/v4/5f/fa/56/5ffa56c2-ea1f-7a17-6bad-192ff9b6476d/825646124206.jpg/600x600bb.jpg",
    # Adjacent literals are concatenated by the parser into one prompt string.
    prompt=(
        "The man in the picture slowly turns his head, his expression enigmatic and otherworldly. "
        "The camera performs a slow, cinematic dolly out, focusing on his face. "
        "Moody lighting, neon signs glowing in the background, shallow depth of field."
    ),
    output_size="832x1104",
)

# All test cases with clean default values.
# To test different models, simply add more DiffusionTestCase entries.
ONE_GPU_CASES_A: list[DiffusionTestCase] = [
Expand All @@ -226,10 +244,7 @@ def from_req_perf_record(
warmup_text=1,
warmup_edit=0,
),
DiffusionSamplingParams(
prompt="A futuristic cityscape at sunset with flying cars",
output_size="1024x1024",
),
T2I_sampling_params,
),
DiffusionTestCase(
"flux_image_t2i",
Expand All @@ -239,10 +254,7 @@ def from_req_perf_record(
warmup_text=1,
warmup_edit=0,
),
DiffusionSamplingParams(
prompt="A futuristic cityscape at sunset with flying cars",
output_size="1024x1024",
),
T2I_sampling_params,
),
DiffusionTestCase(
"flux_2_image_t2i",
Expand All @@ -252,10 +264,7 @@ def from_req_perf_record(
warmup_text=1,
warmup_edit=0,
),
DiffusionSamplingParams(
prompt="A futuristic cityscape at sunset with flying cars",
output_size="1024x1024",
),
T2I_sampling_params,
),
DiffusionTestCase(
"zimage_image_t2i",
Expand All @@ -265,10 +274,7 @@ def from_req_perf_record(
warmup_text=1,
warmup_edit=0,
),
DiffusionSamplingParams(
prompt="Doraemon is eating dorayaki.",
output_size="1024x1024",
),
T2I_sampling_params,
),
# === Text and Image to Image (TI2I) ===
DiffusionTestCase(
Expand All @@ -279,10 +285,7 @@ def from_req_perf_record(
warmup_text=0,
warmup_edit=1,
),
DiffusionSamplingParams(
prompt="Convert 2D style to 3D style",
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
),
TI2I_sampling_params,
),
]

Expand All @@ -298,7 +301,7 @@ def from_req_perf_record(
custom_validator="video",
),
DiffusionSamplingParams(
prompt="A curious raccoon",
prompt=T2V_PROMPT,
output_size="848x480",
),
),
Expand Down Expand Up @@ -339,10 +342,7 @@ def from_req_perf_record(
warmup_text=0,
warmup_edit=1,
),
DiffusionSamplingParams(
prompt="Convert 2D style to 3D style",
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
),
TI2I_sampling_params,
),
DiffusionTestCase(
"fast_hunyuan_video",
Expand All @@ -354,7 +354,7 @@ def from_req_perf_record(
custom_validator="video",
),
DiffusionSamplingParams(
prompt="A curious raccoon",
prompt=T2V_PROMPT,
output_size="720x480",
),
),
Expand All @@ -368,11 +368,7 @@ def from_req_perf_record(
warmup_edit=0,
custom_validator="video",
),
DiffusionSamplingParams(
output_size="832x1104",
prompt="Add dynamic motion to the scene",
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
),
TI2V_sampling_params,
),
DiffusionTestCase(
"fastwan2_2_ti2v_5b",
Expand All @@ -383,30 +379,23 @@ def from_req_perf_record(
warmup_edit=0,
custom_validator="video",
),
DiffusionSamplingParams(
output_size="832x1104",
prompt="Add dynamic motion to the scene",
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
),
TI2V_sampling_params,
),
]

TWO_GPU_CASES_A = [
# TODO: Timeout with Torch2.9. Add back when it can pass CI
# DiffusionTestCase(
# id="wan2_2_i2v_a14b_2gpu",
# model_path="Wan-AI/Wan2.2-I2V-A14B-Diffusers",
# modality="video",
# prompt="generate",
# warmup_text=0,
# warmup_edit=0,
# output_size="832x1104",
# edit_prompt="generate",
# image_path="https://github.com/Wan-Video/Wan2.2/blob/990af50de458c19590c245151197326e208d7191/examples/i2v_input.JPG?raw=true",
# custom_validator="video",
# num_gpus=2,
# num_frames=1,
# ),
DiffusionTestCase(
    "wan2_2_i2v_a14b_2gpu",
    DiffusionServerArgs(
        model_path="Wan-AI/Wan2.2-I2V-A14B-Diffusers",
        modality="video",
        warmup_text=0,
        warmup_edit=0,
        custom_validator="video",
        # This is a two-GPU case (see the "_2gpu" id and the enclosing
        # TWO_GPU_CASES_A list), so request both GPUs explicitly like the
        # sibling "_2gpu" entries do.
        # NOTE(review): the default for num_gpus is not visible here;
        # presumably it is 1 - confirm against DiffusionServerArgs.
        num_gpus=2,
    ),
    TI2V_sampling_params,
),
DiffusionTestCase(
"wan2_2_t2v_a14b_2gpu",
DiffusionServerArgs(
Expand All @@ -418,7 +407,7 @@ def from_req_perf_record(
num_gpus=2,
),
DiffusionSamplingParams(
prompt="A curious raccoon",
prompt=T2V_PROMPT,
output_size="720x480",
),
),
Expand Down Expand Up @@ -450,7 +439,7 @@ def from_req_perf_record(
custom_validator="video",
),
DiffusionSamplingParams(
prompt="A curious raccoon",
prompt=T2V_PROMPT,
output_size="720x480",
),
),
Expand All @@ -467,11 +456,7 @@ def from_req_perf_record(
custom_validator="video",
num_gpus=2,
),
DiffusionSamplingParams(
output_size="832x1104",
prompt="Add dynamic motion to the scene",
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
),
TI2V_sampling_params,
),
# I2V LoRA test case
DiffusionTestCase(
Expand All @@ -485,11 +470,7 @@ def from_req_perf_record(
num_gpus=2,
lora_path="starsfriday/Wan2.1-Divine-Power-LoRA",
),
DiffusionSamplingParams(
prompt="faxiang, the person raises hands, a giant translucent golden figure appears behind",
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
output_size="832x1104",
),
TI2V_sampling_params,
),
DiffusionTestCase(
"wan2_1_i2v_14b_720P_2gpu",
Expand All @@ -501,11 +482,7 @@ def from_req_perf_record(
custom_validator="video",
num_gpus=2,
),
DiffusionSamplingParams(
prompt="Add dynamic motion to the scene",
image_path="https://github.com/lm-sys/lm-sys.github.io/releases/download/test/TI2I_Qwen_Image_Edit_Input.jpg",
output_size="832x1104",
),
TI2V_sampling_params,
),
DiffusionTestCase(
"qwen_image_t2i_2_gpus",
Expand All @@ -516,10 +493,7 @@ def from_req_perf_record(
warmup_edit=0,
num_gpus=2,
),
DiffusionSamplingParams(
prompt="A futuristic cityscape at sunset with flying cars",
output_size="1024x1024",
),
T2I_sampling_params,
),
DiffusionTestCase(
"flux_image_t2i_2_gpus",
Expand All @@ -529,10 +503,7 @@ def from_req_perf_record(
warmup_text=1,
warmup_edit=0,
),
DiffusionSamplingParams(
prompt="A futuristic cityscape at sunset with flying cars",
output_size="1024x1024",
),
T2I_sampling_params,
),
]

Expand Down
Loading