diff --git a/tests/e2e/online_serving/test_bagel_expansion.py b/tests/e2e/online_serving/test_bagel_expansion.py new file mode 100644 index 00000000000..75e18ecfa3d --- /dev/null +++ b/tests/e2e/online_serving/test_bagel_expansion.py @@ -0,0 +1,132 @@ +""" +Comprehensive tests of diffusion features that are available in online serving mode +and are supported by the following models: +- ByteDance-Seed/BAGEL-7B-MoT +Coverage: +- TeaCache +- Cache-DiT +- CFG-Parallel +- Tensor-Parallel + +assert_diffusion_response validates successful generation and the expected +512x512 resolution. +""" + +import pytest + +from tests.conftest import ( + OmniServer, + OmniServerParams, + OpenAIClientHandler, + dummy_messages_from_mix_data, +) +from tests.utils import hardware_marks + +PROMPT = "A futuristic city skyline at twilight, cyberpunk style, ultra-detailed, high resolution." +NEGATIVE_PROMPT = "low quality, blurry, distorted, deformed, watermark" + +SINGLE_CARD_FEATURE_MARKS = hardware_marks(res={"cuda": "H100"}) +PARALLEL_FEATURE_MARKS = hardware_marks(res={"cuda": "H100"}, num_cards=2) + + +def _get_diffusion_feature_cases(model: str): + """Return L4 diffusion feature cases for Bagel. + TeaCache, Cache-DiT, CFG-Parallel, Tensor-Parallel. + """ + + return [ + # TeaCache (single-card) + pytest.param( + OmniServerParams( + model=model, + server_args=[ + "--cache-backend", + "tea_cache", + ], + ), + id="single_card_teacache", + marks=SINGLE_CARD_FEATURE_MARKS, + ), + # Cache-DiT (single-card) + pytest.param( + OmniServerParams( + model=model, + server_args=[ + "--cache-backend", + "cache_dit", + ], + ), + id="single_card_cache_dit", + marks=SINGLE_CARD_FEATURE_MARKS, + ), + # CFG-Parallel size 2 (2 GPUs, TeaCache backend) + pytest.param( + OmniServerParams( + model=model, + server_args=[ + "--cache-backend", + "tea_cache", + "--cfg-parallel-size", + "2", + ], + ), + id="parallel_cfg_2", + marks=PARALLEL_FEATURE_MARKS, + ), + # Tensor-Parallel size 2 (2 GPUs, Cache-DiT backend) + pytest.param( + OmniServerParams( + model=model, + server_args=[ + "--cache-backend", + "cache_dit", + "--tensor-parallel-size", + "2", + ], + ), + id="parallel_tp_2", + marks=PARALLEL_FEATURE_MARKS, + ), + ] + + +@pytest.mark.advanced_model +@pytest.mark.diffusion +@pytest.mark.parametrize( + "omni_server", + _get_diffusion_feature_cases("ByteDance-Seed/BAGEL-7B-MoT"), + indirect=True, +) +def test_bagel( + omni_server: OmniServer, + openai_client: OpenAIClientHandler, +): + """L4 diffusion feature coverage for Bagel on H100. + + This test exercises: + - TeaCache + - Cache-DiT + - CFG-Parallel (size=2) + - Tensor-Parallel (size=2) + + Validation is delegated to assert_diffusion_response in tests.conftest, + which checks output dimensions and basic correctness. + """ + + messages = dummy_messages_from_mix_data(content_text=PROMPT) + + request_config = { + "model": omni_server.model, + "messages": messages, + "extra_body": { + "height": 512, + "width": 512, + "num_inference_steps": 2, + # Enable CFG for models that use classifier-free guidance + "negative_prompt": NEGATIVE_PROMPT, + "true_cfg_scale": 4.0, + "seed": 42, + }, + } + + openai_client.send_diffusion_request(request_config)