diff --git a/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml b/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml index 590244acd26..1f0d06cb8c0 100644 --- a/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml +++ b/tests/e2e/offline_inference/stage_configs/bagel_mooncake_ci.yaml @@ -47,15 +47,9 @@ stage_args: engine_args: model_stage: dit max_num_seqs: 1 - gpu_memory_utilization: 0.45 enforce_eager: true trust_remote_code: true - engine_output_type: image distributed_executor_backend: mp - enable_prefix_caching: false - max_num_batched_tokens: 32768 - tensor_parallel_size: 1 - load_format: dummy omni_kv_config: need_recv_cache: true engine_input_source: [0] diff --git a/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml b/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml index b7999652e23..36b1d2bbe48 100644 --- a/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml +++ b/tests/e2e/offline_inference/stage_configs/bagel_sharedmemory_ci.yaml @@ -46,15 +46,9 @@ stage_args: engine_args: model_stage: dit max_num_seqs: 1 - gpu_memory_utilization: 0.45 enforce_eager: true trust_remote_code: true - engine_output_type: image distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 - tensor_parallel_size: 1 - load_format: dummy omni_kv_config: need_recv_cache: true engine_input_source: [0] diff --git a/tests/test_diffusion_config_fields.py b/tests/test_diffusion_config_fields.py new file mode 100644 index 00000000000..b87ceec1df6 --- /dev/null +++ b/tests/test_diffusion_config_fields.py @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Ensure diffusion stage YAML configs only use valid OmniDiffusionConfig fields. 
def _invalid_diffusion_engine_args(config: object, valid_fields: set[str]) -> list[set[str]]:
    """Return the unknown ``engine_args`` keys of each offending diffusion stage.

    Args:
        config: The object produced by ``yaml.safe_load`` for one YAML file.
        valid_fields: Key names that are allowed inside ``engine_args``.

    Returns:
        One set per diffusion stage that has at least one key outside
        ``valid_fields``; an empty list when the document has no such stage
        or is not shaped like a stage config at all.
    """
    # The caller scans every *.yaml under tests/, so this also sees files that
    # are not stage configs. yaml.safe_load returns None for an empty document
    # and may return a list or scalar; none of those can hold stages.
    if not isinstance(config, dict):
        return []

    stages = config.get("stage_args", config.get("stages", []))
    if not isinstance(stages, list):
        # e.g. "stage_args:" present but left empty (parsed as None).
        return []

    offenders: list[set[str]] = []
    for stage in stages:
        if not isinstance(stage, dict) or stage.get("stage_type") != "diffusion":
            continue
        engine_args = stage.get("engine_args")
        if not isinstance(engine_args, dict):
            # Key missing, or present with a null value: nothing to validate.
            continue
        invalid = set(engine_args) - valid_fields
        if invalid:
            offenders.append(invalid)
    return offenders


@pytest.mark.skipif(
    OmniDiffusionConfig is None,
    reason="OmniDiffusionConfig could not be imported (missing torch?)",
)
def test_diffusion_stage_configs_only_contain_valid_fields():
    """Diffusion stage engine_args must only contain OmniDiffusionConfig fields.

    Scans the packaged stage configs plus every YAML file under ``tests/`` and
    fails if a stage with ``stage_type: diffusion`` lists an ``engine_args``
    key that is neither an ``OmniDiffusionConfig`` dataclass field nor one of
    the explicitly allowed extras below.

    Regression test for https://github.com/vllm-project/vllm-omni/issues/2563
    """
    repo_root = Path(__file__).parent.parent
    # Main configs are matched non-recursively; test configs recursively.
    config_dirs = [
        repo_root / "vllm_omni" / "model_executor" / "stage_configs",
    ]
    test_dir = repo_root / "tests"

    yaml_paths: list[Path] = []
    for config_dir in config_dirs:
        yaml_paths.extend(sorted(config_dir.glob("*.yaml")))
    yaml_paths.extend(sorted(test_dir.rglob("*.yaml")))

    valid_fields = {f.name for f in fields(OmniDiffusionConfig)}
    # model_stage is consumed by the stage init layer, not OmniDiffusionConfig
    valid_fields.add("model_stage")
    # model_arch is consumed by the stage init layer for diffusion model class resolution
    valid_fields.add("model_arch")
    # "quantization" is mapped to "quantization_config" by from_kwargs() backwards-compat
    valid_fields.add("quantization")

    invalid_entries: list[tuple[Path, set[str]]] = []
    for yaml_path in yaml_paths:
        # Explicit encoding so the result does not depend on the machine's locale.
        with open(yaml_path, encoding="utf-8") as fh:
            config = yaml.safe_load(fh)

        relative_path = yaml_path.relative_to(repo_root)
        invalid_entries.extend(
            (relative_path, invalid)
            for invalid in _invalid_diffusion_engine_args(config, valid_fields)
        )

    assert not invalid_entries, "Diffusion stage configs contain fields not in OmniDiffusionConfig:\n" + "\n".join(
        f" {name}: {sorted(bad)}" for name, bad in invalid_entries
    )
enforce_eager: true trust_remote_code: true - engine_output_type: image distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 - tensor_parallel_size: 1 final_output: true final_output_type: image diff --git a/vllm_omni/model_executor/stage_configs/bagel_think.yaml b/vllm_omni/model_executor/stage_configs/bagel_think.yaml index c4cf32c707e..0d2098a2034 100644 --- a/vllm_omni/model_executor/stage_configs/bagel_think.yaml +++ b/vllm_omni/model_executor/stage_configs/bagel_think.yaml @@ -49,14 +49,9 @@ stage_args: engine_args: model_stage: dit max_num_seqs: 1 - gpu_memory_utilization: 0.45 enforce_eager: true trust_remote_code: true - engine_output_type: image distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 - tensor_parallel_size: 1 omni_kv_config: need_recv_cache: true engine_input_source: [0] diff --git a/vllm_omni/model_executor/stage_configs/bagel_usp2.yaml b/vllm_omni/model_executor/stage_configs/bagel_usp2.yaml index 632c227f360..33002b9aa5c 100644 --- a/vllm_omni/model_executor/stage_configs/bagel_usp2.yaml +++ b/vllm_omni/model_executor/stage_configs/bagel_usp2.yaml @@ -45,14 +45,9 @@ stage_args: max_batch_size: 1 engine_args: model_stage: dit - gpu_memory_utilization: 0.45 enforce_eager: true trust_remote_code: true - engine_output_type: image distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 - tensor_parallel_size: 1 parallel_config: ulysses_degree: 2 # ring_degree: 2 diff --git a/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit.yaml b/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit.yaml index 0b812ff376b..a60fe9a5b5b 100644 --- a/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit.yaml +++ b/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit.yaml @@ -11,13 +11,9 @@ stage_args: engine_args: max_num_seqs: 1 model_stage: dit - gpu_memory_utilization: 0.65 enforce_eager: true 
trust_remote_code: true - engine_output_type: image distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 parallel_config: tensor_parallel_size: 4 enable_expert_parallel: true diff --git a/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit_2gpu_fp8.yaml b/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit_2gpu_fp8.yaml index 51110c28587..aeef27a9746 100644 --- a/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit_2gpu_fp8.yaml +++ b/vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit_2gpu_fp8.yaml @@ -11,13 +11,9 @@ stage_args: max_batch_size: 1 engine_args: model_stage: dit - gpu_memory_utilization: 0.9 enforce_eager: true trust_remote_code: true - engine_output_type: image distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 quantization: "fp8" parallel_config: tensor_parallel_size: 2 diff --git a/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml b/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml index 6f4ba306a50..808b4619f74 100644 --- a/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml +++ b/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml @@ -50,12 +50,8 @@ stage_args: max_batch_size: 1 engine_args: model_stage: diffusion - gpu_memory_utilization: 0.9 enforce_eager: true - engine_output_type: image distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 vae_use_slicing: false vae_use_tiling: false cache_backend: null diff --git a/vllm_omni/model_executor/stage_configs/omnivoice.yaml b/vllm_omni/model_executor/stage_configs/omnivoice.yaml index 49f11e9674d..546e3b3dc2a 100644 --- a/vllm_omni/model_executor/stage_configs/omnivoice.yaml +++ b/vllm_omni/model_executor/stage_configs/omnivoice.yaml @@ -10,10 +10,8 @@ stage_args: engine_args: model_stage: dit model_class_name: "OmniVoicePipeline" - gpu_memory_utilization: 0.5 
enforce_eager: true trust_remote_code: true - engine_output_type: audio distributed_executor_backend: "mp" dtype: "float32" final_output: true