From fd42b507dde038153e2136fed15669b441327c03 Mon Sep 17 00:00:00 2001 From: dengyunyang <584797741@qq.com> Date: Sat, 28 Mar 2026 11:46:36 +0800 Subject: [PATCH] config priority fix Signed-off-by: dengyunyang <584797741@qq.com> --- .../hunyuan_image3/image_to_text.py | 1 - .../text_to_image/text_to_image.py | 8 +- vllm_omni/entrypoints/cli/serve.py | 6 + vllm_omni/entrypoints/utils.py | 108 ++---------------- .../stage_configs/hunyuan_image_3_moe.yaml | 39 ------- .../stage_configs/hunyuan_image_3_moe.yaml | 39 ------- 6 files changed, 23 insertions(+), 178 deletions(-) diff --git a/examples/offline_inference/hunyuan_image3/image_to_text.py b/examples/offline_inference/hunyuan_image3/image_to_text.py index d40134ac0a0..022c356e6c2 100644 --- a/examples/offline_inference/hunyuan_image3/image_to_text.py +++ b/examples/offline_inference/hunyuan_image3/image_to_text.py @@ -60,7 +60,6 @@ def main(args: argparse.Namespace) -> None: omni = Omni( model=args.model, enable_diffusion_pipeline_profiler=args.enable_diffusion_pipeline_profiler, - mode="image-to-text", ) prompt = "<|startoftext|>You are an assistant that understands images and outputs text." 
+ args.prompt diff --git a/examples/offline_inference/text_to_image/text_to_image.py b/examples/offline_inference/text_to_image/text_to_image.py index 58dc2a159f3..1018468b003 100644 --- a/examples/offline_inference/text_to_image/text_to_image.py +++ b/examples/offline_inference/text_to_image/text_to_image.py @@ -242,6 +242,12 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Enable logging of diffusion pipeline stats.", ) + parser.add_argument( + "--diffusion-only", + action="store_true", + help="Start only the diffusion (DIT) stage for models that support multiple startup modes, " + "e.g., HunyuanImage-3.0 supports both DIT and AR+DIT.", + ) return parser.parse_args() @@ -330,7 +336,7 @@ def main(): "parallel_config": parallel_config, "enforce_eager": args.enforce_eager, "enable_cpu_offload": args.enable_cpu_offload, - "mode": "text-to-image", + "diffusion_only": args.diffusion_only, "log_stats": args.log_stats, "enable_diffusion_pipeline_profiler": args.enable_diffusion_pipeline_profiler, **lora_args, diff --git a/vllm_omni/entrypoints/cli/serve.py b/vllm_omni/entrypoints/cli/serve.py index f924d64c391..3556844de3d 100644 --- a/vllm_omni/entrypoints/cli/serve.py +++ b/vllm_omni/entrypoints/cli/serve.py @@ -248,6 +248,12 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu default=1, help="Number of replica groups for HSDP. 
Each group holds a full sharded copy.", ) + omni_config_group.add_argument( + "--diffusion-only", + action="store_true", + help="Start only the diffusion (DIT) stage for models that support multiple startup modes, " + "e.g., HunyuanImage-3.0 supports both DIT and AR+DIT.", + ) # Cache optimization parameters omni_config_group.add_argument( diff --git a/vllm_omni/entrypoints/utils.py b/vllm_omni/entrypoints/utils.py index c664fe80a09..3cce82d7ae9 100644 --- a/vllm_omni/entrypoints/utils.py +++ b/vllm_omni/entrypoints/utils.py @@ -300,100 +300,6 @@ def load_stage_configs_from_yaml(config_path: str, base_engine_args: dict | None return stage_args -def filter_stages( - config_path: str | None, - stage_configs: list, - kwargs: dict | None, -) -> list: - """Filter stage configs by mode when YAML defines a `modes` section. - - The YAML can define, e.g.: - - modes: - - mode: text-to-image - stages: [1] - - mode: image-to-text - stages: [0] - - When users pass `mode="image-to-text"` into Omni(**kwargs), only the stages - listed for that mode are returned. If no mode is provided, defaults to - "text-to-image". If no modes are defined or filtering fails, returns the - original stage_configs unchanged. - - Args: - config_path: Path to the YAML config (used to read `modes`). - stage_configs: Loaded list of stage configs. - kwargs: Engine/caller kwargs; may contain "mode". - - Returns: - Filtered list of stage configs (or original list if filtering not applied). 
- """ - if not stage_configs or config_path is None: - return stage_configs - - try: - cfg = load_yaml_config(config_path) - yaml_modes = getattr(cfg, "modes", None) - if yaml_modes is None: - return stage_configs - - mode_to_stage_ids: dict[str, list[int]] = {} - if yaml_modes is not None: - for entry in yaml_modes: - mode_name = None - stages = None - if hasattr(entry, "mode") or hasattr(entry, "stages"): - mode_name = getattr(entry, "mode", None) - stages = getattr(entry, "stages", None) - elif isinstance(entry, dict): - mode_name = entry.get("mode") - stages = entry.get("stages") - - if mode_name is None or stages is None: - continue - - if isinstance(stages, int): - stage_list = [stages] - else: - stage_list = list(stages) - - mode_to_stage_ids[str(mode_name)] = [int(sid) for sid in stage_list] - - # No modes section or empty mapping: use all stages and return early. - active_mode: str | None = None - if isinstance(kwargs, dict): - active_mode = kwargs.get("mode") - - if active_mode is None: - active_mode = "text-to-image" - - if active_mode not in mode_to_stage_ids: - logger.warning( - "Requested mode '%s' not found in config '%s'; available modes: %s. Using all stages.", - active_mode, - config_path, - sorted(mode_to_stage_ids.keys()), - ) - return stage_configs - - allowed_ids = set(mode_to_stage_ids[active_mode]) - filtered_stage_configs = [sc for sc in stage_configs if getattr(sc, "stage_id", None) in allowed_ids] - if not filtered_stage_configs: - logger.warning( - "Mode '%s' in config '%s' resolved to stage ids %s, but none matched loaded stage_args. 
" - "Falling back to all stages.", - active_mode, - config_path, - sorted(allowed_ids), - ) - return stage_configs - - return filtered_stage_configs - except Exception as e: - logger.warning("Failed to apply mode-based stage filtering: %s", e) - return stage_configs - - def load_and_resolve_stage_configs( model: str, stage_configs_path: str | None, @@ -408,14 +314,22 @@ def load_and_resolve_stage_configs( kwargs: Engine arguments to merge with stage configs default_stage_cfg_factory: Optional callable that takes no args and returns default stage config list when no configs are found - Returns: Tuple of (config_path, stage_configs) + + The priority of stage configs should be: + Diffusion-only: + 1. stage_configs_path + 2. configs from default_stage_cfg_factory (construct from kwargs) + + Other: + 1. stage_configs_path + 2. configs from resolve_model_config_path """ if stage_configs_path is None: config_path = resolve_model_config_path(model) stage_configs = load_stage_configs_from_model(model, base_engine_args=kwargs) - if not stage_configs: + if not stage_configs or kwargs.get("diffusion_only", False): if default_stage_cfg_factory is not None: default_stage_cfg = default_stage_cfg_factory() stage_configs = create_config(default_stage_cfg) @@ -424,8 +338,6 @@ def load_and_resolve_stage_configs( else: config_path = stage_configs_path stage_configs = load_stage_configs_from_yaml(stage_configs_path, base_engine_args=kwargs) - - stage_configs = filter_stages(config_path, stage_configs, kwargs) logger.debug(f"stage_configs: {stage_configs}") return config_path, stage_configs diff --git a/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml b/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml index 6f4ba306a50..01d7dcca9e8 100644 --- a/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml +++ b/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml @@ -2,11 +2,6 @@ # Stage 0: AR Model (vLLM implementation) # The following config 
has been verified on 8x L40S-48G GPU. -modes: - - mode: text-to-image - stages: [1] - - mode: image-to-text - stages: [0] stage_args: - stage_id: 0 stage_type: llm # Use llm stage type for AR stages @@ -42,40 +37,6 @@ stage_args: seed: 42 detokenize: True repetition_penalty: 1.1 - - stage_id: 1 - stage_type: diffusion - runtime: - process: true - devices: "0,1,2,3,4,5,6,7" - max_batch_size: 1 - engine_args: - model_stage: diffusion - gpu_memory_utilization: 0.9 - enforce_eager: true - engine_output_type: image - distributed_executor_backend: "mp" - enable_prefix_caching: false - max_num_batched_tokens: 32768 - vae_use_slicing: false - vae_use_tiling: false - cache_backend: null - cache_config: null - enable_cache_dit_summary: false - parallel_config: - pipeline_parallel_size: 1 - data_parallel_size: 1 - tensor_parallel_size: 8 - enable_expert_parallel: false - sequence_parallel_size: 1 - ulysses_degree: 1 - ring_degree: 1 - cfg_parallel_size: 1 - vae_patch_parallel_size: 1 - use_hsdp: false - hsdp_shard_size: -1 - hsdp_replicate_size: 1 - final_output: true - final_output_type: image # Top-level runtime config (concise): default windows and stage edges runtime: diff --git a/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml b/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml index 8f969ced5f4..4e51fc88e9c 100644 --- a/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml +++ b/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml @@ -2,11 +2,6 @@ # Stage 0: AR Model (vLLM implementation) # The following config has been verified on 8x Max 1550 GPU. 
-modes: - - mode: text-to-image - stages: [1] - - mode: image-to-text - stages: [0] stage_args: - stage_id: 0 stage_type: llm # Use llm stage type to launch OmniLLM @@ -40,40 +35,6 @@ stage_args: seed: 42 detokenize: True repetition_penalty: 1.1 - - stage_id: 1 - stage_type: diffusion - runtime: - process: true - devices: "0,1,2,3,4,5,6,7" - max_batch_size: 1 - engine_args: - model_stage: diffusion - gpu_memory_utilization: 0.9 - enforce_eager: true - engine_output_type: image - distributed_executor_backend: "mp" - enable_prefix_caching: false - vae_use_slicing: false - vae_use_tiling: false - cache_backend: null - cache_config: null - enable_cache_dit_summary: false - quantization: "fp8" - parallel_config: - pipeline_parallel_size: 1 - data_parallel_size: 1 - tensor_parallel_size: 8 - enable_expert_parallel: true - sequence_parallel_size: 1 - ulysses_degree: 1 - ring_degree: 1 - cfg_parallel_size: 1 - vae_patch_parallel_size: 1 - use_hsdp: false - hsdp_shard_size: -1 - hsdp_replicate_size: 1 - final_output: true - final_output_type: image # Top-level runtime config (concise): default windows and stage edges runtime: