From fd42b507dde038153e2136fed15669b441327c03 Mon Sep 17 00:00:00 2001
From: dengyunyang <584797741@qq.com>
Date: Sat, 28 Mar 2026 11:46:36 +0800
Subject: [PATCH] Fix stage config priority: replace mode filtering with --diffusion-only

Signed-off-by: dengyunyang <584797741@qq.com>
---
 .../hunyuan_image3/image_to_text.py           |   1 -
 .../text_to_image/text_to_image.py            |   8 +-
 vllm_omni/entrypoints/cli/serve.py            |   6 +
 vllm_omni/entrypoints/utils.py                | 108 ++----------------
 .../stage_configs/hunyuan_image_3_moe.yaml    |  39 -------
 .../stage_configs/hunyuan_image_3_moe.yaml    |  39 -------
 6 files changed, 23 insertions(+), 178 deletions(-)
diff --git a/examples/offline_inference/hunyuan_image3/image_to_text.py b/examples/offline_inference/hunyuan_image3/image_to_text.py
index d40134ac0a0..022c356e6c2 100644
--- a/examples/offline_inference/hunyuan_image3/image_to_text.py
+++ b/examples/offline_inference/hunyuan_image3/image_to_text.py
@@ -60,7 +60,6 @@ def main(args: argparse.Namespace) -> None:
     omni = Omni(
         model=args.model,
         enable_diffusion_pipeline_profiler=args.enable_diffusion_pipeline_profiler,
-        mode="image-to-text",
     )
 
     prompt = "<|startoftext|>You are an assistant that understands images and outputs text.<img>" + args.prompt
diff --git a/examples/offline_inference/text_to_image/text_to_image.py b/examples/offline_inference/text_to_image/text_to_image.py
index 58dc2a159f3..1018468b003 100644
--- a/examples/offline_inference/text_to_image/text_to_image.py
+++ b/examples/offline_inference/text_to_image/text_to_image.py
@@ -242,6 +242,12 @@ def parse_args() -> argparse.Namespace:
         action="store_true",
         help="Enable logging of diffusion pipeline stats.",
     )
+    parser.add_argument(
+        "--diffusion-only",
+        action="store_true",
+        help="Start only the diffusion (DIT) stage for models that support multiple startup modes, "
+        "e.g., HunyuanImage-3.0 supports both DIT and AR+DIT.",
+    )
     return parser.parse_args()
 
 
@@ -330,7 +336,7 @@ def main():
         "parallel_config": parallel_config,
         "enforce_eager": args.enforce_eager,
         "enable_cpu_offload": args.enable_cpu_offload,
-        "mode": "text-to-image",
+        "diffusion_only": args.diffusion_only,
         "log_stats": args.log_stats,
         "enable_diffusion_pipeline_profiler": args.enable_diffusion_pipeline_profiler,
         **lora_args,
diff --git a/vllm_omni/entrypoints/cli/serve.py b/vllm_omni/entrypoints/cli/serve.py
index f924d64c391..3556844de3d 100644
--- a/vllm_omni/entrypoints/cli/serve.py
+++ b/vllm_omni/entrypoints/cli/serve.py
@@ -248,6 +248,12 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
             default=1,
             help="Number of replica groups for HSDP. Each group holds a full sharded copy.",
         )
+        omni_config_group.add_argument(
+            "--diffusion-only",
+            action="store_true",
+            help="Start only the diffusion (DIT) stage for models that support multiple startup modes, "
+            "e.g., HunyuanImage-3.0 supports both DIT and AR+DIT.",
+        )
 
         # Cache optimization parameters
         omni_config_group.add_argument(
diff --git a/vllm_omni/entrypoints/utils.py b/vllm_omni/entrypoints/utils.py
index c664fe80a09..3cce82d7ae9 100644
--- a/vllm_omni/entrypoints/utils.py
+++ b/vllm_omni/entrypoints/utils.py
@@ -300,100 +300,6 @@ def load_stage_configs_from_yaml(config_path: str, base_engine_args: dict | None
     return stage_args
 
 
-def filter_stages(
-    config_path: str | None,
-    stage_configs: list,
-    kwargs: dict | None,
-) -> list:
-    """Filter stage configs by mode when YAML defines a `modes` section.
-
-    The YAML can define, e.g.:
-
-        modes:
-          - mode: text-to-image
-            stages: [1]
-          - mode: image-to-text
-            stages: [0]
-
-    When users pass `mode="image-to-text"` into Omni(**kwargs), only the stages
-    listed for that mode are returned. If no mode is provided, defaults to
-    "text-to-image". If no modes are defined or filtering fails, returns the
-    original stage_configs unchanged.
-
-    Args:
-        config_path: Path to the YAML config (used to read `modes`).
-        stage_configs: Loaded list of stage configs.
-        kwargs: Engine/caller kwargs; may contain "mode".
-
-    Returns:
-        Filtered list of stage configs (or original list if filtering not applied).
-    """
-    if not stage_configs or config_path is None:
-        return stage_configs
-
-    try:
-        cfg = load_yaml_config(config_path)
-        yaml_modes = getattr(cfg, "modes", None)
-        if yaml_modes is None:
-            return stage_configs
-
-        mode_to_stage_ids: dict[str, list[int]] = {}
-        if yaml_modes is not None:
-            for entry in yaml_modes:
-                mode_name = None
-                stages = None
-                if hasattr(entry, "mode") or hasattr(entry, "stages"):
-                    mode_name = getattr(entry, "mode", None)
-                    stages = getattr(entry, "stages", None)
-                elif isinstance(entry, dict):
-                    mode_name = entry.get("mode")
-                    stages = entry.get("stages")
-
-                if mode_name is None or stages is None:
-                    continue
-
-                if isinstance(stages, int):
-                    stage_list = [stages]
-                else:
-                    stage_list = list(stages)
-
-                mode_to_stage_ids[str(mode_name)] = [int(sid) for sid in stage_list]
-
-        # No modes section or empty mapping: use all stages and return early.
-        active_mode: str | None = None
-        if isinstance(kwargs, dict):
-            active_mode = kwargs.get("mode")
-
-        if active_mode is None:
-            active_mode = "text-to-image"
-
-        if active_mode not in mode_to_stage_ids:
-            logger.warning(
-                "Requested mode '%s' not found in config '%s'; available modes: %s. Using all stages.",
-                active_mode,
-                config_path,
-                sorted(mode_to_stage_ids.keys()),
-            )
-            return stage_configs
-
-        allowed_ids = set(mode_to_stage_ids[active_mode])
-        filtered_stage_configs = [sc for sc in stage_configs if getattr(sc, "stage_id", None) in allowed_ids]
-        if not filtered_stage_configs:
-            logger.warning(
-                "Mode '%s' in config '%s' resolved to stage ids %s, but none matched loaded stage_args. "
-                "Falling back to all stages.",
-                active_mode,
-                config_path,
-                sorted(allowed_ids),
-            )
-            return stage_configs
-
-        return filtered_stage_configs
-    except Exception as e:
-        logger.warning("Failed to apply mode-based stage filtering: %s", e)
-        return stage_configs
-
-
 def load_and_resolve_stage_configs(
     model: str,
     stage_configs_path: str | None,
@@ -408,14 +314,22 @@ def load_and_resolve_stage_configs(
         kwargs: Engine arguments to merge with stage configs
         default_stage_cfg_factory: Optional callable that takes no args and returns
             default stage config list when no configs are found
-
     Returns:
         Tuple of (config_path, stage_configs)
+
+    Stage config priority (highest first):
+      With kwargs["diffusion_only"] set:
+        1. stage_configs_path
+        2. default_stage_cfg_factory() (constructed from kwargs), if provided
+      Otherwise:
+        1. stage_configs_path
+        2. the model's own stage config (resolve_model_config_path)
+        3. default_stage_cfg_factory(), if the model has none
     """
     if stage_configs_path is None:
         config_path = resolve_model_config_path(model)
         stage_configs = load_stage_configs_from_model(model, base_engine_args=kwargs)
-        if not stage_configs:
+        if not stage_configs or (kwargs or {}).get("diffusion_only", False):
             if default_stage_cfg_factory is not None:
                 default_stage_cfg = default_stage_cfg_factory()
                 stage_configs = create_config(default_stage_cfg)
@@ -424,8 +338,6 @@ def load_and_resolve_stage_configs(
     else:
         config_path = stage_configs_path
         stage_configs = load_stage_configs_from_yaml(stage_configs_path, base_engine_args=kwargs)
-
-    stage_configs = filter_stages(config_path, stage_configs, kwargs)
     logger.debug(f"stage_configs: {stage_configs}")
 
     return config_path, stage_configs
diff --git a/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml b/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml
index 6f4ba306a50..01d7dcca9e8 100644
--- a/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml
+++ b/vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml
@@ -2,11 +2,6 @@
 # Stage 0: AR Model (vLLM implementation)
 
 # The following config has been verified on 8x L40S-48G GPU.
-modes:
-  - mode: text-to-image
-    stages: [1]
-  - mode: image-to-text
-    stages: [0]
 stage_args:
   - stage_id: 0
     stage_type: llm  # Use llm stage type for AR stages
@@ -42,40 +37,6 @@ stage_args:
       seed: 42
       detokenize: True
       repetition_penalty: 1.1
-  - stage_id: 1
-    stage_type: diffusion
-    runtime:
-      process: true
-      devices: "0,1,2,3,4,5,6,7"
-      max_batch_size: 1
-    engine_args:
-      model_stage: diffusion
-      gpu_memory_utilization: 0.9
-      enforce_eager: true
-      engine_output_type: image
-      distributed_executor_backend: "mp"
-      enable_prefix_caching: false
-      max_num_batched_tokens: 32768
-      vae_use_slicing: false
-      vae_use_tiling: false
-      cache_backend: null
-      cache_config: null
-      enable_cache_dit_summary: false
-      parallel_config:
-        pipeline_parallel_size: 1
-        data_parallel_size: 1
-        tensor_parallel_size: 8
-        enable_expert_parallel: false
-        sequence_parallel_size: 1
-        ulysses_degree: 1
-        ring_degree: 1
-        cfg_parallel_size: 1
-        vae_patch_parallel_size: 1
-        use_hsdp: false
-        hsdp_shard_size: -1
-        hsdp_replicate_size: 1
-    final_output: true
-    final_output_type: image
 
 # Top-level runtime config (concise): default windows and stage edges
 runtime:
diff --git a/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml b/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml
index 8f969ced5f4..4e51fc88e9c 100644
--- a/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml
+++ b/vllm_omni/platforms/xpu/stage_configs/hunyuan_image_3_moe.yaml
@@ -2,11 +2,6 @@
 # Stage 0: AR Model (vLLM implementation)
 
 # The following config has been verified on 8x Max 1550 GPU.
-modes:
-  - mode: text-to-image
-    stages: [1]
-  - mode: image-to-text
-    stages: [0]
 stage_args:
   - stage_id: 0
     stage_type: llm  # Use llm stage type to launch OmniLLM
@@ -40,40 +35,6 @@ stage_args:
       seed: 42
       detokenize: True
       repetition_penalty: 1.1
-  - stage_id: 1
-    stage_type: diffusion
-    runtime:
-      process: true
-      devices: "0,1,2,3,4,5,6,7"
-      max_batch_size: 1
-    engine_args:
-      model_stage: diffusion
-      gpu_memory_utilization: 0.9
-      enforce_eager: true
-      engine_output_type: image
-      distributed_executor_backend: "mp"
-      enable_prefix_caching: false
-      vae_use_slicing: false
-      vae_use_tiling: false
-      cache_backend: null
-      cache_config: null
-      enable_cache_dit_summary: false
-      quantization: "fp8"
-      parallel_config:
-        pipeline_parallel_size: 1
-        data_parallel_size: 1
-        tensor_parallel_size: 8
-        enable_expert_parallel: true
-        sequence_parallel_size: 1
-        ulysses_degree: 1
-        ring_degree: 1
-        cfg_parallel_size: 1
-        vae_patch_parallel_size: 1
-        use_hsdp: false
-        hsdp_shard_size: -1
-        hsdp_replicate_size: 1
-    final_output: true
-    final_output_type: image
 
 # Top-level runtime config (concise): default windows and stage edges
 runtime: