diff --git a/tests/entrypoints/test_async_omni_diffusion_config.py b/tests/entrypoints/test_async_omni_diffusion_config.py
index 83b465fdb47..235add5725f 100644
--- a/tests/entrypoints/test_async_omni_diffusion_config.py
+++ b/tests/entrypoints/test_async_omni_diffusion_config.py
@@ -4,6 +4,7 @@
 import pytest
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 
+from vllm_omni.config.stage_config import deploy_override_field_names
 from vllm_omni.engine.async_omni_engine import AsyncOmniEngine
 from vllm_omni.entrypoints.cli.serve import OmniServeCommand, _create_default_diffusion_stage_cfg
 
@@ -30,6 +31,15 @@ def test_default_stage_config_includes_cache_backend():
     assert engine_args["model_stage"] == "diffusion"
 
 
+def test_default_stage_config_ignores_none_deploy_overrides():
+    """All-``None`` deploy overrides must produce the same default diffusion stage config."""
+    build_default = AsyncOmniEngine._create_default_diffusion_stage_cfg
+    all_none = dict.fromkeys(deploy_override_field_names())
+    expected = build_default({})[0]
+
+    assert build_default(all_none)[0] == expected
+
+
 def test_default_cache_config_used_when_missing():
     """Ensure default cache_config is synthesized when only backend is given."""
     stage_cfg = AsyncOmniEngine._create_default_diffusion_stage_cfg(
diff --git a/tests/helpers/stage_config.py b/tests/helpers/stage_config.py
index 310af917459..23d80c82658 100644
--- a/tests/helpers/stage_config.py
+++ b/tests/helpers/stage_config.py
@@ -516,6 +516,7 @@ def delete_by_path(config_dict: dict, path: str) -> None:
                 "max_num_seqs": 1,
                 "gpu_memory_utilization": 0.9,
                 "enforce_eager": True,
+                "enable_prefix_caching": False,
                 "max_num_batched_tokens": 16384,
                 "max_model_len": 16384,
                 "skip_mm_profiling": True,
diff --git a/tests/test_arg_utils.py b/tests/test_arg_utils.py
index ae640b2d861..2fd5cf302e0 100644
--- a/tests/test_arg_utils.py
+++ b/tests/test_arg_utils.py
@@ -369,8 +369,8 @@ def _build_full_serve_parser():
 def test_nullify_stage_engine_defaults_resets_inherited_defaults():
     import argparse
 
+    from vllm_omni.config.stage_config import deploy_override_field_names
     from vllm_omni.engine.arg_utils import (
-        deploy_override_field_names,
         nullify_stage_engine_defaults,
     )
 
diff --git a/tests/test_config_factory.py b/tests/test_config_factory.py
index 7abe8fc8693..6799fb80acc 100644
--- a/tests/test_config_factory.py
+++ b/tests/test_config_factory.py
@@ -93,6 +93,7 @@ def test_to_omegaconf_basic(self):
         assert omega_config.engine_args.worker_type == "ar"
         assert omega_config.final_output is True
         assert omega_config.final_output_type == "text"
+        assert "max_num_seqs" not in omega_config.engine_args
         # Legacy field name for backward compatibility
         assert omega_config.engine_input_source == []
 
@@ -146,6 +147,24 @@ def test_to_omegaconf_max_num_seqs_in_engine_args(self):
         omega_config = config.to_omegaconf()
         assert omega_config.engine_args.max_num_seqs == 32
 
+    def test_to_omegaconf_omits_none_deploy_overrides_for_engine_args(self):
+        """``None`` deploy overrides are omitted from the generated ``engine_args``."""
+        from vllm_omni.config.stage_config import deploy_override_field_names
+
+        override_names = deploy_override_field_names()
+        stage = StageConfig(
+            stage_id=0,
+            model_stage="thinker",
+            runtime_overrides=dict.fromkeys(override_names),
+        )
+
+        engine_args = dict(stage.to_omegaconf().engine_args)
+
+        assert "devices" not in engine_args
+        assert "max_batch_size" not in engine_args
+        leaked = sorted(name for name in override_names - {"devices"} if name in engine_args)
+        assert not leaked
+
 
 class TestModelPipeline:
     """Tests for ModelPipeline class."""
@@ -806,21 +825,80 @@ def test_register_and_lookup(self):
 
 
 class TestDeployConfigLoading:
-    def test_load_deploy_config(self):
+    def test_deploy_override_fields_include_deploy_schema_fields(self):
+        """Pin the exact set of names returned by ``deploy_override_field_names``."""
+        from vllm_omni.config.stage_config import deploy_override_field_names
+
+        expected_fields = {
+            "async_chunk",
+            "async_scheduling",
+            "compilation_config",
+            "config_format",
+            "data_parallel_size",
+            "devices",
+            "disable_hybrid_kv_cache_manager",
+            "distributed_executor_backend",
+            "dtype",
+            "enable_chunked_prefill",
+            "enable_flashinfer_autotune",
+            "enable_prefix_caching",
+            "enforce_eager",
+            "gpu_memory_utilization",
+            "load_format",
+            "max_model_len",
+            "max_num_batched_tokens",
+            "max_num_seqs",
+            "mm_processor_cache_gb",
+            "pipeline_parallel_size",
+            "profiler_config",
+            "quantization",
+            "skip_mm_profiling",
+            "subtalker_sampling_params",
+            "tensor_parallel_size",
+            "tokenizer_mode",
+            "trust_remote_code",
+        }
+        actual_fields = deploy_override_field_names()
+        added, removed = actual_fields - expected_fields, expected_fields - actual_fields
+
+        assert expected_fields == actual_fields, f"added={added}, removed={removed}"
+
+    def test_load_qwen3_omni_moe_deploy_config(self):
         from pathlib import Path
 
         from vllm_omni.config.stage_config import load_deploy_config
 
         deploy_path = Path(__file__).parent.parent / "vllm_omni" / "deploy" / "qwen3_omni_moe.yaml"
-        if not deploy_path.exists():
-            pytest.skip("Deploy config not found")
-
         deploy = load_deploy_config(deploy_path)
         assert len(deploy.stages) == 3
         assert deploy.async_chunk is True
         assert deploy.connectors is not None
         assert deploy.platforms is not None
 
+    def test_load_voxtral_tts_deploy_config_schema_fields(self):
+        """Mistral format settings must load as typed stage fields, not as ``engine_extras``."""
+        from pathlib import Path
+
+        from vllm_omni.config.stage_config import load_deploy_config
+
+        repo_root = Path(__file__).parent.parent
+        stage0 = load_deploy_config(repo_root / "vllm_omni" / "deploy" / "voxtral_tts.yaml").stages[0]
+        assert stage0.config_format == "mistral"
+        assert stage0.load_format == "mistral"
+        assert stage0.tokenizer_mode == "mistral"
+        schema_fields = {"config_format", "load_format", "tokenizer_mode"}
+        assert schema_fields.isdisjoint(stage0.engine_extras)
+
+    def test_load_ming_flash_omni_deploy_config_schema_fields(self):
+        """``compilation_config`` must load as a typed stage field, not as an ``engine_extras`` entry."""
+        from pathlib import Path
+
+        from vllm_omni.config.stage_config import load_deploy_config
+
+        deploy = load_deploy_config(Path(__file__).parent.parent / "vllm_omni" / "deploy" / "ming_flash_omni.yaml")
+        assert "compilation_config" not in deploy.stages[0].engine_extras
+        assert deploy.stages[0].compilation_config == {"pass_config": {"fuse_allreduce_rms": False}}
+
     def test_merge_pipeline_deploy(self):
         from pathlib import Path
 
@@ -1171,7 +1249,8 @@ def test_ci_inherits_from_main(self):
         deploy = load_deploy_config(ci_path)
         assert len(deploy.stages) == 3
         # CI overrides
-        assert deploy.stages[0].engine_extras.get("load_format") == "dummy"
+        assert deploy.stages[0].load_format == "dummy"
+        assert "load_format" not in deploy.stages[0].engine_extras
         assert deploy.stages[0].max_num_seqs == 5
         # Inherited from base
         assert deploy.stages[0].gpu_memory_utilization == 0.9
@@ -1376,7 +1455,7 @@ def test_typed_kwarg_overrides_yaml(self):
     def test_none_value_skipped_yaml_wins(self):
         stages = self._stages({"max_num_seqs": None})
         assert stages[2].runtime_overrides.get("max_num_seqs") is None
-        assert stages[2].yaml_engine_args.get("max_num_seqs") == 1
+        assert "max_num_seqs" not in stages[2].yaml_engine_args
 
     def test_empty_kwargs_yaml_only(self):
         stages = self._stages({})
diff --git a/vllm_omni/config/stage_config.py b/vllm_omni/config/stage_config.py
index d4e33667723..ad2639ab33b 100644
--- a/vllm_omni/config/stage_config.py
+++ b/vllm_omni/config/stage_config.py
@@ -399,19 +399,41 @@ class StageDeployConfig:
     the top level of ``DeployConfig`` and propagated to every stage.
     """
 
+    # === Omni fields ===
+    # Stage identity and Omni runtime placement.
     stage_id: int
-    max_num_seqs: int = 64
-    gpu_memory_utilization: float = 0.9
-    tensor_parallel_size: int = 1
-    enforce_eager: bool = False
-    max_num_batched_tokens: int = 32768
-    max_model_len: int | None = None
-    async_scheduling: bool | None = None
-    devices: str = "0"
+    devices: str | None = None
+
+    # Inter-stage connector wiring and request defaults.
     output_connectors: dict[str, str] | None = None
     input_connectors: dict[str, str] | None = None
     default_sampling_params: dict[str, Any] | None = None
     subtalker_sampling_params: dict[str, Any] | None = None
+
+    # === vLLM EngineArgs fields ===
+    # Parallelism and scheduler/memory capacity.
+    tensor_parallel_size: int | None = None
+    gpu_memory_utilization: float | None = None
+    max_num_seqs: int | None = None
+    max_num_batched_tokens: int | None = None
+    max_model_len: int | None = None
+
+    # Execution, scheduling, and KV/cache behavior.
+    enforce_eager: bool | None = None
+    async_scheduling: bool | None = None
+    disable_hybrid_kv_cache_manager: bool | None = None
+    mm_processor_cache_gb: float | None = None
+
+    # Compilation, profiling, tokenizer/config parsing, and model loading.
+    compilation_config: dict[str, Any] | None = None
+    profiler_config: dict[str, Any] | None = None
+    skip_mm_profiling: bool | None = None
+    enable_flashinfer_autotune: bool | None = None
+    config_format: str | None = None
+    load_format: str | None = None
+    tokenizer_mode: str | None = None
+
+    # Pass-through vLLM EngineArgs fields that are not represented above.
     engine_extras: dict[str, Any] = field(default_factory=dict)
 
 
@@ -436,14 +458,14 @@ class DeployConfig:
     pipeline: str | None = None
 
     # === Pipeline-wide engine settings (applied uniformly to every stage) ===
-    trust_remote_code: bool = True
+    trust_remote_code: bool | None = None
     distributed_executor_backend: str | None = None
     dtype: str | None = None
     quantization: str | None = None
-    enable_prefix_caching: bool = False
+    enable_prefix_caching: bool | None = None
     enable_chunked_prefill: bool | None = None
-    data_parallel_size: int = 1
-    pipeline_parallel_size: int = 1
+    data_parallel_size: int | None = None
+    pipeline_parallel_size: int | None = None
 
 
 _STAGE_NON_ENGINE_KEYS = frozenset(
@@ -465,10 +487,10 @@ def _parse_stage_deploy(stage_data: dict[str, Any]) -> StageDeployConfig:
     """Parse a single stage entry from deploy YAML into StageDeployConfig."""
     if "engine_args" in stage_data:
         engine_args = dict(stage_data["engine_args"])
-        devices = stage_data.get("runtime", {}).get("devices", stage_data.get("devices", "0"))
+        devices = stage_data.get("runtime", {}).get("devices", stage_data.get("devices"))
     else:
         engine_args = {k: v for k, v in stage_data.items() if k not in _STAGE_NON_ENGINE_KEYS and k != "stage_id"}
-        devices = stage_data.get("devices", "0")
+        devices = stage_data.get("devices")
 
     kwargs: dict[str, Any] = {"stage_id": stage_data["stage_id"], "devices": devices}
     for name, f in _STAGE_DEPLOY_FIELDS.items():
@@ -687,6 +709,15 @@ def _select_processor_funcs(
 )
 
 
+def deploy_override_field_names() -> frozenset[str]:
+    """Return deploy-schema fields whose CLI defaults must not override YAML.
+
+    The result is the union of ``_STAGE_DEPLOY_FIELDS``, ``_PIPELINE_WIDE_ENGINE_FIELDS``,
+    and the two extra names ``async_chunk`` and ``devices``.
+    """
+    return frozenset({*_STAGE_DEPLOY_FIELDS, *_PIPELINE_WIDE_ENGINE_FIELDS, "async_chunk", "devices"})
+
+
 def _build_engine_args(
     ps: StagePipelineConfig,
     ds: StageDeployConfig | None,
@@ -802,7 +833,7 @@ def merge_pipeline_deploy(
             engine_args["async_scheduling"] = sched_cls is OmniARAsyncScheduler
         extras = _build_extras(ps, ds)
         runtime: dict[str, Any] = {"process": True}
-        if ds is not None:
+        if ds is not None and ds.devices is not None:
             runtime["devices"] = ds.devices
 
         result.append(
@@ -865,13 +896,13 @@ def to_omegaconf(self) -> Any:
 
         # CLI overrides take precedence over YAML defaults
         for key, value in self.runtime_overrides.items():
-            if key not in ("devices", "max_batch_size"):
+            if value is not None and key not in ("devices", "max_batch_size"):
                 engine_args[key] = value
 
         # Build runtime config from YAML defaults + CLI overrides
         runtime: dict[str, Any] = dict(self.yaml_runtime)
         runtime.setdefault("process", True)
-        if "devices" in self.runtime_overrides:
+        if self.runtime_overrides.get("devices") is not None:
             runtime["devices"] = self.runtime_overrides["devices"]
 
         # Legacy compat: migrate runtime.max_batch_size → engine_args.max_num_seqs
@@ -887,8 +918,6 @@ def to_omegaconf(self) -> Any:
             effective_mbs = int(cli_mbs or legacy_mbs or 1)
             engine_args.setdefault("max_num_seqs", effective_mbs)
 
-        engine_args.setdefault("max_num_seqs", 1)
-
         # Build full config dict
         config_dict: dict[str, Any] = {
             "stage_id": self.stage_id,
diff --git a/vllm_omni/deploy/bagel.yaml b/vllm_omni/deploy/bagel.yaml
index 9d2f1f8fffa..8de6f9305ba 100644
--- a/vllm_omni/deploy/bagel.yaml
+++ b/vllm_omni/deploy/bagel.yaml
@@ -10,8 +10,11 @@ async_chunk: false
 
 stages:
   - stage_id: 0
+    max_num_batched_tokens: 32768
     max_num_seqs: 3
     gpu_memory_utilization: 0.45
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "0"
     default_sampling_params:
       temperature: 0.4
@@ -23,8 +26,11 @@ stages:
       repetition_penalty: 1.05
 
   - stage_id: 1
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "0"
     input_connectors:
       from_stage_0: shared_memory_connector
diff --git a/vllm_omni/deploy/bagel_single_stage.yaml b/vllm_omni/deploy/bagel_single_stage.yaml
index 8470124ec78..bcfbad253a5 100644
--- a/vllm_omni/deploy/bagel_single_stage.yaml
+++ b/vllm_omni/deploy/bagel_single_stage.yaml
@@ -16,7 +16,11 @@ async_chunk: false
 
 stages:
   - stage_id: 0
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
+    enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "0"
     default_sampling_params:
       seed: 52
diff --git a/vllm_omni/deploy/cosyvoice3.yaml b/vllm_omni/deploy/cosyvoice3.yaml
index 53e3eb3f301..4bfd4ab859d 100644
--- a/vllm_omni/deploy/cosyvoice3.yaml
+++ b/vllm_omni/deploy/cosyvoice3.yaml
@@ -27,9 +27,12 @@ connectors:
 
 stages:
   - stage_id: 0
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
     gpu_memory_utilization: 0.4
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "0"
     output_connectors:
       to_stage_1: connector_of_shared_memory
@@ -45,9 +48,12 @@ stages:
     skip_mm_profiling: true
 
   - stage_id: 1
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
     gpu_memory_utilization: 0.2
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     max_model_len: 32768
     devices: "0"
     input_connectors:
diff --git a/vllm_omni/deploy/fish_qwen3_omni.yaml b/vllm_omni/deploy/fish_qwen3_omni.yaml
index a5bee925b68..5b0c44988a0 100644
--- a/vllm_omni/deploy/fish_qwen3_omni.yaml
+++ b/vllm_omni/deploy/fish_qwen3_omni.yaml
@@ -24,6 +24,8 @@ stages:
     max_num_seqs: 4
     gpu_memory_utilization: 0.6
     enforce_eager: false
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: false
     # vLLM >=0.19 requires max_num_batched_tokens >= max_model_len when
     # enable_chunked_prefill=false. Bumped from legacy 3072 to match
@@ -46,6 +48,8 @@ stages:
     max_num_seqs: 1
     gpu_memory_utilization: 0.1
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: false
     max_num_batched_tokens: 16384
     max_model_len: 16384
diff --git a/vllm_omni/deploy/glm_image.yaml b/vllm_omni/deploy/glm_image.yaml
index 28b88fb429a..099df1b1508 100644
--- a/vllm_omni/deploy/glm_image.yaml
+++ b/vllm_omni/deploy/glm_image.yaml
@@ -18,6 +18,8 @@ stages:
     max_num_seqs: 1
     gpu_memory_utilization: 0.6
     enforce_eager: false
+    trust_remote_code: true
+    enable_prefix_caching: false
     max_num_batched_tokens: 32768
     devices: "0"
     default_sampling_params:
@@ -32,8 +34,11 @@ stages:
   # Stage 1: Diffusion (DiT + VAE)
   # Receives prior_token_ids from AR, performs denoising + VAE decode.
   - stage_id: 1
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "1"
     default_sampling_params:
       seed: 42
diff --git a/vllm_omni/deploy/mimo_audio.yaml b/vllm_omni/deploy/mimo_audio.yaml
index f5e704f9bd4..a92e905f70e 100644
--- a/vllm_omni/deploy/mimo_audio.yaml
+++ b/vllm_omni/deploy/mimo_audio.yaml
@@ -25,6 +25,8 @@ stages:
     max_num_seqs: 1
     gpu_memory_utilization: 0.3
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     max_num_batched_tokens: 8192
     max_model_len: 8192
     devices: "0"
@@ -42,6 +44,8 @@ stages:
     max_num_seqs: 1
     gpu_memory_utilization: 0.2
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: false
     max_num_batched_tokens: 8192
     max_model_len: 8192
diff --git a/vllm_omni/deploy/moss_tts_nano.yaml b/vllm_omni/deploy/moss_tts_nano.yaml
index 585e244ca4a..2c8fc54c057 100644
--- a/vllm_omni/deploy/moss_tts_nano.yaml
+++ b/vllm_omni/deploy/moss_tts_nano.yaml
@@ -19,6 +19,7 @@ stages:
     max_num_seqs: 4
     gpu_memory_utilization: 0.3
     enforce_eager: true
+    enable_prefix_caching: false
     max_num_batched_tokens: 4096
     max_model_len: 4096
     devices: "0"
diff --git a/vllm_omni/deploy/qwen2_5_omni.yaml b/vllm_omni/deploy/qwen2_5_omni.yaml
index 7ab87e59052..487ceefdddb 100644
--- a/vllm_omni/deploy/qwen2_5_omni.yaml
+++ b/vllm_omni/deploy/qwen2_5_omni.yaml
@@ -19,9 +19,12 @@ async_chunk: false
 
 stages:
   - stage_id: 0
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
     gpu_memory_utilization: 0.8
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     mm_processor_cache_gb: 0
     devices: "0"
     default_sampling_params:
@@ -33,9 +36,12 @@ stages:
       repetition_penalty: 1.1
 
   - stage_id: 1
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
     gpu_memory_utilization: 0.8
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "1"
     default_sampling_params:
       temperature: 0.9
@@ -46,9 +52,12 @@ stages:
       repetition_penalty: 1.05
 
   - stage_id: 2
+    max_num_batched_tokens: 32768
     max_num_seqs: 1
     gpu_memory_utilization: 0.15
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     enable_flashinfer_autotune: false
     async_scheduling: false
     devices: "0"
diff --git a/vllm_omni/deploy/qwen3_omni_moe.yaml b/vllm_omni/deploy/qwen3_omni_moe.yaml
index bbc8e11400a..445437c0fa5 100644
--- a/vllm_omni/deploy/qwen3_omni_moe.yaml
+++ b/vllm_omni/deploy/qwen3_omni_moe.yaml
@@ -22,7 +22,11 @@ connectors:
 
 stages:
   - stage_id: 0
+    max_num_batched_tokens: 32768
+    max_num_seqs: 64
     gpu_memory_utilization: 0.9
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "0"
     default_sampling_params:
       temperature: 0.4
@@ -33,7 +37,11 @@ stages:
       repetition_penalty: 1.05
 
   - stage_id: 1
+    max_num_batched_tokens: 32768
+    max_num_seqs: 64
     gpu_memory_utilization: 0.6
+    trust_remote_code: true
+    enable_prefix_caching: false
     devices: "1"
     input_connectors:
       from_stage_0: connector_of_shared_memory
@@ -45,10 +53,13 @@ stages:
       repetition_penalty: 1.05
 
   - stage_id: 2
+    max_num_batched_tokens: 51200
+    max_num_seqs: 64
     gpu_memory_utilization: 0.1
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: false
-    max_num_batched_tokens: 51200
     devices: "1"
     input_connectors:
       from_stage_1: connector_of_shared_memory
diff --git a/vllm_omni/deploy/qwen3_tts.yaml b/vllm_omni/deploy/qwen3_tts.yaml
index 51839cab1be..4bf13540314 100644
--- a/vllm_omni/deploy/qwen3_tts.yaml
+++ b/vllm_omni/deploy/qwen3_tts.yaml
@@ -31,6 +31,8 @@ stages:
   - stage_id: 0
     max_num_seqs: 10
     gpu_memory_utilization: 0.3
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: true
     max_num_batched_tokens: 512
     max_model_len: 4096
@@ -53,6 +55,8 @@ stages:
     max_num_seqs: 1
     gpu_memory_utilization: 0.3
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: true
     # Must be divisible by num_code_groups and cover (left_context + chunk).
     # Prefill length is Q * num_frames (e.g. 16 * 2148 = 34368); keep
diff --git a/vllm_omni/deploy/voxcpm2.yaml b/vllm_omni/deploy/voxcpm2.yaml
index b49906710df..71ef148242a 100644
--- a/vllm_omni/deploy/voxcpm2.yaml
+++ b/vllm_omni/deploy/voxcpm2.yaml
@@ -16,6 +16,8 @@ stages:
     max_num_seqs: 4
     gpu_memory_utilization: 0.9
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: true
     max_num_batched_tokens: 4096
     max_model_len: 4096
diff --git a/vllm_omni/deploy/voxtral_tts.yaml b/vllm_omni/deploy/voxtral_tts.yaml
index 87d999c67e0..929daddb13f 100644
--- a/vllm_omni/deploy/voxtral_tts.yaml
+++ b/vllm_omni/deploy/voxtral_tts.yaml
@@ -21,9 +21,12 @@ connectors:
 
 stages:
   - stage_id: 0
+    max_num_batched_tokens: 32768
     max_num_seqs: 32
     gpu_memory_utilization: 0.8
     enforce_eager: false
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: true
     max_model_len: 4096
     devices: "0"
@@ -48,6 +51,8 @@ stages:
     max_num_seqs: 32
     gpu_memory_utilization: 0.1
     enforce_eager: true
+    trust_remote_code: true
+    enable_prefix_caching: false
     async_scheduling: false
     max_num_batched_tokens: 65536
     max_model_len: 65536
diff --git a/vllm_omni/engine/arg_utils.py b/vllm_omni/engine/arg_utils.py
index 3f16c329e27..6c10c750053 100644
--- a/vllm_omni/engine/arg_utils.py
+++ b/vllm_omni/engine/arg_utils.py
@@ -456,50 +456,12 @@ class OrchestratorArgs:
     }
 )
 
-_DEPLOY_ENGINE_ARG_OVERRIDE_FIELDS: frozenset[str] = frozenset(
-    {
-        # Capacity / scheduling.
-        "async_scheduling",
-        "max_model_len",
-        "max_num_batched_tokens",
-        "max_num_seqs",
-        # Memory / parallelism.
-        "data_parallel_size",
-        "gpu_memory_utilization",
-        "pipeline_parallel_size",
-        "tensor_parallel_size",
-        # Execution / loading.
-        "enforce_eager",
-        "distributed_executor_backend",
-        "dtype",
-        "quantization",
-        "trust_remote_code",
-        # Caching / chunking.
-        "async_chunk",
-        "enable_prefix_caching",
-        "enable_chunked_prefill",
-        # Model-specific engine extras.
-        "subtalker_sampling_params",
-    }
-)
-
-_DEPLOY_RUNTIME_OVERRIDE_FIELDS: frozenset[str] = frozenset(
-    {
-        "devices",
-    }
-)
-
 
 def orchestrator_field_names() -> frozenset[str]:
     """Return the names of every field on OrchestratorArgs."""
     return frozenset(f.name for f in fields(OrchestratorArgs))
 
 
-def deploy_override_field_names() -> frozenset[str]:
-    """Return kwargs whose parser defaults must not override deploy YAML."""
-    return _DEPLOY_ENGINE_ARG_OVERRIDE_FIELDS | _DEPLOY_RUNTIME_OVERRIDE_FIELDS
-
-
 def internal_blacklist_keys() -> frozenset[str]:
     """Return the set of CLI keys that must never be forwarded as per-stage
     engine overrides.
@@ -653,6 +615,8 @@ def nullify_stage_engine_defaults(parser: argparse.ArgumentParser) -> None:
     """Reset stage-level engine flag defaults to ``None``; preserve real
     default in help text. Only deploy-YAML override fields are touched.
     Idempotent."""
+    from vllm_omni.config.stage_config import deploy_override_field_names
+
     override_dests = deploy_override_field_names()
 
     for action in parser._actions:
diff --git a/vllm_omni/entrypoints/omni_base.py b/vllm_omni/entrypoints/omni_base.py
index f1d1e90a897..86674206ee7 100644
--- a/vllm_omni/entrypoints/omni_base.py
+++ b/vllm_omni/entrypoints/omni_base.py
@@ -106,9 +106,7 @@ def from_cli_args(
         kwargs: dict[str, Any] = {k: v for k, v in vars(args).items() if not k.startswith("_")}
 
         if parser is not None and not getattr(parser, "_omni_nullified", False):
-            from vllm_omni.engine.arg_utils import (
-                deploy_override_field_names,
-            )
+            from vllm_omni.config.stage_config import deploy_override_field_names
             from vllm_omni.entrypoints.utils import detect_explicit_cli_keys
 
             explicit = detect_explicit_cli_keys(sys.argv[1:], parser) or set()