From 02d154ff20cbeffda4af34baaa2f964b41efbe86 Mon Sep 17 00:00:00 2001 From: princepride Date: Sat, 14 Mar 2026 15:30:41 +0000 Subject: [PATCH 1/2] remove useless hf_overrides Signed-off-by: princepride --- benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml | 4 ---- benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs4.yaml | 4 ---- tests/perf/stage_configs/qwen3_tts.yaml | 4 ---- .../model_executor/stage_configs/fish_speech_s2_pro.yaml | 4 ---- vllm_omni/model_executor/stage_configs/qwen3_tts.yaml | 6 ------ .../model_executor/stage_configs/qwen3_tts_batch.yaml | 6 ------ .../stage_configs/qwen3_tts_no_async_chunk.yaml | 4 ---- vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml | 8 ++------ 8 files changed, 2 insertions(+), 38 deletions(-) diff --git a/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml b/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml index 1597f8aa24e..d8e4072c07b 100644 --- a/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml +++ b/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs1.yaml @@ -10,8 +10,6 @@ stage_args: engine_args: model_stage: qwen3_tts model_arch: Qwen3TTSTalkerForConditionalGeneration - hf_overrides: - architectures: [Qwen3TTSTalkerForConditionalGeneration] worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: false @@ -43,8 +41,6 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - hf_overrides: - architectures: [Qwen3TTSCode2Wav] worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true diff --git a/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs4.yaml b/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs4.yaml index 18493f3aeee..93692d60dac 100644 --- a/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs4.yaml +++ b/benchmarks/qwen3-tts/vllm_omni/configs/qwen3_tts_bs4.yaml @@ -11,8 +11,6 @@ stage_args: engine_args: model_stage: qwen3_tts 
model_arch: Qwen3TTSTalkerForConditionalGeneration - hf_overrides: - architectures: [Qwen3TTSTalkerForConditionalGeneration] worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: false @@ -44,8 +42,6 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - hf_overrides: - architectures: [Qwen3TTSCode2Wav] worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true diff --git a/tests/perf/stage_configs/qwen3_tts.yaml b/tests/perf/stage_configs/qwen3_tts.yaml index 4ba4e6e83e8..c069294e482 100644 --- a/tests/perf/stage_configs/qwen3_tts.yaml +++ b/tests/perf/stage_configs/qwen3_tts.yaml @@ -13,8 +13,6 @@ stage_args: engine_args: model_stage: qwen3_tts model_arch: Qwen3TTSTalkerForConditionalGeneration - hf_overrides: - architectures: [Qwen3TTSTalkerForConditionalGeneration] worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: false @@ -46,8 +44,6 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - hf_overrides: - architectures: [Qwen3TTSCode2Wav] worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true diff --git a/vllm_omni/model_executor/stage_configs/fish_speech_s2_pro.yaml b/vllm_omni/model_executor/stage_configs/fish_speech_s2_pro.yaml index 7f5ba120265..a847cb18143 100644 --- a/vllm_omni/model_executor/stage_configs/fish_speech_s2_pro.yaml +++ b/vllm_omni/model_executor/stage_configs/fish_speech_s2_pro.yaml @@ -8,8 +8,6 @@ stage_args: engine_args: model_stage: fish_speech_slow_ar model_arch: FishSpeechSlowARForConditionalGeneration - hf_overrides: - architectures: [FishSpeechSlowARForConditionalGeneration] worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: false @@ -43,8 +41,6 @@ stage_args: engine_args: model_stage: 
dac_decoder model_arch: FishSpeechDACDecoder - hf_overrides: - architectures: [FishSpeechDACDecoder] worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true diff --git a/vllm_omni/model_executor/stage_configs/qwen3_tts.yaml b/vllm_omni/model_executor/stage_configs/qwen3_tts.yaml index 570ed710c51..c80299fcf3b 100644 --- a/vllm_omni/model_executor/stage_configs/qwen3_tts.yaml +++ b/vllm_omni/model_executor/stage_configs/qwen3_tts.yaml @@ -8,9 +8,6 @@ stage_args: engine_args: model_stage: qwen3_tts model_arch: Qwen3TTSTalkerForConditionalGeneration - # Force stage-specific registered architecture. - hf_overrides: - architectures: [Qwen3TTSTalkerForConditionalGeneration] worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: false @@ -43,9 +40,6 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - # Force stage-specific registered architecture. - hf_overrides: - architectures: [Qwen3TTSCode2Wav] worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true diff --git a/vllm_omni/model_executor/stage_configs/qwen3_tts_batch.yaml b/vllm_omni/model_executor/stage_configs/qwen3_tts_batch.yaml index 5236668dc64..e1eb2af0a6f 100644 --- a/vllm_omni/model_executor/stage_configs/qwen3_tts_batch.yaml +++ b/vllm_omni/model_executor/stage_configs/qwen3_tts_batch.yaml @@ -12,9 +12,6 @@ stage_args: engine_args: model_stage: qwen3_tts model_arch: Qwen3TTSTalkerForConditionalGeneration - # Force stage-specific registered architecture. - hf_overrides: - architectures: [Qwen3TTSTalkerForConditionalGeneration] worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: false @@ -47,9 +44,6 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - # Force stage-specific registered architecture. 
- hf_overrides: - architectures: [Qwen3TTSCode2Wav] worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true diff --git a/vllm_omni/model_executor/stage_configs/qwen3_tts_no_async_chunk.yaml b/vllm_omni/model_executor/stage_configs/qwen3_tts_no_async_chunk.yaml index da22953bbac..68868924ed7 100644 --- a/vllm_omni/model_executor/stage_configs/qwen3_tts_no_async_chunk.yaml +++ b/vllm_omni/model_executor/stage_configs/qwen3_tts_no_async_chunk.yaml @@ -8,8 +8,6 @@ stage_args: engine_args: model_stage: qwen3_tts model_arch: Qwen3TTSTalkerForConditionalGeneration - hf_overrides: - architectures: [Qwen3TTSTalkerForConditionalGeneration] worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: false @@ -38,8 +36,6 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - hf_overrides: - architectures: [Qwen3TTSCode2Wav] worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true diff --git a/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml b/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml index 60659a9768b..de62e258cbd 100644 --- a/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml +++ b/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml @@ -8,9 +8,7 @@ stage_args: engine_args: model_stage: qwen3_tts model_arch: Qwen3TTSTalkerForConditionalGeneration - # Force stage-specific registered architecture. - hf_overrides: - architectures: [Qwen3TTSTalkerForConditionalGeneration] + model_arch: Qwen3TTSTalkerForConditionalGeneration worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: true @@ -43,9 +41,7 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - # Force stage-specific registered architecture. 
- hf_overrides: - architectures: [Qwen3TTSCode2Wav] + model_arch: Qwen3TTSCode2Wav worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true From 58fa85029bf8df02065fdb2367d0241c8a11fc17 Mon Sep 17 00:00:00 2001 From: princepride Date: Sat, 14 Mar 2026 15:34:39 +0000 Subject: [PATCH 2/2] remove duplicate model_arch keys from NPU qwen3_tts stage config Signed-off-by: princepride --- vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml b/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml index de62e258cbd..a86358a4ba9 100644 --- a/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml +++ b/vllm_omni/platforms/npu/stage_configs/qwen3_tts.yaml @@ -8,7 +8,6 @@ stage_args: engine_args: model_stage: qwen3_tts model_arch: Qwen3TTSTalkerForConditionalGeneration - model_arch: Qwen3TTSTalkerForConditionalGeneration worker_type: ar scheduler_cls: vllm_omni.core.sched.omni_ar_scheduler.OmniARScheduler enforce_eager: true @@ -41,7 +40,6 @@ stage_args: engine_args: model_stage: code2wav model_arch: Qwen3TTSCode2Wav - model_arch: Qwen3TTSCode2Wav worker_type: generation scheduler_cls: vllm_omni.core.sched.omni_generation_scheduler.OmniGenerationScheduler enforce_eager: true