diff --git a/vllm_omni/benchmarks/patch/patch.py b/vllm_omni/benchmarks/patch/patch.py
index 4e13dfaced2..539af11f868 100644
--- a/vllm_omni/benchmarks/patch/patch.py
+++ b/vllm_omni/benchmarks/patch/patch.py
@@ -376,9 +376,8 @@ async def benchmark(
         limit_per_host=max_concurrency or 0,
         ttl_dns_cache=300,
         use_dns_cache=True,
-        keepalive_timeout=60,
         enable_cleanup_closed=True,
-        force_close=False,
+        force_close=True,
         ssl=ssl_setting,
     )
 
diff --git a/vllm_omni/model_executor/models/qwen3_omni/qwen3_omni.py b/vllm_omni/model_executor/models/qwen3_omni/qwen3_omni.py
index 2b5bcd60a03..5a22ce024ab 100644
--- a/vllm_omni/model_executor/models/qwen3_omni/qwen3_omni.py
+++ b/vllm_omni/model_executor/models/qwen3_omni/qwen3_omni.py
@@ -1047,9 +1047,7 @@ def _get_talker_assistant_parts(
             dim=0,
         )
     else:
-        trailing_text_hidden = torch.zeros(
-            tts_eos_embed.shape, device=tts_eos_embed.device, dtype=tts_eos_embed.dtype
-        )
+        trailing_text_hidden = tts_eos_embed
 
     input_embeds = assistant_text_hidden + assistant_codec_hidden
     input_ids = torch.full(