diff --git a/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py b/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py
index 6be039df105..7bba11f236a 100644
--- a/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py
+++ b/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py
@@ -219,7 +219,14 @@ def forward(
                 if i >= len(left_context_size):
                     break
                 if "left_context_size" in info:
-                    left_context_size[i] = info["left_context_size"]
+                    val = info["left_context_size"]
+                    # The non-async path sends a one-element list, because
+                    # serialize_additional_information drops plain ints; the
+                    # async chunk path sends a plain int. Accept both, and
+                    # treat an empty list as 0.
+                    if isinstance(val, list):
+                        val = val[0] if val else 0
+                    left_context_size[i] = int(val)
         for i, req_ids in enumerate(request_ids_list):
             if req_ids.numel() < 1:
                 parsed.append((0, 0))
diff --git a/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py b/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py
index 69724dfc09c..5a010809594 100644
--- a/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py
+++ b/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py
@@ -45,6 +45,9 @@ def talker2code2wav(
             ref_code_len = 0
         # Code2Wav expects codebook-major flat: [Q*num_frames]
         codec_codes = audio_codes.transpose(0, 1).cpu().reshape(-1).tolist()
+        # Wrap ref_code_len in a list: serialize_additional_information()
+        # only preserves tensor and list values; plain ints are dropped.
+        # The consumer (Qwen3TTSCode2Wav.forward) unwraps the list.
         additional_information = {"left_context_size": [ref_code_len]} if ref_code_len > 0 else None
         code2wav_inputs.append(
             OmniTokensPrompt(