diff --git a/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py b/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py index 6be039df105..7bba11f236a 100644 --- a/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py +++ b/vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py @@ -219,7 +219,14 @@ def forward( if i >= len(left_context_size): break if "left_context_size" in info: - left_context_size[i] = info["left_context_size"] + val = info["left_context_size"] + # Non-async path sends a list (required by + # serialize_additional_information which drops + # plain ints); async chunk path sends a plain int. + # Handle both. + if isinstance(val, list): + val = val[0] if val else 0 + left_context_size[i] = int(val) for i, req_ids in enumerate(request_ids_list): if req_ids.numel() < 1: parsed.append((0, 0)) diff --git a/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py b/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py index 69724dfc09c..5a010809594 100644 --- a/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py +++ b/vllm_omni/model_executor/stage_input_processors/qwen3_tts.py @@ -45,6 +45,9 @@ def talker2code2wav( ref_code_len = 0 # Code2Wav expects codebook-major flat: [Q*num_frames] codec_codes = audio_codes.transpose(0, 1).cpu().reshape(-1).tolist() + # Wrap ref_code_len in a list: serialize_additional_information() + # only preserves tensor and list values; plain ints are dropped. + # The consumer (Qwen3TTSCode2Wav.forward) unwraps the list. additional_information = {"left_context_size": [ref_code_len]} if ref_code_len > 0 else None code2wav_inputs.append( OmniTokensPrompt(