Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions examples/online_serving/qwen3_tts/openai_speech_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,14 @@ def run_tts_generation(args) -> None:
print(response.text)
return

if response.content.decode("utf-8").startswith('{"error"'):
print(f"Error: {response.content.decode('utf-8')}")
return
# Check for JSON error response (only if content is valid UTF-8 text)
try:
text = response.content.decode("utf-8")
if text.startswith('{"error"'):
print(f"Error: {text}")
return
except UnicodeDecodeError:
pass # Binary audio data, not an error

# Save audio response
output_path = args.output or "tts_output.wav"
Expand Down
8 changes: 7 additions & 1 deletion vllm_omni/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,16 @@ def from_diffusion(
def multimodal_output(self) -> dict[str, Any]:
    """Return multimodal output from the underlying request output or local field.

    For pipeline outputs (``request_output`` is set), the completion outputs
    in ``request_output.outputs`` are checked first and the first non-empty
    ``multimodal_output`` found there is returned. If no completion output
    carries one, the ``multimodal_output`` attribute of ``request_output``
    itself is returned, or an empty dict when that attribute is missing or
    ``None``.

    For diffusion outputs (``request_output`` is ``None``), this returns the
    local ``_multimodal_output`` field.
    """
    request_output = self.request_output
    if request_output is None:
        return self._multimodal_output

    # `outputs` may be None or empty; `or ()` makes the loop a no-op then.
    for completion in request_output.outputs or ():
        mm = getattr(completion, "multimodal_output", None)
        if mm:
            return mm

    # Honor the declared dict return type even when the attribute is
    # present but explicitly set to None.
    fallback = getattr(request_output, "multimodal_output", None)
    return {} if fallback is None else fallback

Expand Down