diff --git a/examples/online_serving/qwen3_tts/gradio_demo.py b/examples/online_serving/qwen3_tts/gradio_demo.py index a2f17cb7890..ccf56d8156f 100644 --- a/examples/online_serving/qwen3_tts/gradio_demo.py +++ b/examples/online_serving/qwen3_tts/gradio_demo.py @@ -213,7 +213,7 @@ def _build_player_js(sample_rate: int) -> str: node.port.postMessage({{ type: 'clear' }}); gen = true; - st = {{ t0: performance.now(), chunks: 0, samples: 0, ttfp: null }}; + st = {{ t0: null, chunks: 0, samples: 0, ttfp: null }}; setStatus('Connecting...', '#4A90D9'); const bEl = document.getElementById('tts-stop-btn'); if (bEl) bEl.style.display = 'inline-block'; @@ -231,6 +231,7 @@ def _build_player_js(sample_rate: int) -> str: try {{ console.log('fetch payload:', JSON.stringify({{input: payload.input?.slice(0,30), task: payload.task_type, has_ref: !!payload.ref_audio, stream: payload.stream}})); + st.t0 = performance.now(); const r = await fetch('/proxy/v1/audio/speech', {{ method: 'POST', headers: {{ 'Content-Type': 'application/json' }}, @@ -362,21 +363,8 @@ async def proxy_speech(request: Request): req_id = body.get("_req_id") if req_id and req_id in _pending_payloads: body = _pending_payloads.pop(req_id) - # Pre-download ref_audio URL so TTFP only measures synthesis time - ref = body.get("ref_audio", "") - if isinstance(ref, str) and ref.startswith("http"): - try: - async with httpx.AsyncClient(timeout=30) as dl: - r = await dl.get(ref) - r.raise_for_status() - import base64 - - b64 = base64.b64encode(r.content).decode() - ct = r.headers.get("content-type", "audio/wav") - body["ref_audio"] = f"data:{ct};base64,{b64}" - logger.info("Pre-downloaded ref_audio: %d bytes", len(r.content)) - except Exception: - logger.exception("Failed to pre-download ref_audio, passing URL as-is") + # Pass ref_audio URL directly to vLLM server (it handles URL resolution). + # Pre-downloading and re-encoding adds ~2-3s to TTFP for large files. logger.info( "Proxy request: %s", {k: (f"<{len(str(v))} chars>" if k == "ref_audio" else v) for k, v in body.items()},