vllm-project · hsliuustc0106 · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026 · chatgpt-codex-connector
@@ -213,7 +213,7 @@ def _build_player_js(sample_rate: int) -> str:
         node.port.postMessage({{ type: 'clear' }});
 
         gen = true;
-        st = {{ t0: performance.now(), chunks: 0, samples: 0, ttfp: null }};
+        st = {{ t0: null, chunks: 0, samples: 0, ttfp: null }};
         setStatus('Connecting...', '#4A90D9');
         const bEl = document.getElementById('tts-stop-btn');
         if (bEl) bEl.style.display = 'inline-block';
@@ -231,6 +231,7 @@ def _build_player_js(sample_rate: int) -> str:
 
         try {{
             console.log('fetch payload:', JSON.stringify({{input: payload.input?.slice(0,30), task: payload.task_type, has_ref: !!payload.ref_audio, stream: payload.stream}}));
+            st.t0 = performance.now();
             const r = await fetch('/proxy/v1/audio/speech', {{
                 method: 'POST',
                 headers: {{ 'Content-Type': 'application/json' }},
@@ -362,21 +363,8 @@ async def proxy_speech(request: Request):
         req_id = body.get("_req_id")
         if req_id and req_id in _pending_payloads:
             body = _pending_payloads.pop(req_id)
-        # Pre-download ref_audio URL so TTFP only measures synthesis time
-        ref = body.get("ref_audio", "")
-        if isinstance(ref, str) and ref.startswith("http"):
-            try:
-                async with httpx.AsyncClient(timeout=30) as dl:
-                    r = await dl.get(ref)
-                    r.raise_for_status()
-                    import base64
-
-                    b64 = base64.b64encode(r.content).decode()
-                    ct = r.headers.get("content-type", "audio/wav")
-                    body["ref_audio"] = f"data:{ct};base64,{b64}"
-                    logger.info("Pre-downloaded ref_audio: %d bytes", len(r.content))
-            except Exception:
-                logger.exception("Failed to pre-download ref_audio, passing URL as-is")
+        # Pass ref_audio URL directly to vLLM server (it handles URL resolution).
+        # Pre-downloading and re-encoding adds ~2-3s to TTFP for large files.
         logger.info(
             "Proxy request: %s",
             {k: (f"<{len(str(v))} chars>" if k == "ref_audio" else v) for k, v in body.items()},