From c195084656c2fc401489829cd7bef0c455a56a3d Mon Sep 17 00:00:00 2001 From: Alex Brooks Date: Mon, 13 Apr 2026 17:50:21 +0000 Subject: [PATCH] fix wrong benchmark strip Signed-off-by: Alex Brooks --- vllm_omni/benchmarks/patch/patch.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm_omni/benchmarks/patch/patch.py b/vllm_omni/benchmarks/patch/patch.py index 343655df20e..17d7498ba26 100644 --- a/vllm_omni/benchmarks/patch/patch.py +++ b/vllm_omni/benchmarks/patch/patch.py @@ -143,7 +143,11 @@ async def async_request_openai_chat_omni_completions( if response.status == 200: handler = StreamedResponseHandler() async for chunk_bytes in response.content.iter_any(): - chunk_bytes = chunk_bytes.strip() + # NOTE: Do NOT strip() here; TCP may fragment the SSE messages, # so stripping each chunk can corrupt a message, depending on where it was split. # # Simple example: [b'data: ', b'{json}\n\n'] <- stripping the first # chunk will break SSE parsing because the space after 'data:' is required. if not chunk_bytes: continue