diff --git a/python/sglang/srt/disaggregation/mini_lb.py b/python/sglang/srt/disaggregation/mini_lb.py index c7e0a20899c..8e3371c73f9 100644 --- a/python/sglang/srt/disaggregation/mini_lb.py +++ b/python/sglang/srt/disaggregation/mini_lb.py @@ -18,6 +18,10 @@ from sglang.srt.disaggregation.utils import PDRegistryRequest +AIOHTTP_STREAM_READ_CHUNK_SIZE = ( + 1024 * 64 +) # 64 KiB read size passed to iter_chunked(), to prevent aiohttp's "Chunk too big" error + def setup_logger(): logger = logging.getLogger("pdlb") @@ -154,7 +158,9 @@ async def stream_results(): else: yield chunk else: - async for chunk in decode_response.content: + async for chunk in decode_response.content.iter_chunked( + AIOHTTP_STREAM_READ_CHUNK_SIZE + ): yield chunk return StreamingResponse(