Skip to content

Commit b903e1b

Browse files
authored
[Frontend] error suppression cleanup (#7786)
Signed-off-by: Joe Runde <[email protected]>
1 parent a152246 commit b903e1b

File tree

3 files changed

+18
-7
lines changed

3 files changed

+18
-7
lines changed

tests/entrypoints/openai/rpc/test_zmq_client.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,12 @@ async def test_client_aborts_use_timeouts(monkeypatch, dummy_server,
7575
m.setattr(dummy_server, "abort", lambda x: None)
7676
m.setattr(client, "_data_timeout", 10)
7777

78-
# Ensure the client doesn't hang
78+
# The client should suppress timeouts on `abort`s
79+
# and return normally, assuming the server will eventually
80+
# abort the request.
7981
client_task = asyncio.get_running_loop().create_task(
8082
client.abort("test request id"))
81-
with pytest.raises(TimeoutError, match="Server didn't reply within"):
82-
await asyncio.wait_for(client_task, timeout=0.05)
83+
await asyncio.wait_for(client_task, timeout=0.05)
8384

8485

8586
@pytest.mark.asyncio

vllm/entrypoints/openai/api_server.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import re
77
import tempfile
88
from argparse import Namespace
9-
from contextlib import asynccontextmanager, suppress
9+
from contextlib import asynccontextmanager
1010
from http import HTTPStatus
1111
from typing import AsyncIterator, Optional, Set
1212

@@ -83,8 +83,7 @@ async def lifespan(app: FastAPI):
8383
async def _force_log():
8484
while True:
8585
await asyncio.sleep(10)
86-
with suppress(Exception):
87-
await async_engine_client.do_log_stats()
86+
await async_engine_client.do_log_stats()
8887

8988
if not engine_args.disable_log_stats:
9089
task = asyncio.create_task(_force_log())

vllm/entrypoints/openai/rpc/client.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,18 @@ async def _is_tracing_enabled_rpc(self) -> bool:
335335

336336
async def abort(self, request_id: str):
337337
"""Send an ABORT_REQUEST signal to the RPC Server"""
338-
with suppress(RPCClientClosedError):
338+
339+
# Suppress timeouts as well.
340+
# In cases where the server is busy processing requests and a very
341+
# large volume of abort requests arrive, it is likely that the server
342+
# will not be able to ack all of them in time. We have seen this when
343+
# we abort 20k requests at once while another 2k are processing; many
344+
# of them time out, but we see the server successfully abort all of the
345+
# requests.
346+
# In this case we assume that the server has received or will receive
347+
# these abort requests, and ignore the timeout. This prevents a massive
348+
# wall of `TimeoutError` stack traces.
349+
with suppress(RPCClientClosedError, TimeoutError):
339350
await self._send_one_way_rpc_request(
340351
request=RPCAbortRequest(request_id),
341352
error_message=f"RPCAbortRequest {request_id} failed")

0 commit comments

Comments
 (0)