From e5a874e2d1ddcb7eb8a6c96018835bee74ff4fc2 Mon Sep 17 00:00:00 2001 From: Paco Xu Date: Mon, 26 Jan 2026 16:01:31 +0800 Subject: [PATCH 1/2] fix(grpc): improve GetServerInfo response consistency and accuracy - Use single time.time() for last_receive_timestamp and uptime_seconds to avoid drift between the two values - Report actual pause state via self.async_llm.is_paused instead of hardcoded False Signed-off-by: Paco Xu --- vllm/entrypoints/grpc_server.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/grpc_server.py b/vllm/entrypoints/grpc_server.py index 1fc3354a41cd..5fd92f897d34 100755 --- a/vllm/entrypoints/grpc_server.py +++ b/vllm/entrypoints/grpc_server.py @@ -228,13 +228,14 @@ async def GetServerInfo( Returns: GetServerInfoResponse protobuf """ + last_receive_timestamp = time.time() num_requests = self.async_llm.output_processor.get_num_unfinished_requests() return vllm_engine_pb2.GetServerInfoResponse( active_requests=num_requests, - is_paused=False, # TODO - last_receive_timestamp=time.time(), # TODO looks wrong? - uptime_seconds=time.time() - self.start_time, + is_paused=self.async_llm.is_paused, + last_receive_timestamp=last_receive_timestamp, + uptime_seconds=last_receive_timestamp - self.start_time, server_type="vllm-grpc", ) From 74257a895be017291beb5ae3c87bd2a0d79a63fb Mon Sep 17 00:00:00 2001 From: Paco Xu Date: Tue, 27 Jan 2026 09:34:09 +0800 Subject: [PATCH 2/2] grpc: add TODO that last_receive_timestamp should track last request time last_receive_timestamp is meant to be the time of the last received request, not the current time; leave a TODO to track it when requests are received. Signed-off-by: Paco Xu --- vllm/entrypoints/grpc_server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/entrypoints/grpc_server.py b/vllm/entrypoints/grpc_server.py index 5fd92f897d34..6beab7433f63 100755 --- a/vllm/entrypoints/grpc_server.py +++ b/vllm/entrypoints/grpc_server.py @@ -228,6 +228,8 @@ async def GetServerInfo( Returns: GetServerInfoResponse protobuf """ + # TODO: last_receive_timestamp is meant to be the time of the last received + # request, not the current time; track it when requests are received. last_receive_timestamp = time.time() num_requests = self.async_llm.output_processor.get_num_unfinished_requests()