From 493f6f5349b2629041cc184ef90cadcafe56ee4c Mon Sep 17 00:00:00 2001 From: Jinghui Zhang Date: Tue, 17 Mar 2026 15:28:40 -0700 Subject: [PATCH] [BugFix] Ensure num_cached_tokens is non-negative for kv transfer failed requests When a request fails its KV load on the decode side, it is still in the WAITING_REMOTE_KV state, so its num_cached_tokens is never updated and keeps the default value of -1. When metrics logging later records local_cache_hit, that -1 is used as the counter increment and crashes the engine with: ValueError: Counters can only be incremented by non-negative amounts. --- vllm/v1/core/sched/scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 486ce8debc88..5bc2a16db307 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -1470,7 +1470,7 @@ def update_from_output( finish_reason=request.get_finished_reason(), events=request.take_events(), trace_headers=request.trace_headers, - num_cached_tokens=request.num_cached_tokens, + num_cached_tokens=max(0, request.num_cached_tokens), ) )