Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion lib/bindings/kvbm/src/block_manager/vllm/connector/leader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,21 @@ impl Leader for KvConnectorLeader {
"request_finished called for request_id: {request_id} but slot is not found"
);
self.inflight_requests.remove(&request_id);
return Ok(false);
// We must return `true` here even though the leader slot is gone.
//
// Within a single call to vLLM's `update_from_output()`, two things
// happen in sequence:
// 1. Stopped requests call _free_request() → request_finished() (here)
// 2. Worker's finished_sending is processed by _update_from_kv_xfer_finished()
//
// The worker's get_finished() ran during the forward pass and may have
// reported this request in finished_sending for this same step. If we
// return `false`, vLLM deletes the request from self.requests at step 1.
// Then step 2 hits `assert req_id in self.requests` and crashes.
//
// Returning `true` keeps the request in self.requests so step 2 can
// process finished_sending and call _free_blocks() properly.
return Ok(true);
}

// grab the slot
Expand Down
22 changes: 16 additions & 6 deletions lib/bindings/kvbm/src/block_manager/vllm/connector/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,11 +409,15 @@ impl Worker for KvConnectorWorker {
tracing::debug!(request_id, "request slot is not finished");
}
} else {
// Made this condition stricter: slot existence checks were added as a prerequisite
// for being added to the maybe_finished_offloading set.
panic!(
"request slot missing for {request_id}; however, it was present when added to the maybe finished offloading set"
// Slot was removed between when we added it to
// maybe_finished_offloading and now. Signal completion so
// vLLM can free the request via _free_blocks().
tracing::warn!(
request_id,
"request slot missing from maybe_finished_offloading set; \
signaling completion"
);
is_finished_offloading.insert(request_id.clone());
}
}

Expand Down Expand Up @@ -443,9 +447,15 @@ impl Worker for KvConnectorWorker {
tracing::debug!(request_id, "request slot is not finished");
}
} else {
panic!(
"request slot missing for {request_id}; however, it was present when added to the maybe finished onboarding set"
// Slot was removed between when we added it to
// maybe_finished_onboarding and now. Signal completion so
// vLLM can free the request via _free_blocks().
tracing::warn!(
request_id,
"request slot missing from maybe_finished_onboarding set; \
signaling completion"
);
is_finished_onboarding.insert(request_id.clone());
}
}

Expand Down
Loading