Skip to content

Commit a383d03

Browse files
committed
Don't call request_finished unless the request has already been scheduled
Signed-off-by: jthomson04 <[email protected]>
1 parent 60b3ad9 commit a383d03

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1664,11 +1664,16 @@ def _terminate_request(self, request: LlmRequest):
16641664
if self.kv_connector_manager is None:
16651665
self.resource_manager.free_resources(request)
16661666
else:
1667-
cache_block_ids = self.kv_cache_manager.get_cache_indices(request)
1668-
1669-
if not self.kv_connector_manager.request_finished(
1670-
request, cache_block_ids):
1671-
self.resource_manager.free_resources(request)
1667+
# Only call request_finished on the connector if the request has already been added to the kv cache manager.
1668+
try:
1669+
cache_block_ids = self.kv_cache_manager.get_cache_indices(
1670+
request)
1671+
except IndexError:
1672+
pass
1673+
else:
1674+
if not self.kv_connector_manager.request_finished(
1675+
request, cache_block_ids):
1676+
self.resource_manager.free_resources(request)
16721677

16731678
@nvtx_range("_handle_canceled_requests")
16741679
def _handle_canceled_requests(self):

0 commit comments

Comments
 (0)