diff --git a/vllm/v1/core/encoder_cache_manager.py b/vllm/v1/core/encoder_cache_manager.py index d73c05d2cf80..b7837b99be38 100644 --- a/vllm/v1/core/encoder_cache_manager.py +++ b/vllm/v1/core/encoder_cache_manager.py @@ -237,7 +237,7 @@ def free(self, request: Request) -> None: Typically called when a request is finished, cancelled, or aborted. """ - input_ids = self.get_cached_input_ids(request).copy() + input_ids = self.get_cached_input_ids(request) for input_id in input_ids: self.free_encoder_input(request, input_id)