diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 072d2a164729..9250eb17ed4a 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -790,7 +790,7 @@ async def pause_generation( # Clear cache if clear_cache: - await self.reset_prefix_cache() + await self.reset_prefix_cache(reset_running_requests=True) await self.reset_mm_cache() await self.reset_encoder_cache()