Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions vllm/v1/engine/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,8 +649,29 @@ def reset_encoder_cache(self) -> None:
# Reset the GPU model runner's encoder cache (physical storage)
self.model_executor.reset_encoder_cache()

def _reset_caches(self, reset_running_requests=True) -> None:
self.reset_prefix_cache(reset_running_requests=reset_running_requests)
def _reset_caches(
self,
reset_running_requests: bool = True,
reset_connector: bool = True,
) -> None:
# ``reset_connector`` defaults to True so external KV-store
# connectors (e.g. MooncakeStoreConnector) drop their state
# alongside the local prefix/mm/encoder caches. This matches the
# invariant callers of ``pause_generation(clear_cache=True)``
# expect: clear all caches, not just the on-engine ones.
# ``Scheduler.reset_connector_cache`` already handles the
# no-connector case (logs + short-circuits), so this is a no-op
# for engines without a configured KV connector.
#
# Internal callers that genuinely want a local-only invalidation
# can pass ``reset_connector=False``; users that need that escape
# hatch from outside should call
# ``scheduler.reset_prefix_cache(reset_connector=False)``
# directly rather than going through this cascade.
self.reset_prefix_cache(
reset_running_requests=reset_running_requests,
reset_connector=reset_connector,
)
Comment on lines +652 to +674

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Because `reset_connector` now defaults to `True` in `_reset_caches`, and `_reset_caches` is called by default from `pause_scheduler(clear_cache=True)`, most users (who do not use an external KV connector) will now see the warning log "reset_connector called but no KV connector is configured." from `Scheduler.reset_connector_cache` every time they pause the engine.

To avoid this log spam for the majority of users while still ensuring the external cache is cleared when present, we should only pass `reset_connector=True` to the scheduler if a connector is actually configured. The current implementation's claim in the code comment that this is a "no-op" is slightly misleading because of this logging side effect.

    def _reset_caches(
        self,
        reset_running_requests: bool = True,
        reset_connector: bool = True,
    ) -> None:
        """Reset the prefix cache, gating the connector reset on a connector existing.

        ``reset_connector`` defaults to True so that external KV-store
        connectors (e.g. MooncakeStoreConnector) drop their state together
        with the local caches, which is what callers of
        ``pause_generation(clear_cache=True)`` expect: clear all caches,
        not just the on-engine ones.

        Internal callers that want a local-only invalidation can pass
        ``reset_connector=False``. Callers outside the engine that need
        that escape hatch should call
        ``scheduler.reset_prefix_cache(reset_connector=False)`` directly
        instead of going through this cascade.

        Args:
            reset_running_requests: Forwarded unchanged to
                ``reset_prefix_cache``.
            reset_connector: Request that the KV connector state be reset
                as well. Only forwarded as truthy when the scheduler
                reports a configured connector.
        """
        # Query the scheduler first; the flag is only forwarded as truthy
        # when a connector is actually configured.
        kv_connector = self.scheduler.get_kv_connector()
        forward_connector_reset = reset_connector and kv_connector is not None
        self.reset_prefix_cache(
            reset_running_requests=reset_running_requests,
            reset_connector=forward_connector_reset,
        )

self.reset_mm_cache()
self.reset_encoder_cache()

Expand Down
Loading