diff --git a/vllm/v1/attention/backend.py b/vllm/v1/attention/backend.py index 13082608c47c..49eb91576ed6 100644 --- a/vllm/v1/attention/backend.py +++ b/vllm/v1/attention/backend.py @@ -347,7 +347,7 @@ def replace(self, **kwargs) -> "CommonAttentionMetadata": """ Prefer using device seq_lens directly to avoid implicit H<>D sync. If a CPU copy is needed, use `seq_lens.cpu()` instead. - Will be removed in a future release (v0.15.0) + Will be removed in a future release, please migrate as soon as possible. """ ) def seq_lens_cpu(self) -> torch.Tensor: @@ -361,7 +361,7 @@ def seq_lens_cpu(self) -> torch.Tensor: Prefer using device seq_lens directly to avoid implicit H<>D sync which breaks full async scheduling. If a CPU copy is needed, it can be derived from query_start_loc_cpu and seq_lens. - Will be removed in a future release (v0.15.0) + Will be removed in a future release, please migrate as soon as possible. """ ) def num_computed_tokens_cpu(self) -> torch.Tensor: