diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index 8e81c6fe965d..26d372c11319 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -919,9 +919,7 @@ def build( # Guard access to seq_lens_cpu, which may not always be needed # and can be expensive to retrieve in async mode. needs_seq_lens_cpu = self.use_dcp or use_cascade or not is_only_trtllm_decode - seq_lens_cpu = ( - common_attn_metadata.seq_lens.cpu() if needs_seq_lens_cpu else None - ) + seq_lens_cpu = common_attn_metadata.seq_lens_cpu if needs_seq_lens_cpu else None seq_lens_np = seq_lens_cpu.numpy() if seq_lens_cpu is not None else None num_blocks_np = ( (seq_lens_np + (page_size - 1)) // page_size