2 changes: 2 additions & 0 deletions tensorrt_llm/_torch/attention_backend/trtllm.py
@@ -707,6 +707,7 @@ def get_empty_like(like_tensor: torch.Tensor,
             cache_name="block_ids_per_seq",
             dtype=torch.int32,
         )
+        self.block_ids_per_seq.fill_(0)
         self.kv_block_ids_per_seq = get_empty(
             [
                 self.kv_cache_manager.max_batch_size,
@@ -715,6 +716,7 @@ def get_empty_like(like_tensor: torch.Tensor,
             cache_name="kv_block_ids_per_seq",
             dtype=torch.int32,
         )
+        self.kv_block_ids_per_seq.fill_(0)
         if self.enable_paged_context_mla:
             # for kv cache reuse/chunked context in MLA
             self.ctx_cached_token_indptr = get_empty(
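
The two added fill_(0) calls zero-initialize buffers that get_empty hands back. Since those buffers are allocated uninitialized and reused across calls, entries written by an earlier, larger batch can survive in the unused tail of the tensor. The sketch below illustrates the hazard with a simplified, hypothetical get_empty helper (the cache key, shapes, and values are illustrative only, not the actual TensorRT-LLM implementation):

import torch

_buffer_cache = {}  # hypothetical stand-in for the metadata buffer cache

def get_empty(shape, cache_name, dtype):
    # Returns a cached, uninitialized buffer; its contents are whatever was
    # written there last (or garbage from torch.empty on first allocation).
    if cache_name not in _buffer_cache:
        _buffer_cache[cache_name] = torch.empty(shape, dtype=dtype)
    return _buffer_cache[cache_name]

# Iteration 1: a larger batch writes block ids into part of the buffer.
block_ids = get_empty([4, 8], cache_name="block_ids_per_seq", dtype=torch.int32)
block_ids[:2, :3] = 7

# Iteration 2: a smaller batch reuses the same buffer. Without clearing it,
# the entries from iteration 1 are still present outside the region it writes.
block_ids = get_empty([4, 8], cache_name="block_ids_per_seq", dtype=torch.int32)
block_ids.fill_(0)   # the added call: clear stale entries before reuse
block_ids[:1, :2] = 9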