diff --git a/tensorrt_llm/_torch/attention_backend/trtllm.py b/tensorrt_llm/_torch/attention_backend/trtllm.py
index 143fae88d62..6d4235734a9 100644
--- a/tensorrt_llm/_torch/attention_backend/trtllm.py
+++ b/tensorrt_llm/_torch/attention_backend/trtllm.py
@@ -634,7 +634,7 @@ def __post_init__(self) -> None:
         self.block_ids_per_seq = None
         self.kv_block_ids_per_seq = None
         if self.enable_flash_mla:
-            self.block_ids_per_seq = torch.empty(
+            self.block_ids_per_seq = torch.zeros(
                 [
                     self.kv_cache_manager.max_batch_size,
                     self.kv_cache_manager.max_blocks_per_seq
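
Reviewer note, not part of the diff: the change swaps `torch.empty` for `torch.zeros` when allocating the FlashMLA `block_ids_per_seq` table. A minimal sketch of the difference, using hypothetical shapes and dtype (the names below are illustrative, not taken from the PR):

```python
import torch

# Hypothetical sizes standing in for kv_cache_manager.max_batch_size /
# max_blocks_per_seq; the real values come from the KV-cache manager.
max_batch_size, max_blocks_per_seq = 4, 8

# torch.empty returns uninitialized memory: unused slots may hold
# arbitrary stale values that can look like valid block ids.
uninit = torch.empty([max_batch_size, max_blocks_per_seq], dtype=torch.int32)

# torch.zeros guarantees every slot starts at a deterministic 0, so a
# consumer that reads past a sequence's real block count sees zeros
# rather than garbage.
zeroed = torch.zeros([max_batch_size, max_blocks_per_seq], dtype=torch.int32)

# Only the slots belonging to real sequences are ever written; with
# zeros, the remainder stays at a known value.
num_seqs, blocks_used = 2, 3
zeroed[:num_seqs, :blocks_used] = torch.arange(
    num_seqs * blocks_used, dtype=torch.int32).view(num_seqs, blocks_used)
```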