We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a949988, commit 5fbafbb. Copy full SHA for 5fbafbb
python/sglang/srt/mem_cache/memory_pool.py
@@ -879,7 +879,12 @@ def get_size_per_token(self):
879
self.qk_rope_head_dim = self.device_pool.qk_rope_head_dim
880
self.layer_num = self.device_pool.layer_num
881
882
- return (self.kv_lora_rank + self.qk_rope_head_dim) * 1 * self.dtype.itemsize
+ return (
883
+ (self.kv_lora_rank + self.qk_rope_head_dim)
884
+ * 1
885
+ * self.dtype.itemsize
886
+ * self.layer_num
887
+ )
888
889
def init_kv_buffer(self):
890
return torch.empty(
0 commit comments