Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/kernels/benchmark_paged_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from vllm._custom_C import paged_attention_custom
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, create_kv_caches_with_random

NUM_BLOCKS = 1024
NUM_BLOCKS = 1024*1024
PARTITION_SIZE = 256


Expand Down Expand Up @@ -176,7 +176,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
if do_profile:
latency = run_benchmark(num_iters=1, profile=True)
else:
latency = run_benchmark(num_iters=100, profile=False)
latency = run_benchmark(num_iters=1000, profile=False)
print(f"Kernel running time: {latency * 1000000:.3f} us")


Expand Down
Loading