Skip to content

Commit 9ef83d1

Browse files
committed
fix ut for vllm kv layout
Signed-off-by: Qidi Sang <[email protected]>
1 parent 67bdec3 commit 9ef83d1

File tree

2 files changed

+2
-1
lines changed

2 files changed

+2
-1
lines changed

flashinfer/jit/xqa.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,5 @@ def gen_xqa_module(
9797
+ flag_head_grp_size
9898
+ flag_sliding_window,
9999
extra_ldflags=["-lcuda"], # Add CUDA Driver API library
100+
extra_cflags=["-DPAGED_KV_CACHE_LAYOUT=1"],
100101
)

tests/attention/test_xqa.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def ref_attention(
160160
@pytest.mark.parametrize("use_attention_sinks", [True, False])
161161
@pytest.mark.parametrize("seq_len", [2, 15, 256, 514])
162162
@pytest.mark.parametrize("batch_size", [1, 4])
163-
@pytest.mark.parametrize("nb_k_heads", [1, 4, 8])
163+
@pytest.mark.parametrize("nb_k_heads", [2, 4])
164164
@pytest.mark.parametrize("tokens_per_page", [16, 64])
165165
@pytest.mark.parametrize("valid_elems_per_head", [32, 128])
166166
@pytest.mark.parametrize("head_grp_size", [8, 16])

0 commit comments

Comments (0)