@@ -742,5 +742,39 @@ def main(
     assert np.max(np.abs(out_value_cache - ref_value_cache)) == 0
 
 
+def test_reconstruct_from_cache():
+    num_heads = 1
+    head_dim = 8
+    vec_size = 8
+    block_size = 16
+    num_tokens = 8
+    num_blocks = 1
+
+    dev = tvm.device("cuda", 0)
+
+    key = tvm.nd.array(np.random.randn(num_tokens, num_heads, head_dim).astype("float16"), dev)
+    value = tvm.nd.array(np.random.randn(num_tokens, num_heads, head_dim).astype("float16"), dev)
+    slot_mapping = tvm.nd.array(np.arange(num_tokens).astype("int32"), dev)
+
+    k_cache = tvm.nd.array(
+        np.random.randn(num_blocks, num_heads, head_dim // vec_size, block_size, vec_size).astype(
+            "float16"
+        ),
+        dev,
+    )
+    v_cache = tvm.nd.array(
+        np.random.randn(num_blocks, num_heads, head_dim, block_size).astype("float16"), dev
+    )
+
+    reshape_and_cache_func = tvm.get_global_func("tvm.contrib.vllm.reshape_and_cache")
+    reconstruct_from_cache_func = tvm.get_global_func("tvm.contrib.vllm.reconstruct_from_cache")
+
+    reshape_and_cache_func(key, value, k_cache, v_cache, slot_mapping)
+    out = reconstruct_from_cache_func(k_cache, v_cache, slot_mapping)
+
+    np.testing.assert_equal(key.numpy(), out[0].numpy())
+    np.testing.assert_equal(value.numpy(), out[1].numpy())
+
+
 if __name__ == "__main__":
     tvm.testing.main()
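
For readers unfamiliar with the blocked KV-cache layout this test round-trips, here is a minimal NumPy sketch of the scatter/gather semantics, assuming the vLLM-style layouts used above: a key cache of shape `[num_blocks, num_heads, head_dim // vec_size, block_size, vec_size]` and a value cache of shape `[num_blocks, num_heads, head_dim, block_size]`. The `reference_reshape_and_cache` / `reference_reconstruct_from_cache` helpers are hypothetical stand-ins for illustration, not part of the TVM or vLLM API, and the actual CUDA kernels may be implemented differently.

```python
import numpy as np


def reference_reshape_and_cache(key, value, k_cache, v_cache, slot_mapping):
    """Hypothetical reference: scatter per-token K/V vectors into the blocked caches.

    Assumed layouts (not taken from the kernels themselves):
      k_cache: [num_blocks, num_heads, head_dim // vec_size, block_size, vec_size]
      v_cache: [num_blocks, num_heads, head_dim, block_size]
    """
    vec_size = k_cache.shape[-1]
    block_size = v_cache.shape[-1]
    num_heads = key.shape[1]
    for i, slot in enumerate(slot_mapping):
        # A flat slot index maps to (block, offset within block).
        block_idx, block_offset = divmod(int(slot), block_size)
        for h in range(num_heads):
            # Keys are stored split into head_dim // vec_size chunks of vec_size.
            k_cache[block_idx, h, :, block_offset, :] = key[i, h].reshape(-1, vec_size)
            # Values keep head_dim contiguous, indexed by the in-block offset.
            v_cache[block_idx, h, :, block_offset] = value[i, h]


def reference_reconstruct_from_cache(k_cache, v_cache, slot_mapping):
    """Hypothetical reference: gather per-token K/V vectors back out of the caches."""
    num_heads, head_dim = v_cache.shape[1], v_cache.shape[2]
    block_size = v_cache.shape[-1]
    num_tokens = len(slot_mapping)
    key = np.empty((num_tokens, num_heads, head_dim), dtype=k_cache.dtype)
    value = np.empty((num_tokens, num_heads, head_dim), dtype=v_cache.dtype)
    for i, slot in enumerate(slot_mapping):
        block_idx, block_offset = divmod(int(slot), block_size)
        for h in range(num_heads):
            key[i, h] = k_cache[block_idx, h, :, block_offset, :].reshape(-1)
            value[i, h] = v_cache[block_idx, h, :, block_offset]
    return key, value


# Host-side round trip mirroring the test's shapes (no GPU needed).
key = np.random.randn(8, 1, 8).astype("float16")
value = np.random.randn(8, 1, 8).astype("float16")
k_cache = np.zeros((1, 1, 1, 16, 8), dtype="float16")
v_cache = np.zeros((1, 1, 8, 16), dtype="float16")
slot_mapping = np.arange(8, dtype="int32")

reference_reshape_and_cache(key, value, k_cache, v_cache, slot_mapping)
k_out, v_out = reference_reconstruct_from_cache(k_cache, v_cache, slot_mapping)
np.testing.assert_equal(key, k_out)
np.testing.assert_equal(value, v_out)
```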