[Fix] PagedKVCache fetching compute stream when copy stream is needed (#16714)

MasterJH5574 · web-flow · commit 981009d45780 · 2024-03-13T19:50:58.000-04:00
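This PR fixes an issue in PagedKVCache where the compute stream was
always fetched, regardless of the device type. For backends like WebGPU,
the `GetCurrentStream` function is not implemented, which leads to an
error when fetching the compute stream. The compute stream is now
fetched only for CUDA/ROCm devices, where the standalone copy stream is
created.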
diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc
@@ -439,12 +439,12 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
       free_page_ids_.push_back(page_id);
     }
 
-    // The compute stream is the default stream.
     // If the device is CUDA/ROCm, we create a standalone copy stream, in
     // purpose to hide the latency of auxiliary stream copy.
-    compute_stream_ = DeviceAPI::Get(device)->GetCurrentStream(device);
     if (device.device_type == DLDeviceType::kDLCUDA ||
         device.device_type == DLDeviceType::kDLROCM) {
+      // The compute stream is the default stream.
+      compute_stream_ = DeviceAPI::Get(device)->GetCurrentStream(device);
       copy_stream_ = DeviceAPI::Get(device)->CreateStream(device);
     }
   }

Original file line number	Diff line number	Diff line change
`@@ -439,12 +439,12 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {`
`439`	`439`	`free_page_ids_.push_back(page_id);`
`440`	`440`	`}`
`441`	`441`
`442`		`- // The compute stream is the default stream.`
`443`	`442`	`// If the device is CUDA/ROCm, we create a standalone copy stream, in`
`444`	`443`	`// purpose to hide the latency of auxiliary stream copy.`
`445`		`- compute_stream_ = DeviceAPI::Get(device)->GetCurrentStream(device);`
`446`	`444`	`if (device.device_type == DLDeviceType::kDLCUDA \|\|`
`447`	`445`	`device.device_type == DLDeviceType::kDLROCM) {`
	`446`	`+ // The compute stream is the default stream.`
	`447`	`+ compute_stream_ = DeviceAPI::Get(device)->GetCurrentStream(device);`
`448`	`448`	`copy_stream_ = DeviceAPI::Get(device)->CreateStream(device);`
`449`	`449`	`}`
`450`	`450`	`}`