@@ -771,7 +771,11 @@ GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t
771771 if (src_ctx->device == dst_ctx->device ) {
772772 CUDA_CHECK (cudaMemcpyAsync (dst->data , src->data , ggml_nbytes (src), cudaMemcpyDeviceToDevice, cudaStreamPerThread));
773773 } else {
774+ #ifdef GGML_CUDA_NO_PEER_COPY
775+ return false ;
776+ #else
774777 CUDA_CHECK (cudaMemcpyPeerAsync (dst->data , dst_ctx->device , src->data , src_ctx->device , ggml_nbytes (src), cudaStreamPerThread));
778+ #endif
775779 }
776780 CUDA_CHECK (cudaStreamSynchronize (cudaStreamPerThread));
777781 return true ;
@@ -11322,19 +11326,23 @@ GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_
1132211326 GGML_ASSERT (cuda_ctx_src->device == buf_ctx_src->device );
1132311327 GGML_ASSERT (cuda_ctx_dst->device == buf_ctx_dst->device );
1132411328
11325- if (!cuda_ctx_src->copy_event ) {
11326- ggml_cuda_set_device (cuda_ctx_src->device );
11327- CUDA_CHECK (cudaEventCreateWithFlags (&cuda_ctx_src->copy_event , cudaEventDisableTiming));
11328- }
11329-
1133011329 // copy on src stream
1133111330 if (cuda_ctx_src->device == cuda_ctx_dst->device ) {
1133211331 CUDA_CHECK (cudaMemcpyAsync (dst->data , src->data , ggml_nbytes (dst), cudaMemcpyDeviceToDevice, cuda_ctx_dst->stream ()));
1133311332 } else {
11333+ #ifdef GGML_CUDA_NO_PEER_COPY
11334+ return false ;
11335+ #else
1133411336 CUDA_CHECK (cudaMemcpyPeerAsync (dst->data , cuda_ctx_dst->device , src->data , cuda_ctx_src->device , ggml_nbytes (dst), cuda_ctx_src->stream ()));
11337+ #endif
1133511338 }
1133611339
1133711340 // record event on src stream
11341+ if (!cuda_ctx_src->copy_event ) {
11342+ ggml_cuda_set_device (cuda_ctx_src->device );
11343+ CUDA_CHECK (cudaEventCreateWithFlags (&cuda_ctx_src->copy_event , cudaEventDisableTiming));
11344+ }
11345+
1133811346 CUDA_CHECK (cudaEventRecord (cuda_ctx_src->copy_event , cuda_ctx_src->stream ()));
1133911347
1134011348 // wait on dst stream for the copy to complete
@@ -11530,6 +11538,9 @@ GGML_CALL static bool ggml_backend_cuda_offload_op(ggml_backend_t backend, const
1153011538}
1153111539
1153211540static ggml_backend_event_t ggml_backend_cuda_event_new (ggml_backend_t backend) {
11541+ #ifdef GGML_CUDA_NO_PEER_COPY
11542+ return nullptr ;
11543+ #else
1153311544 ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context ;
1153411545
1153511546 ggml_cuda_set_device (cuda_ctx->device );
@@ -11541,6 +11552,7 @@ static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend)
1154111552 /* .backend = */ backend,
1154211553 /* .context = */ event,
1154311554 };
11555+ #endif
1154411556}
1154511557
1154611558static void ggml_backend_cuda_event_free (ggml_backend_event_t event) {
0 commit comments