From 50e59ad6ea577ccf62022a5147caa0d601751dcc Mon Sep 17 00:00:00 2001
From: J Todd
Date: Tue, 7 Dec 2021 10:51:50 +0000
Subject: [PATCH] Don't enqueue an event wait on same CUDA stream

This is a no-op as CUDA streams are 'in order'. This patch avoids the small
overhead associated with submitting the wait.
---
 sycl/plugins/cuda/pi_cuda.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp
index 77a7f9c50c008..883e761de3a22 100644
--- a/sycl/plugins/cuda/pi_cuda.cpp
+++ b/sycl/plugins/cuda/pi_cuda.cpp
@@ -473,7 +473,10 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) {
   // for native events, the cuStreamWaitEvent call is used.
   // This makes all future work submitted to stream wait for all
   // work captured in event.
-  return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0));
+  if (queue->get() != event->get_queue()->get()) {
+    return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0));
+  }
+  return PI_SUCCESS;
 }
 
 _pi_program::_pi_program(pi_context ctxt)