From 50e59ad6ea577ccf62022a5147caa0d601751dcc Mon Sep 17 00:00:00 2001
From: J Todd <joeatodd@gmail.com>
Date: Tue, 7 Dec 2021 10:51:50 +0000
Subject: [PATCH] Don't enqueue an event wait on same CUDA stream

Waiting on an event that belongs to the same CUDA stream is a no-op,
because CUDA streams execute their work in order. This patch skips
that wait and so avoids the small overhead of submitting it.
---
 sycl/plugins/cuda/pi_cuda.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp
index 77a7f9c50c008..883e761de3a22 100644
--- a/sycl/plugins/cuda/pi_cuda.cpp
+++ b/sycl/plugins/cuda/pi_cuda.cpp
@@ -473,7 +473,10 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) {
   // for native events, the cuStreamWaitEvent call is used.
   // This makes all future work submitted to stream wait for all
   // work captured in event.
-  return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0));
+  if (queue->get() != event->get_queue()->get()) {
+    return PI_CHECK_ERROR(cuStreamWaitEvent(queue->get(), event->get(), 0));
+  }
+  return PI_SUCCESS;
 }
 
 _pi_program::_pi_program(pi_context ctxt)