diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp
index 0f8ebd75c4d9b..5ddbaa77b658d 100644
--- a/sycl/include/sycl/backend.hpp
+++ b/sycl/include/sycl/backend.hpp
@@ -214,6 +214,22 @@ get_native<backend::ext_oneapi_cuda, device>(const device &Obj) {
       Obj.getNative());
 }
 
+template <>
+inline backend_return_t<backend::ext_oneapi_cuda, event>
+get_native<backend::ext_oneapi_cuda, event>(const event &Event) {
+  if (Event.get_backend() != backend::ext_oneapi_cuda) {
+    throw sycl::exception(make_error_code(errc::backend_mismatch),
+                          "Backends mismatch");
+  }
+  backend_return_t<backend::ext_oneapi_cuda, event> ReturnValue;
+  for (auto const &element : Event.getNativeVector()) {
+    ReturnValue.push_back(
+        reinterpret_cast<typename detail::interop<backend::ext_oneapi_cuda,
+                                                  event>::value_type>(element));
+  }
+  return ReturnValue;
+}
+
 #ifndef SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL
 template <>
 __SYCL_DEPRECATED(
@@ -248,6 +264,22 @@ inline backend_return_t<backend::ext_oneapi_hip, context> get_native<
       Obj.getNative());
 }
 
+template <>
+inline backend_return_t<backend::ext_oneapi_hip, event>
+get_native<backend::ext_oneapi_hip, event>(const event &Event) {
+  if (Event.get_backend() != backend::ext_oneapi_hip) {
+    throw sycl::exception(make_error_code(errc::backend_mismatch),
+                          "Backends mismatch");
+  }
+  backend_return_t<backend::ext_oneapi_hip, event> ReturnValue;
+  for (auto const &element : Event.getNativeVector()) {
+    ReturnValue.push_back(
+        reinterpret_cast<typename detail::interop<backend::ext_oneapi_hip,
+                                                  event>::value_type>(element));
+  }
+  return ReturnValue;
+}
+
 #endif // SYCL_EXT_ONEAPI_BACKEND_HIP
 
 template <backend BackendName, typename DataT, int Dimensions,
diff --git a/sycl/include/sycl/detail/backend_traits_cuda.hpp b/sycl/include/sycl/detail/backend_traits_cuda.hpp
index c45aabb4769e4..92fb136126ae1 100644
--- a/sycl/include/sycl/detail/backend_traits_cuda.hpp
+++ b/sycl/include/sycl/detail/backend_traits_cuda.hpp
@@ -50,7 +50,8 @@ template <> struct interop<backend::ext_oneapi_cuda, device> {
 };
 
 template <> struct interop<backend::ext_oneapi_cuda, event> {
-  using type = CUevent;
+  using type = std::vector<CUevent>;
+  using value_type = CUevent;
 };
 
 template <> struct interop<backend::ext_oneapi_cuda, queue> {
@@ -90,7 +91,8 @@ template <> struct BackendInput<backend::ext_oneapi_cuda, event> {
 };
 
 template <> struct BackendReturn<backend::ext_oneapi_cuda, event> {
-  using type = CUevent;
+  using type = std::vector<CUevent>;
+  using value_type = CUevent;
 };
 
 template <> struct BackendInput<backend::ext_oneapi_cuda, queue> {
diff --git a/sycl/include/sycl/detail/backend_traits_hip.hpp b/sycl/include/sycl/detail/backend_traits_hip.hpp
index eef21a7f413b2..b432472d1caa7 100644
--- a/sycl/include/sycl/detail/backend_traits_hip.hpp
+++ b/sycl/include/sycl/detail/backend_traits_hip.hpp
@@ -44,7 +44,8 @@ template <> struct interop<backend::ext_oneapi_hip, device> {
 };
 
 template <> struct interop<backend::ext_oneapi_hip, event> {
-  using type = HIPevent;
+  using type = std::vector<HIPevent>;
+  using value_type = HIPevent;
 };
 
 template <> struct interop<backend::ext_oneapi_hip, queue> {
@@ -84,7 +85,8 @@ template <> struct BackendInput<backend::ext_oneapi_hip, event> {
 };
 
 template <> struct BackendReturn<backend::ext_oneapi_hip, event> {
-  using type = HIPevent;
+  using type = std::vector<HIPevent>;
+  using value_type = HIPevent;
 };
 
 template <> struct BackendInput<backend::ext_oneapi_hip, queue> {
diff --git a/sycl/include/sycl/detail/cg_types.hpp b/sycl/include/sycl/detail/cg_types.hpp
index 9da1f0b664d46..5ceaf71c05d54 100644
--- a/sycl/include/sycl/detail/cg_types.hpp
+++ b/sycl/include/sycl/detail/cg_types.hpp
@@ -16,6 +16,7 @@
 #include <sycl/detail/kernel_desc.hpp>         // for kernel_param_kind_t
 #include <sycl/detail/pi.h>                    // for PI_ERROR_INVALID_WORK...
 #include <sycl/exception.hpp>                  // for nd_range_error
+#include <sycl/ext/codeplay/experimental/host_task_properties.hpp> // for property::host_task::manual_interop_sync
 #include <sycl/group.hpp>                      // for group
 #include <sycl/h_item.hpp>                     // for h_item
 #include <sycl/id.hpp>                         // for id
diff --git a/sycl/include/sycl/detail/host_task_impl.hpp b/sycl/include/sycl/detail/host_task_impl.hpp
index 1634269ac2f2b..990f03eb4129c 100644
--- a/sycl/include/sycl/detail/host_task_impl.hpp
+++ b/sycl/include/sycl/detail/host_task_impl.hpp
@@ -24,14 +24,23 @@ namespace detail {
 class HostTask {
   std::function<void()> MHostTask;
   std::function<void(interop_handle)> MInteropTask;
+  bool ManualInteropSync = false;
 
 public:
   HostTask() : MHostTask([]() {}) {}
   HostTask(std::function<void()> &&Func) : MHostTask(Func) {}
   HostTask(std::function<void(interop_handle)> &&Func) : MInteropTask(Func) {}
+  HostTask(std::function<void(interop_handle)> &&Func,
+           const property_list PropList)
+      : MInteropTask(Func),
+        ManualInteropSync{
+            PropList.has_property<ext::codeplay::experimental::property::
+                                      host_task::manual_interop_sync>()} {}
 
   bool isInteropTask() const { return !!MInteropTask; }
 
+  bool isManualInteropSync() const { return ManualInteropSync; }
+
   void call(HostProfilingInfo *HPI) {
     if (HPI)
       HPI->start();
@@ -74,7 +83,7 @@ std::enable_if_t<
     detail::check_fn_signature<std::remove_reference_t<FuncT>, void()>::value ||
     detail::check_fn_signature<std::remove_reference_t<FuncT>,
                                void(interop_handle)>::value>
-handler::host_task_impl(FuncT &&Func) {
+handler::host_task_impl(FuncT &&Func, const property_list &PropList) {
   throwIfActionIsCreated();
 
   MNDRDesc.set(range<1>(1));
@@ -82,7 +91,11 @@ handler::host_task_impl(FuncT &&Func) {
   // accessors during finalize
   MArgs = MAssociatedAccesors;
 
-  MHostTask.reset(new detail::HostTask(std::move(Func)));
+  if constexpr (detail::check_fn_signature<std::remove_reference_t<FuncT>,
+                                           void(interop_handle)>::value)
+    MHostTask.reset(new detail::HostTask(std::move(Func), std::move(PropList)));
+  else
+    MHostTask.reset(new detail::HostTask(std::move(Func)));
 
   setType(detail::CG::CodeplayHostTask);
 }
diff --git a/sycl/include/sycl/detail/property_helper.hpp b/sycl/include/sycl/detail/property_helper.hpp
index 544e776a21e1e..8eb77b1964b7c 100644
--- a/sycl/include/sycl/detail/property_helper.hpp
+++ b/sycl/include/sycl/detail/property_helper.hpp
@@ -49,8 +49,9 @@ enum DataLessPropKind {
   GraphDependOnAllLeaves = 24,
   GraphUpdatable = 25,
   GraphEnableProfiling = 26,
+  HostTaskManualInteropSync = 27,
   // Indicates the last known dataless property.
-  LastKnownDataLessPropKind = 26,
+  LastKnownDataLessPropKind = 27,
   // Exceeding 32 may cause ABI breaking change on some of OSes.
   DataLessPropKindSize = 32
 };
diff --git a/sycl/include/sycl/ext/codeplay/experimental/host_task_properties.hpp b/sycl/include/sycl/ext/codeplay/experimental/host_task_properties.hpp
new file mode 100644
index 0000000000000..cbc52ded6124c
--- /dev/null
+++ b/sycl/include/sycl/ext/codeplay/experimental/host_task_properties.hpp
@@ -0,0 +1,38 @@
+//==-------- host_task_properties.hpp --- SYCL host task properties --------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <sycl/context.hpp>
+#include <sycl/detail/property_helper.hpp>
+#include <sycl/properties/property_traits.hpp>
+
+namespace sycl {
+inline namespace _V1 {
+namespace ext::codeplay::experimental::property::host_task {
+
+class manual_interop_sync : public ::sycl::detail::DataLessProperty<
+                                ::sycl::detail::HostTaskManualInteropSync> {};
+
+} // namespace ext::codeplay::experimental::property::host_task
+
+// Forward declaration
+class host_task;
+
+template <>
+struct is_property<
+    ext::codeplay::experimental::property::host_task::manual_interop_sync>
+    : std::true_type {};
+
+template <>
+struct is_property_of<
+    ext::codeplay::experimental::property::host_task::manual_interop_sync,
+    host_task> : std::true_type {};
+
+} // namespace _V1
+} // namespace sycl
diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp
index 0dedc6648fd44..610ef5a7b84c4 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp
@@ -48,7 +48,8 @@ template <> struct interop<backend::ext_oneapi_cuda, device> {
 };
 
 template <> struct interop<backend::ext_oneapi_cuda, event> {
-  using type = CUevent;
+  using type = std::vector<CUevent>;
+  using value_type = CUevent;
 };
 
 template <> struct interop<backend::ext_oneapi_cuda, queue> {
@@ -92,7 +93,8 @@ template <> struct BackendInput<backend::ext_oneapi_cuda, event> {
 };
 
 template <> struct BackendReturn<backend::ext_oneapi_cuda, event> {
-  using type = CUevent;
+  using type = std::vector<CUevent>;
+  using value_type = CUevent;
 };
 
 template <> struct BackendInput<backend::ext_oneapi_cuda, queue> {
diff --git a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp
index 9d01c37691f33..3460bb73621aa 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp
@@ -28,7 +28,8 @@ inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) {
 // Implementation of cuda::has_native_event
 inline __SYCL_EXPORT bool has_native_event(event sycl_event) {
   if (sycl_event.get_backend() == backend::ext_oneapi_cuda)
-    return get_native<backend::ext_oneapi_cuda>(sycl_event) != nullptr;
+    return get_native<backend::ext_oneapi_cuda>(sycl_event).size() &&
+           get_native<backend::ext_oneapi_cuda>(sycl_event).front() != nullptr;
 
   return false;
 }
diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp
index a71f5400a813d..41e2fca836787 100644
--- a/sycl/include/sycl/handler.hpp
+++ b/sycl/include/sycl/handler.hpp
@@ -1866,7 +1866,7 @@ class __SYCL_EXPORT handler {
                                               void()>::value ||
                    detail::check_fn_signature<std::remove_reference_t<FuncT>,
                                               void(interop_handle)>::value>
-  host_task_impl(FuncT &&Func);
+  host_task_impl(FuncT &&Func, const property_list &);
 
   /// @brief Get the command graph if any associated with this handler. It can
   /// come from either the associated queue or from being set explicitly through
@@ -2067,8 +2067,8 @@ class __SYCL_EXPORT handler {
                                               void()>::value ||
                    detail::check_fn_signature<std::remove_reference_t<FuncT>,
                                               void(interop_handle)>::value>
-  host_task(FuncT &&Func) {
-    host_task_impl(Func);
+  host_task(FuncT &&Func, const property_list PropList = {}) {
+    host_task_impl(Func, PropList);
   }
 
   /// Defines and invokes a SYCL kernel function for the specified range and
diff --git a/sycl/include/sycl/interop_handle.hpp b/sycl/include/sycl/interop_handle.hpp
index 9839ffd58a6cb..d87b9cb5e0567 100644
--- a/sycl/include/sycl/interop_handle.hpp
+++ b/sycl/include/sycl/interop_handle.hpp
@@ -39,6 +39,11 @@ class DispatchHostTask;
 class queue_impl;
 class device_impl;
 class context_impl;
+
+// Needed for get_native_events to check if backend_return_t<Backend, event> is
+// a vector
+template <typename T> struct is_std_vector : std::false_type {};
+template <typename T> struct is_std_vector<std::vector<T>> : std::true_type {};
 } // namespace detail
 
 class queue;
@@ -182,6 +187,66 @@ class interop_handle {
 #endif
   }
 
+  // Adds events from the native API so that the SYCL runtime can integrate the
+  // async calls in a native API, with other async operations in the SYCL DAG.
+  // Using this function removes the requirement that a host task callable must
+  // synchronize with any asynchronous operations from within the callable.
+  template <backend Backend = backend::opencl>
+  void add_native_events(backend_return_t<Backend, event> NativeEvents) {
+#ifndef __SYCL_DEVICE_ONLY__
+    // TODO: replace the exception thrown below with the SYCL 2020 exception
+    // with the error code 'errc::backend_mismatch' when those new exceptions
+    // are ready to be used.
+    if (Backend != get_backend())
+      throw invalid_object_error("Incorrect backend argument was passed",
+                                 PI_ERROR_INVALID_MEM_OBJECT);
+    // All native events can be cast to void*, we use this as a generic entry
+    // point to source library
+    std::vector<pi_native_handle> NativeEventHolders(NativeEvents.size());
+    for (auto i = 0; i < NativeEvents.size(); ++i)
+      NativeEventHolders[i] =
+          reinterpret_cast<pi_native_handle>(NativeEvents[i]);
+    return addNativeEvents(NativeEventHolders);
+#else
+    // we believe this won't be ever called on device side
+    return;
+#endif
+  }
+
+  // Gets all the native events that the host task depends on, and that are
+  // still active
+  template <backend Backend = backend::opencl>
+  backend_return_t<Backend, event> get_native_events() {
+#ifndef __SYCL_DEVICE_ONLY__
+    // TODO: replace the exception thrown below with the SYCL 2020 exception
+    // with the error code 'errc::backend_mismatch' when those new exceptions
+    // are ready to be used.
+    if (Backend != get_backend())
+      throw invalid_object_error("Incorrect backend argument was passed",
+                                 PI_ERROR_INVALID_MEM_OBJECT);
+    // All native events can be cast to void*, we use this as a generic entry
+    // point to source library
+    std::vector<pi_native_handle> NativeEventHolders = getNativeEvents();
+    backend_return_t<Backend, event>
+        RetNativeEvents; // This may be a vector of native events or a single
+                         //  native event, depending on the backend
+    if constexpr (detail::is_std_vector<
+                      backend_return_t<Backend, event>>::value) {
+      using ValueT = typename backend_return_t<Backend, event>::value_type;
+      for (auto i = 0; i < NativeEventHolders.size(); ++i)
+        RetNativeEvents.push_back(
+            reinterpret_cast<ValueT>(NativeEventHolders[i]));
+    } else {
+      RetNativeEvents = reinterpret_cast<backend_return_t<Backend, event>>(
+          NativeEventHolders[0]);
+    }
+    return RetNativeEvents;
+#else
+    // we believe this won't be ever called on device side
+    return {};
+#endif
+  }
+
 private:
   friend class detail::ExecCGCommand;
   friend class detail::DispatchHostTask;
@@ -190,8 +255,9 @@ class interop_handle {
   interop_handle(std::vector<ReqToMem> MemObjs,
                  const std::shared_ptr<detail::queue_impl> &Queue,
                  const std::shared_ptr<detail::device_impl> &Device,
-                 const std::shared_ptr<detail::context_impl> &Context)
-      : MQueue(Queue), MDevice(Device), MContext(Context),
+                 const std::shared_ptr<detail::context_impl> &Context,
+                 const std::shared_ptr<detail::event_impl> &Event)
+      : MQueue(Queue), MDevice(Device), MContext(Context), MEvent(Event),
         MMemObjs(std::move(MemObjs)) {}
 
   template <backend Backend, typename DataT, int Dims>
@@ -215,10 +281,13 @@ class interop_handle {
   getNativeQueue(int32_t &NativeHandleDesc) const;
   __SYCL_EXPORT pi_native_handle getNativeDevice() const;
   __SYCL_EXPORT pi_native_handle getNativeContext() const;
+  __SYCL_EXPORT void addNativeEvents(std::vector<pi_native_handle> &);
+  __SYCL_EXPORT std::vector<pi_native_handle> getNativeEvents() const;
 
   std::shared_ptr<detail::queue_impl> MQueue;
   std::shared_ptr<detail::device_impl> MDevice;
   std::shared_ptr<detail::context_impl> MContext;
+  std::shared_ptr<detail::event_impl> MEvent;
 
   std::vector<ReqToMem> MMemObjs;
 };
diff --git a/sycl/include/sycl/properties/all_properties.hpp b/sycl/include/sycl/properties/all_properties.hpp
index 6f42500ea6b38..b6f3355b6d65a 100644
--- a/sycl/include/sycl/properties/all_properties.hpp
+++ b/sycl/include/sycl/properties/all_properties.hpp
@@ -9,6 +9,7 @@
 #pragma once
 
 #include <sycl/ext/codeplay/experimental/fusion_properties.hpp>
+#include <sycl/ext/codeplay/experimental/host_task_properties.hpp>
 #include <sycl/properties/accessor_properties.hpp>
 #include <sycl/properties/buffer_properties.hpp>
 #include <sycl/properties/context_properties.hpp>
diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt
index b1ef3f71ae170..f11fd2ecc8bd9 100644
--- a/sycl/plugins/unified_runtime/CMakeLists.txt
+++ b/sycl/plugins/unified_runtime/CMakeLists.txt
@@ -99,14 +99,8 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
       CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
   endfunction()
 
-  set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 1e9b1b493fe30e6236bf611ae6d82366c9376f6c
-  # Merge: a011f092 d8500a36
-  # Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
-  # Date:   Fri Jun 21 10:22:52 2024 +0100
-  #     Merge pull request #805 from aarongreig/aaron/kernelSetArgIndirectionFix
-  #     Correct level of indirection used in KernelSetArgPointer calls.
-  set(UNIFIED_RUNTIME_TAG 1e9b1b493fe30e6236bf611ae6d82366c9376f6c)
+  set(UNIFIED_RUNTIME_REPO "https://github.com/hdelan/unified-runtime.git")
+  set(UNIFIED_RUNTIME_TAG interop-event-check)
 
   fetch_adapter_source(level_zero
     ${UNIFIED_RUNTIME_REPO}
diff --git a/sycl/source/detail/backend_impl.hpp b/sycl/source/detail/backend_impl.hpp
index ca23ceb48815c..37add4e8b43e3 100644
--- a/sycl/source/detail/backend_impl.hpp
+++ b/sycl/source/detail/backend_impl.hpp
@@ -9,13 +9,22 @@
 #pragma once
 #include <cassert>
 #include <sycl/backend_types.hpp>
+#include <sycl/event.hpp>
 
 namespace sycl {
 inline namespace _V1 {
 namespace detail {
 
 template <class T> backend getImplBackend(const T &Impl) {
-  assert(!Impl->is_host() && "Cannot get the backend for host.");
+  // Experimental host task allows the user to get backend for event impls
+  if constexpr (std::is_same_v<T, std::shared_ptr<event_impl>>) {
+    assert((!Impl->is_host() || Impl->backendSet()) &&
+           "interop_handle::add_native_events must be "
+           "used in order for a host "
+           "task event to have a native event");
+  } else {
+    assert(!Impl->is_host() && "Cannot get the backend for host.");
+  }
   return Impl->getContextImplPtr()->getBackend();
 }
 
diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp
index c7d245e5e91c0..710111a1daf34 100644
--- a/sycl/source/detail/event_impl.cpp
+++ b/sycl/source/detail/event_impl.cpp
@@ -82,6 +82,9 @@ void event_impl::waitInternal(bool *Success) {
         "waitInternal method cannot be used for a discarded event.");
   } else if (MState != HES_Complete) {
     // Wait for the host event
+    // In the case that the Host Task function stores native events with
+    // add_native_events, waitInternal will only wait on the lambda to complete,
+    // not on the asynchronous events
     std::unique_lock<std::mutex> lock(MMutex);
     cv.wait(lock, [this] { return MState == HES_Complete; });
   }
@@ -91,6 +94,14 @@ void event_impl::waitInternal(bool *Success) {
     Event->wait(Event);
 }
 
+void event_impl::waitForHostTaskNativeEvents() {
+  std::unique_lock<std::mutex> Lock(MHostTaskNativeEventsMutex);
+  if (MHostTaskNativeEventsHaveBeenWaitedOn.exchange(true))
+    return;
+  for (const EventImplPtr &Event : MHostTaskNativeEvents)
+    Event->wait(Event);
+}
+
 void event_impl::setComplete() {
   if (MHostEvent || !MEvent) {
     {
@@ -265,6 +276,9 @@ void event_impl::wait(std::shared_ptr<sycl::detail::event_impl> Self,
   else if (MCommand)
     detail::Scheduler::getInstance().waitForEvent(Self, Success);
 
+  if (MHostTaskNativeEvents.size())
+    waitForHostTaskNativeEvents();
+
 #ifdef XPTI_ENABLE_INSTRUMENTATION
   instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
 #endif
@@ -483,11 +497,34 @@ pi_native_handle event_impl::getNative() {
   }
   if (MContext->getBackend() == backend::opencl)
     Plugin->call<PiApiKind::piEventRetain>(getHandleRef());
-  pi_native_handle Handle;
-  Plugin->call<PiApiKind::piextEventGetNativeHandle>(getHandleRef(), &Handle);
+  pi_native_handle Handle = 0;
+  if (auto HandleRef = getHandleRef())
+    Plugin->call<PiApiKind::piextEventGetNativeHandle>(HandleRef, &Handle);
   return Handle;
 }
 
+std::vector<pi_native_handle> event_impl::getNativeVector() {
+  // Return empty vec if native events have already been waited on
+  if (isCompleted() && (!hasHostTaskNativeEvents() ||
+                        MHostTaskNativeEventsHaveBeenWaitedOn.load()))
+    return {};
+
+  // If there is a native event return that. This will also initialize context
+  if (auto nativeEvent = getNative())
+    return {nativeEvent};
+
+  // Return native events submitted via host task interop
+  auto Plugin = getPlugin();
+  std::vector<pi_native_handle> HandleVec;
+  for (auto &HostTaskNativeEventImpl : MHostTaskNativeEvents) {
+    pi_native_handle Handle;
+    Plugin->call<PiApiKind::piextEventGetNativeHandle>(
+        HostTaskNativeEventImpl->MEvent, &Handle);
+    HandleVec.push_back(Handle);
+  }
+  return HandleVec;
+}
+
 std::vector<EventImplPtr> event_impl::getWaitList() {
   if (MState == HES_Discarded)
     throw sycl::exception(
@@ -502,6 +539,8 @@ std::vector<EventImplPtr> event_impl::getWaitList() {
                 MPreparedDepsEvents.end());
   Result.insert(Result.end(), MPreparedHostDepsEvents.begin(),
                 MPreparedHostDepsEvents.end());
+  Result.insert(Result.end(), MHostTaskNativeEvents.begin(),
+                MHostTaskNativeEvents.end());
 
   return Result;
 }
diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp
index 91bef738450d3..af87b074268f9 100644
--- a/sycl/source/detail/event_impl.hpp
+++ b/sycl/source/detail/event_impl.hpp
@@ -8,6 +8,7 @@
 
 #pragma once
 
+#include <detail/context_impl.hpp> // For context_impl::setPlugin
 #include <detail/plugin.hpp>
 #include <sycl/detail/cl.h>
 #include <sycl/detail/common.hpp>
@@ -131,6 +132,9 @@ class event_impl {
   ///        it's pointing to is then set according to the outcome.
   void waitInternal(bool *Success = nullptr);
 
+  /// Waits for the event with respect to device type.
+  void waitForHostTaskNativeEvents();
+
   /// Marks this event as completed.
   void setComplete();
 
@@ -189,6 +193,11 @@ class event_impl {
   /// \return a native handle.
   pi_native_handle getNative();
 
+  /// Gets the vector of native handles associated with the SYCL event.
+  ///
+  /// \return a native handle.
+  std::vector<pi_native_handle> getNativeVector();
+
   /// Returns vector of event dependencies.
   ///
   /// @return a reference to MPreparedDepsEvents.
@@ -321,6 +330,28 @@ class event_impl {
     return MEventFromSubmittedExecCommandBuffer;
   }
 
+  void addHostTaskNativeEvent(EventImplPtr Event) {
+    // We need to keep track of which native events refer to
+    // this one host task event, we do that so we can correctly
+    // return the vec of native events when calling get_native
+    MHostTaskNativeEvents.push_back(Event);
+    // We also add to the PreparedDepsEvents so that we can do standard
+    // dependency analysis
+    MPreparedDepsEvents.push_back(Event);
+  }
+
+  bool hasHostTaskNativeEvents() const {
+    return MHostTaskNativeEvents.size() > 0;
+  }
+
+  bool backendSet() const {
+    return !MContext->is_host() || hasHostTaskNativeEvents();
+  }
+
+  const std::vector<EventImplPtr> &getHostTaskNativeEvents() const {
+    return MHostTaskNativeEvents;
+  }
+
   void setProfilingEnabled(bool Value) { MIsProfilingEnabled = Value; }
 
   // Sets a command-buffer command when this event represents an enqueue to a
@@ -379,6 +410,12 @@ class event_impl {
 
   std::vector<EventImplPtr> MPostCompleteEvents;
 
+  // Used to hold pi_events for native events that are stored with
+  // interop_handle::add_native_events
+  std::vector<EventImplPtr> MHostTaskNativeEvents;
+  std::atomic<bool> MHostTaskNativeEventsHaveBeenWaitedOn = false;
+  std::mutex MHostTaskNativeEventsMutex;
+
   /// Indicates that the task associated with this event has been submitted by
   /// the queue to the device.
   std::atomic<bool> MIsFlushed = false;
diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp
index 34537c7191af6..d609c8b7ac5c3 100644
--- a/sycl/source/detail/platform_impl.hpp
+++ b/sycl/source/detail/platform_impl.hpp
@@ -141,7 +141,8 @@ class platform_impl {
 
   // \return the Plugin associated with this platform.
   const PluginPtr &getPlugin() const {
-    assert(!MHostPlatform && "Plugin is not available for Host.");
+    assert((!MHostPlatform || MBackendSet) &&
+           "Plugin is not available for Host.");
     return MPlugin;
   }
 
@@ -241,6 +242,7 @@ class platform_impl {
   bool MHostPlatform = false;
   sycl::detail::pi::PiPlatform MPlatform = 0;
   backend MBackend;
+  bool MBackendSet = !MHostPlatform;
 
   PluginPtr MPlugin;
   std::vector<std::weak_ptr<device_impl>> MDeviceCache;
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index a164c455fed54..c87f98ef35e1b 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -320,8 +320,12 @@ static void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls,
 class DispatchHostTask {
   ExecCGCommand *MThisCmd;
   std::vector<interop_handle::ReqToMem> MReqToMem;
+  EventImplPtr MEvent; // If we want to add native events within the host task
+                       // using the interop_handle then we need to have a ptr
+                       // to the event impl of the sycl event returned at
+                       // CGSubmit
 
-  pi_result waitForEvents() const {
+  pi_result waitForNativeDepEvents() const {
     std::map<const PluginPtr, std::vector<EventImplPtr>>
         RequiredEventsPerPlugin;
 
@@ -341,8 +345,9 @@ class DispatchHostTask {
       if (RawEvents.size() == 0)
         continue;
       try {
-        PluginWithEvents.first->call<PiApiKind::piEventsWait>(RawEvents.size(),
-                                                              RawEvents.data());
+        if (RawEvents.size())
+          PluginWithEvents.first->call<PiApiKind::piEventsWait>(
+              RawEvents.size(), RawEvents.data());
       } catch (const sycl::exception &E) {
         CGHostTask &HostTask = static_cast<CGHostTask &>(MThisCmd->getCG());
         HostTask.MQueue->reportAsyncException(std::current_exception());
@@ -358,6 +363,8 @@ class DispatchHostTask {
     // Host events can't throw exceptions so don't try to catch it.
     for (const EventImplPtr &Event : MThisCmd->MPreparedHostDepsEvents) {
       Event->waitInternal();
+      if (Event->hasHostTaskNativeEvents())
+        Event->waitForHostTaskNativeEvents();
     }
 
     return PI_SUCCESS;
@@ -365,8 +372,9 @@ class DispatchHostTask {
 
 public:
   DispatchHostTask(ExecCGCommand *ThisCmd,
-                   std::vector<interop_handle::ReqToMem> ReqToMem)
-      : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)) {}
+                   std::vector<interop_handle::ReqToMem> ReqToMem,
+                   EventImplPtr MEvent)
+      : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)), MEvent(MEvent) {}
 
   void operator()() const {
     assert(MThisCmd->getCG().getType() == CG::CGTYPE::CodeplayHostTask);
@@ -385,16 +393,18 @@ class DispatchHostTask {
     }
 #endif
 
-    pi_result WaitResult = waitForEvents();
-    if (WaitResult != PI_SUCCESS) {
-      std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error(
-          std::string("Couldn't wait for host-task's dependencies"),
-          WaitResult));
-      HostTask.MQueue->reportAsyncException(EPtr);
-      // reset host-task's lambda and quit
-      HostTask.MHostTask.reset();
-      Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd);
-      return;
+    if (!HostTask.MHostTask->isManualInteropSync()) {
+      pi_result WaitResult = waitForNativeDepEvents();
+      if (WaitResult != PI_SUCCESS) {
+        std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error(
+            std::string("Couldn't wait for host-task's dependencies"),
+            WaitResult));
+        HostTask.MQueue->reportAsyncException(EPtr);
+        // reset host-task's lambda and quit
+        HostTask.MHostTask.reset();
+        Scheduler::getInstance().NotifyHostTaskCompletion(MThisCmd);
+        return;
+      }
     }
 
     try {
@@ -402,7 +412,7 @@ class DispatchHostTask {
       if (HostTask.MHostTask->isInteropTask()) {
         interop_handle IH{MReqToMem, HostTask.MQueue,
                           HostTask.MQueue->getDeviceImplPtr(),
-                          HostTask.MQueue->getContextImplPtr()};
+                          HostTask.MQueue->getContextImplPtr(), MEvent};
 
         HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH);
       } else
@@ -1055,7 +1065,7 @@ void AllocaCommand::emitInstrumentationData() {
 
 pi_int32 AllocaCommand::enqueueImp() {
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
 
   sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef();
 
@@ -1152,7 +1162,7 @@ void *AllocaSubBufCommand::getMemAllocation() const {
 
 pi_int32 AllocaSubBufCommand::enqueueImp() {
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef();
 
   MMemAllocation = MemoryManager::allocateMemSubBuffer(
@@ -1221,7 +1231,7 @@ void ReleaseCommand::emitInstrumentationData() {
 
 pi_int32 ReleaseCommand::enqueueImp() {
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   std::vector<sycl::detail::pi::PiEvent> RawEvents = getPiEvents(EventImpls);
   bool SkipRelease = false;
 
@@ -1344,7 +1354,7 @@ void MapMemObject::emitInstrumentationData() {
 
 pi_int32 MapMemObject::enqueueImp() {
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   std::vector<sycl::detail::pi::PiEvent> RawEvents = getPiEvents(EventImpls);
   flushCrossQueueDeps(EventImpls, getWorkerQueue());
 
@@ -1430,7 +1440,7 @@ bool UnMapMemObject::producesPiEvent() const {
 
 pi_int32 UnMapMemObject::enqueueImp() {
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   std::vector<sycl::detail::pi::PiEvent> RawEvents = getPiEvents(EventImpls);
   flushCrossQueueDeps(EventImpls, getWorkerQueue());
 
@@ -1540,7 +1550,7 @@ bool MemCpyCommand::producesPiEvent() const {
 
 pi_int32 MemCpyCommand::enqueueImp() {
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
 
   sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef();
 
@@ -1603,7 +1613,7 @@ void ExecCGCommand::clearAuxiliaryResources() {
 
 pi_int32 UpdateHostRequirementCommand::enqueueImp() {
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef();
   Command::waitForEvents(MQueue, EventImpls, Event);
 
@@ -1697,7 +1707,7 @@ const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const {
 pi_int32 MemCpyCommandHost::enqueueImp() {
   const QueueImplPtr &Queue = getWorkerQueue();
   waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   std::vector<sycl::detail::pi::PiEvent> RawEvents = getPiEvents(EventImpls);
 
   sycl::detail::pi::PiEvent &Event = MEvent->getHandleRef();
@@ -1729,7 +1739,8 @@ EmptyCommand::EmptyCommand(QueueImplPtr Queue)
 
 pi_int32 EmptyCommand::enqueueImp() {
   waitForPreparedHostEvents();
-  waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef());
+  auto DepEvents = getAllPreparedDepsEvents();
+  waitForEvents(MQueue, DepEvents, MEvent->getHandleRef());
 
   return PI_SUCCESS;
 }
@@ -2751,7 +2762,7 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() {
   // Any device dependencies need to be waited on here since subsequent
   // submissions of the command buffer itself will not receive dependencies on
   // them, e.g. initial copies from host to device
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   flushCrossQueueDeps(EventImpls, getWorkerQueue());
   std::vector<sycl::detail::pi::PiEvent> RawEvents = getPiEvents(EventImpls);
   if (!RawEvents.empty()) {
@@ -2906,7 +2917,7 @@ pi_int32 ExecCGCommand::enqueueImp() {
 pi_int32 ExecCGCommand::enqueueImpQueue() {
   if (getCG().getType() != CG::CGTYPE::CodeplayHostTask)
     waitForPreparedHostEvents();
-  std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
+  std::vector<EventImplPtr> EventImpls = getAllPreparedDepsEvents();
   auto RawEvents = getPiEvents(EventImpls);
   flushCrossQueueDeps(EventImpls, getWorkerQueue());
 
@@ -3163,7 +3174,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() {
     copySubmissionCodeLocation();
 
     MQueue->getThreadPool().submit<DispatchHostTask>(
-        DispatchHostTask(this, std::move(ReqToMem)));
+        DispatchHostTask(this, std::move(ReqToMem), MEvent));
 
     MShouldCompleteEventIfPossible = false;
 
diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp
index 8ba0cceee9e6a..eb77ec55f3e22 100644
--- a/sycl/source/detail/scheduler/commands.hpp
+++ b/sycl/source/detail/scheduler/commands.hpp
@@ -301,6 +301,18 @@ class Command {
     return MPreparedDepsEvents;
   }
 
+  // This will return the MPreparedDepsEvents as well as any
+  // HostTaskNativeEvents that an EventImplPtr might hold
+  const std::vector<EventImplPtr> getAllPreparedDepsEvents() const {
+    std::vector<EventImplPtr> RetEvents = MPreparedDepsEvents;
+    for (auto &DepEvent : MPreparedHostDepsEvents)
+      if (DepEvent->hasHostTaskNativeEvents())
+        RetEvents.insert(RetEvents.end(),
+                         DepEvent->getHostTaskNativeEvents().begin(),
+                         DepEvent->getHostTaskNativeEvents().end());
+    return RetEvents;
+  }
+
   // XPTI instrumentation. Copy code location details to the internal struct.
   // Memory is allocated in this method and released in destructor.
   void copySubmissionCodeLocation();
diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp
index 9853b25d9d310..7dead84f63bb9 100644
--- a/sycl/source/detail/scheduler/graph_processor.cpp
+++ b/sycl/source/detail/scheduler/graph_processor.cpp
@@ -57,7 +57,12 @@ bool Scheduler::GraphProcessor::handleBlockingCmd(Command *Cmd,
     std::lock_guard<std::mutex> Guard(Cmd->MBlockedUsersMutex);
     if (Cmd->isBlocking()) {
       const EventImplPtr &RootCmdEvent = RootCommand->getEvent();
-      Cmd->addBlockedUserUnique(RootCmdEvent);
+      // Host tasks don't need to be added to wait list. When host tasks are
+      // enqueued, a new thread is created which waits on dep events via
+      // condition variables, so they don't need to be enqueued by other
+      // additional means
+      if (!RootCommand->isHostTask())
+        Cmd->addBlockedUserUnique(RootCmdEvent);
       EnqueueResult = EnqueueResultT(EnqueueResultT::SyclEnqueueBlocked, Cmd);
 
       // Blocked command will be enqueued asynchronously from submission so we
diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp
index a7bae8055c10b..367cd7c52ac5e 100644
--- a/sycl/source/event.cpp
+++ b/sycl/source/event.cpp
@@ -127,8 +127,7 @@ backend event::get_backend() const noexcept { return getImplBackend(impl); }
 pi_native_handle event::getNative() const { return impl->getNative(); }
 
 std::vector<pi_native_handle> event::getNativeVector() const {
-  std::vector<pi_native_handle> ReturnVector = {impl->getNative()};
-  return ReturnVector;
+  return impl->getNativeVector();
 }
 
 } // namespace _V1
diff --git a/sycl/source/interop_handle.cpp b/sycl/source/interop_handle.cpp
index cd479493bbae3..dfe36d78faeac 100644
--- a/sycl/source/interop_handle.cpp
+++ b/sycl/source/interop_handle.cpp
@@ -52,5 +52,49 @@ interop_handle::getNativeQueue(int32_t &NativeHandleDesc) const {
   return MQueue->getNative(NativeHandleDesc);
 }
 
+void interop_handle::addNativeEvents(
+    std::vector<pi_native_handle> &NativeEvents) {
+  auto Plugin = MQueue->getPlugin();
+
+  if (!MEvent->backendSet()) {
+    MEvent->setContextImpl(MContext);
+  }
+
+  // Make a std::vector of PiEvents from the native events
+  for (auto i = 0u; i < NativeEvents.size(); ++i) {
+    detail::pi::PiEvent Ev;
+    Plugin->call<detail::PiApiKind::piextEventCreateWithNativeHandle>(
+        NativeEvents[i], MContext->getHandleRef(),
+        /*OwnNativeHandle*/ true, &Ev);
+    auto EventImpl = std::make_shared<detail::event_impl>(
+        Ev, detail::createSyclObjFromImpl<context>(MContext));
+    // TODO: Do I need to call things like:
+    // setStateIncomplete -> Not sure
+    // setSubmissionTime  -> Not sure
+    // More...?
+    MEvent->addHostTaskNativeEvent(EventImpl);
+  }
+}
+
+std::vector<pi_native_handle> interop_handle::getNativeEvents() const {
+  // What if the events here have not yet been enqueued? I will need to wait on
+  // them. That is probably already done?
+  //
+  // Moreover what are the usual requirements of the host task launch?
+  //
+  // Do all dependent events need to be complete, or just enqueued? I suspect it
+  // is the former, and we want the latter in the case that we are using these
+  // entry points. We will maybe need a new host task entry point.
+  std::vector<pi_native_handle> RetEvents;
+  for (auto &DepEvent : MEvent->getWaitList()) {
+    if (DepEvent->backendSet()) {
+      auto NativeEvents = DepEvent->getNativeVector();
+      RetEvents.insert(RetEvents.end(), NativeEvents.begin(),
+                       NativeEvents.end());
+    }
+  }
+  return RetEvents;
+}
+
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/test-e2e/HostInteropTask/native-events/add-native-events-cuda.cpp b/sycl/test-e2e/HostInteropTask/native-events/add-native-events-cuda.cpp
new file mode 100644
index 0000000000000..48a19fca6d5be
--- /dev/null
+++ b/sycl/test-e2e/HostInteropTask/native-events/add-native-events-cuda.cpp
@@ -0,0 +1,139 @@
+// REQUIRES: cuda
+//
+// RUN: %{build} -o %t.out -lcuda
+// RUN: %{run} %t.out
+
+// These tests use the add_native_events API to ensure that the SYCL RT can
+// handle the events submitted to add_native_events within its runtime DAG.
+//
+// If manual_interop_sync is used then the user deals with async dependencies
+// manually in the HT lambda through the get_native_events interface.
+
+#include "native-events-cuda.hpp"
+#include "native-events.hpp"
+
+#include <cuda.h>
+#include <sycl/sycl.hpp>
+
+using T = unsigned; // We don't need to test lots of types, we just want a race
+                    // condition
+constexpr size_t bufSize = 1e6;
+constexpr T pattern = 42;
+
+// Tries to check for a race condition if the backend events are not added to
+// the SYCL dag.
+template <typename WaitOnType> void test1() {
+  printf("Running test 2\n");
+  sycl::queue q;
+  std::vector<T> out(bufSize, 0);
+
+  T *ptrHost = sycl::malloc_host<T>(bufSize, q); // malloc_host is necessary to
+                                                 // make the memcpy as async as
+                                                 // possible
+
+  auto syclEvent = q.submit([&](sycl::handler &cgh) {
+    cgh.host_task([&](sycl::interop_handle ih) {
+      auto [stream, ev] = cudaSetCtxAndGetStreamAndEvent(ih);
+      CUdeviceptr cuPtr;
+      CUDA_CHECK(cuMemAlloc_v2(&cuPtr, bufSize * sizeof(T)));
+      CUDA_CHECK(cuMemsetD32Async(cuPtr, pattern, bufSize, stream));
+      CUDA_CHECK(
+          cuMemcpyDtoHAsync(ptrHost, cuPtr, bufSize * sizeof(T), stream));
+
+      CUDA_CHECK(cuEventRecord(ev, stream));
+
+      ih.add_native_events<sycl::backend::ext_oneapi_cuda>({ev});
+    });
+  });
+  waitHelper<WaitOnType>(syclEvent, q);
+  checkResults(ptrHost, bufSize, pattern);
+}
+
+// Using host task event as a cgh.depends_on with USM
+template <typename WaitOnType> void test2() {
+  printf("Running test 3\n");
+  using T = unsigned;
+
+  sycl::queue q;
+  std::vector<T> out(bufSize, 0);
+
+  T *ptrHostA = sycl::malloc_host<T>(bufSize, q);
+  T *ptrHostB = sycl::malloc_host<T>(bufSize, q);
+
+  auto hostTaskEvent = q.submit([&](sycl::handler &cgh) {
+    cgh.host_task([&](sycl::interop_handle ih) {
+      auto [stream, ev] = cudaSetCtxAndGetStreamAndEvent(ih);
+      CUdeviceptr cuPtr;
+      CUDA_CHECK(cuMemAlloc_v2(&cuPtr, bufSize * sizeof(T)));
+
+      CUDA_CHECK(cuMemsetD32Async(cuPtr, pattern, bufSize, stream));
+      CUDA_CHECK(
+          cuMemcpyDtoHAsync(ptrHostA, cuPtr, bufSize * sizeof(T), stream));
+
+      CUDA_CHECK(cuEventRecord(ev, stream));
+
+      ih.add_native_events<sycl::backend::ext_oneapi_cuda>({ev});
+    });
+  });
+
+  auto syclEvent = q.submit([&](sycl::handler &cgh) {
+    cgh.depends_on(hostTaskEvent);
+    cgh.memcpy(ptrHostB, ptrHostA, bufSize * sizeof(T));
+  });
+
+  waitHelper<WaitOnType>(syclEvent, q);
+  checkResults(ptrHostB, bufSize, pattern);
+  printf("Tests passed\n");
+}
+
+// Using host task event with implicit DAG from buffer accessor model
+template <typename WaitOnType> void test3() {
+  printf("Running test 4\n");
+  using T = unsigned;
+
+  sycl::queue q;
+
+  T *ptrHostIn = sycl::malloc_host<T>(bufSize, q);
+  T *ptrHostOut = sycl::malloc_host<T>(bufSize, q);
+
+  // Dummy buffer to create dependencies between commands. Use a host malloc
+  // for host ptr to make sure the buffer has pinned memory
+  sycl::buffer<T, 1> buf{
+      ptrHostIn, bufSize, {sycl::property::buffer::use_host_ptr{}}};
+
+  q.submit([&](sycl::handler &cgh) {
+    sycl::accessor acc{buf, sycl::write_only};
+
+    cgh.host_task([&](sycl::interop_handle ih) {
+      // FIXME: this call fails
+      auto accPtr = ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(acc);
+      auto [stream, ev] = cudaSetCtxAndGetStreamAndEvent(ih);
+
+      CUDA_CHECK(cuMemsetD32Async(reinterpret_cast<CUdeviceptr>(accPtr),
+                                  pattern, bufSize, stream));
+      CUDA_CHECK(cuEventRecord(ev, stream));
+
+      ih.add_native_events<sycl::backend::ext_oneapi_cuda>({ev});
+    });
+  });
+
+  {
+    sycl::host_accessor hostAcc{buf};
+    for (auto i = 0; i < bufSize; ++i)
+      ptrHostOut[i] = hostAcc[i];
+  }
+
+  q.wait();
+  checkResults(ptrHostOut, bufSize, pattern);
+  printf("Tests passed\n");
+}
+
+int main() {
+  test1<sycl::queue>();
+  test1<sycl::event>();
+  test2<sycl::queue>();
+  test2<sycl::event>();
+  // test3<sycl::queue>(); Fails with `SyclObject.impl && "every constructor
+  //                       should create an impl"' failed.
+  // test3<sycl::event>();
+}
diff --git a/sycl/test-e2e/HostInteropTask/native-events/get-native-events-cuda.cpp b/sycl/test-e2e/HostInteropTask/native-events/get-native-events-cuda.cpp
new file mode 100644
index 0000000000000..798ffb69009fa
--- /dev/null
+++ b/sycl/test-e2e/HostInteropTask/native-events/get-native-events-cuda.cpp
@@ -0,0 +1,187 @@
+// REQUIRES: cuda
+//
+// RUN: %{build} -o %t.out -lcuda
+// RUN: %{run} %t.out
+
+// These tests use the get_native_events API together with manual_interop_sync
+// property. If manual interop sync is not used then get_native_events is not
+// necessary, since all events have been synchronized with already on host,
+// before the HT lambda is launched.
+//
+// If manual_interop_sync is used then the user deals with async dependencies
+// manually in the HT lambda through the get_native_events interface.
+//
+
+#include "native-events-cuda.hpp"
+#include "native-events.hpp"
+
+#include <cuda.h>
+#include <sycl/sycl.hpp>
+
+using T = unsigned; // We don't need to test lots of types, we just want a race
+                    // condition
+constexpr size_t bufSize = 1e7;
+constexpr T pattern = 42;
+
+sycl::queue q;
+
+// Check that the SYCL event that we submit with add_native_events can be
+// retrieved later through get_native_events in a dependent host task
+template <typename WaitOnType, bool UseManualInteropSync> struct test1 {
+  void operator()() {
+    printf("Running test 1\n");
+    std::atomic<CUevent> atomicEvent; // To share the event from the host task
+                                      // with the main thread
+
+    auto syclEvent1 = q.submit([&](sycl::handler &cgh) {
+      cgh.host_task([&](sycl::interop_handle ih) {
+        auto [_, ev] = cudaSetCtxAndGetStreamAndEvent(ih);
+        cuEventRecord(ev, 0);
+        atomicEvent.store(ev);
+        ih.add_native_events<sycl::backend::ext_oneapi_cuda>({ev});
+      });
+    });
+
+    // This task must wait on the other lambda to complete
+    auto syclEvent2 = q.submit([&](sycl::handler &cgh) {
+      cgh.depends_on(syclEvent1);
+      cgh.host_task(
+          [&](sycl::interop_handle ih) {
+            auto nativeEvents =
+                ih.get_native_events<sycl::backend::ext_oneapi_cuda>();
+            if constexpr (!UseManualInteropSync) {
+              // Events should be synchronized with by SYCL RT if
+              // manual_interop_sync not used
+              return;
+            }
+            assert(std::find(nativeEvents.begin(), nativeEvents.end(),
+                             atomicEvent.load()) != nativeEvents.end());
+          },
+          PropList(UseManualInteropSync));
+    });
+
+    waitHelper<WaitOnType>(syclEvent2, q);
+  }
+};
+
+// Tries to check for a race condition if the backend events are not added to
+// the SYCL dag.
+template <typename WaitOnType, bool UseManualInteropSync> struct test2 {
+  void operator()() {
+    printf("Running test 2\n");
+    T *ptrHost = sycl::malloc_host<T>(
+        bufSize,
+        q); // malloc_host is necessary to make the memcpy as async as possible
+
+    auto syclEvent1 = q.submit([&](sycl::handler &cgh) {
+      cgh.host_task([&](sycl::interop_handle ih) {
+        auto [stream, ev] = cudaSetCtxAndGetStreamAndEvent(ih);
+        CUdeviceptr cuPtr;
+        CUDA_CHECK(cuMemAlloc_v2(&cuPtr, bufSize * sizeof(T)));
+        CUDA_CHECK(cuMemsetD32Async(cuPtr, pattern, bufSize, stream));
+        CUDA_CHECK(
+            cuMemcpyDtoHAsync(ptrHost, cuPtr, bufSize * sizeof(T), stream));
+
+        CUDA_CHECK(cuEventRecord(ev, stream));
+
+        ih.add_native_events<sycl::backend::ext_oneapi_cuda>({ev});
+      });
+    });
+
+    auto syclEvent2 = q.submit([&](sycl::handler &cgh) {
+      cgh.depends_on(syclEvent1);
+      cgh.host_task(
+          [&](sycl::interop_handle ih) {
+            cudaSetCtxAndGetStreamAndEvent(ih);
+            auto nativeEvents =
+                ih.get_native_events<sycl::backend::ext_oneapi_cuda>();
+            if constexpr (!UseManualInteropSync) {
+              // Events should be synchronized with by SYCL RT if
+              // manual_interop_sync not used
+              return;
+            }
+            assert(nativeEvents.size());
+            for (auto &cudaEv : nativeEvents) {
+              CUDA_CHECK(cuEventSynchronize(cudaEv));
+            }
+          },
+          PropList(UseManualInteropSync));
+    });
+
+    waitHelper<WaitOnType>(syclEvent2, q);
+    for (auto i = 0; i < bufSize; ++i) {
+      if (ptrHost[i] != pattern) {
+        fprintf(stderr, "Wrong result at index: %d, have %d vs %d\n", i,
+                ptrHost[i], pattern);
+        throw;
+      }
+    }
+  }
+};
+
+// Using host task event as a cgh.depends_on with USM
+template <typename WaitOnType, bool UseManualInteropSync> struct test3 {
+  void operator()() {
+    printf("Running test 3\n");
+    using T = unsigned;
+
+    T *ptrHostA = sycl::malloc_host<T>(bufSize, q);
+    T *ptrHostB = sycl::malloc_host<T>(bufSize, q);
+
+    T *ptrDevice = sycl::malloc_device<T>(bufSize, q);
+
+    for (auto i = 0; i < bufSize; ++i)
+      ptrHostA[i] = pattern;
+
+    auto syclEvent1 = q.submit([&](sycl::handler &cgh) {
+      cgh.memcpy(ptrDevice, ptrHostA, bufSize * sizeof(T));
+    });
+
+    auto syclEvent2 = q.submit([&](sycl::handler &cgh) {
+      cgh.depends_on(syclEvent1);
+      cgh.host_task(
+          [&](sycl::interop_handle ih) {
+            auto [stream, _] = cudaSetCtxAndGetStreamAndEvent(ih);
+            auto nativeEvents =
+                ih.get_native_events<sycl::backend::ext_oneapi_cuda>();
+            if constexpr (UseManualInteropSync) {
+              assert(nativeEvents.size());
+              for (auto &cudaEv : nativeEvents) {
+                CUDA_CHECK(cuStreamWaitEvent(stream, cudaEv, 0));
+              }
+            }
+
+            CUDA_CHECK(cuMemcpyDtoHAsync(
+                ptrHostB, reinterpret_cast<CUdeviceptr>(ptrDevice),
+                bufSize * sizeof(T), stream));
+            CUDA_CHECK(cuStreamSynchronize(stream));
+          },
+          PropList(UseManualInteropSync));
+    });
+
+    waitHelper<WaitOnType>(syclEvent2, q);
+
+    for (auto i = 0; i < bufSize; --i) {
+      if (ptrHostB[i] != pattern) {
+        cuCtxSynchronize();
+        fprintf(stderr, "Wrong result at index: %d, have %d vs %d\n", i,
+                ptrHostB[i], pattern);
+        throw;
+      }
+    }
+  }
+};
+
+template <template <typename, bool> typename Func> void run() {
+  Func<sycl::queue, /* UseManualInteropSync*/ true>()();
+  Func<sycl::event, /* UseManualInteropSync*/ true>()();
+  Func<sycl::queue, /* UseManualInteropSync*/ false>()();
+  Func<sycl::event, /* UseManualInteropSync*/ false>()();
+}
+
+int main() {
+  run<test1>();
+  run<test2>();
+  run<test3>();
+  printf("Tests passed\n");
+}
diff --git a/sycl/test-e2e/HostInteropTask/native-events/manual-interop-sync.cpp b/sycl/test-e2e/HostInteropTask/native-events/manual-interop-sync.cpp
new file mode 100644
index 0000000000000..c8f86ad23fa6e
--- /dev/null
+++ b/sycl/test-e2e/HostInteropTask/native-events/manual-interop-sync.cpp
@@ -0,0 +1,12 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+#include "native-events.hpp"
+
+void test_empty_host_task_with_manual_interop_sync_property() {
+  sycl::queue{}.submit([&](sycl::handler &cgh) {
+    cgh.host_task([&](sycl::interop_handle ih) {}, {manual_interop_sync{}});
+  });
+}
+
+int main() { test_empty_host_task_with_manual_interop_sync_property(); }
diff --git a/sycl/test-e2e/HostInteropTask/native-events/native-events-cuda.hpp b/sycl/test-e2e/HostInteropTask/native-events/native-events-cuda.hpp
new file mode 100644
index 0000000000000..7dc082bee106d
--- /dev/null
+++ b/sycl/test-e2e/HostInteropTask/native-events/native-events-cuda.hpp
@@ -0,0 +1,41 @@
+#include <cuda.h>
+#include <sycl/sycl.hpp>
+
+#define CUDA_CHECK(expr)                                                       \
+  if (auto var = expr; var != CUDA_SUCCESS) {                                  \
+    printf(#expr " failed, returned val %d\n", var);                           \
+    throw;                                                                     \
+  }
+
+// Set up the context in host task and get a created event and stream
+std::pair<CUstream, CUevent>
+cudaSetCtxAndGetStreamAndEvent(sycl::interop_handle &ih) {
+  auto dev = ih.get_native_device<sycl::backend::ext_oneapi_cuda>();
+  CUcontext ctx;
+  CUDA_CHECK(cuDevicePrimaryCtxRetain(&ctx, dev));
+  CUDA_CHECK(cuCtxSetCurrent(ctx));
+  CUstream stream;
+  CUDA_CHECK(cuStreamCreate(&stream, 0));
+  CUevent ev;
+  CUDA_CHECK(cuEventCreate(&ev, 0));
+  return {stream, ev};
+}
+
+// To test that waiting on events is equivalent to wait on queues
+template <typename WaitOnType>
+void waitHelper(sycl::event &event, sycl::queue &q) {
+  if constexpr (std::is_same_v<WaitOnType, sycl::queue>) {
+    q.wait();
+  } else if constexpr (std::is_same_v<WaitOnType, sycl::event>) {
+    event.wait();
+  }
+}
+
+template <typename T> void checkResults(T *ptr, size_t bufSize, T pattern) {
+  for (auto i = 0; i < bufSize; ++i) {
+    if (ptr[i] != pattern) {
+      printf("Wrong result at index: %d, have %d vs %d\n", i, ptr[i], pattern);
+      throw;
+    }
+  }
+}
diff --git a/sycl/test-e2e/HostInteropTask/native-events/native-events.hpp b/sycl/test-e2e/HostInteropTask/native-events/native-events.hpp
new file mode 100644
index 0000000000000..c8fc2446c0401
--- /dev/null
+++ b/sycl/test-e2e/HostInteropTask/native-events/native-events.hpp
@@ -0,0 +1,10 @@
+#include <sycl/sycl.hpp>
+
+using manual_interop_sync =
+    sycl::ext::codeplay::experimental::property::host_task::manual_interop_sync;
+
+constexpr auto PropList = [](bool UseManualInteropSync) -> sycl::property_list {
+  if (UseManualInteropSync)
+    return {manual_interop_sync{}};
+  return {};
+};