Skip to content
Closed
32 changes: 32 additions & 0 deletions sycl/include/sycl/backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,22 @@ get_native<backend::ext_oneapi_cuda, device>(const device &Obj) {
Obj.getNative());
}

template <>
inline backend_return_t<backend::ext_oneapi_cuda, event>
get_native<backend::ext_oneapi_cuda, event>(const event &Event) {
if (Event.get_backend() != backend::ext_oneapi_cuda) {
throw sycl::exception(make_error_code(errc::backend_mismatch),
"Backends mismatch");
}
backend_return_t<backend::ext_oneapi_cuda, event> ReturnValue;
for (auto const &element : Event.getNativeVector()) {
ReturnValue.push_back(
reinterpret_cast<typename detail::interop<backend::ext_oneapi_cuda,
event>::value_type>(element));
}
return ReturnValue;
}

#ifndef SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL
template <>
__SYCL_DEPRECATED(
Expand Down Expand Up @@ -248,6 +264,22 @@ inline backend_return_t<backend::ext_oneapi_hip, context> get_native<
Obj.getNative());
}

template <>
inline backend_return_t<backend::ext_oneapi_hip, event>
get_native<backend::ext_oneapi_hip, event>(const event &Event) {
if (Event.get_backend() != backend::ext_oneapi_hip) {
throw sycl::exception(make_error_code(errc::backend_mismatch),
"Backends mismatch");
}
backend_return_t<backend::ext_oneapi_hip, event> ReturnValue;
for (auto const &element : Event.getNativeVector()) {
ReturnValue.push_back(
reinterpret_cast<typename detail::interop<backend::ext_oneapi_hip,
event>::value_type>(element));
}
return ReturnValue;
}

#endif // SYCL_EXT_ONEAPI_BACKEND_HIP

template <backend BackendName, typename DataT, int Dimensions,
Expand Down
6 changes: 4 additions & 2 deletions sycl/include/sycl/detail/backend_traits_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ template <> struct interop<backend::ext_oneapi_cuda, device> {
};

template <> struct interop<backend::ext_oneapi_cuda, event> {
using type = CUevent;
using type = std::vector<CUevent>;
using value_type = CUevent;
};

template <> struct interop<backend::ext_oneapi_cuda, queue> {
Expand Down Expand Up @@ -90,7 +91,8 @@ template <> struct BackendInput<backend::ext_oneapi_cuda, event> {
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, event> {
using type = CUevent;
using type = std::vector<CUevent>;
using value_type = CUevent;
};

template <> struct BackendInput<backend::ext_oneapi_cuda, queue> {
Expand Down
6 changes: 4 additions & 2 deletions sycl/include/sycl/detail/backend_traits_hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ template <> struct interop<backend::ext_oneapi_hip, device> {
};

template <> struct interop<backend::ext_oneapi_hip, event> {
using type = HIPevent;
using type = std::vector<HIPevent>;
using value_type = HIPevent;
};

template <> struct interop<backend::ext_oneapi_hip, queue> {
Expand Down Expand Up @@ -84,7 +85,8 @@ template <> struct BackendInput<backend::ext_oneapi_hip, event> {
};

template <> struct BackendReturn<backend::ext_oneapi_hip, event> {
using type = HIPevent;
using type = std::vector<HIPevent>;
using value_type = HIPevent;
};

template <> struct BackendInput<backend::ext_oneapi_hip, queue> {
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <sycl/detail/kernel_desc.hpp> // for kernel_param_kind_t
#include <sycl/detail/pi.h> // for PI_ERROR_INVALID_WORK...
#include <sycl/exception.hpp> // for nd_range_error
#include <sycl/ext/codeplay/experimental/host_task_properties.hpp> // for property::host_task::manual_interop_sync
#include <sycl/group.hpp> // for group
#include <sycl/h_item.hpp> // for h_item
#include <sycl/id.hpp> // for id
Expand Down
17 changes: 15 additions & 2 deletions sycl/include/sycl/detail/host_task_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,23 @@ namespace detail {
class HostTask {
std::function<void()> MHostTask;
std::function<void(interop_handle)> MInteropTask;
bool ManualInteropSync = false;

public:
HostTask() : MHostTask([]() {}) {}
HostTask(std::function<void()> &&Func) : MHostTask(Func) {}
HostTask(std::function<void(interop_handle)> &&Func) : MInteropTask(Func) {}
HostTask(std::function<void(interop_handle)> &&Func,
const property_list PropList)
: MInteropTask(Func),
ManualInteropSync{
PropList.has_property<ext::codeplay::experimental::property::
host_task::manual_interop_sync>()} {}

bool isInteropTask() const { return !!MInteropTask; }

bool isManualInteropSync() const { return ManualInteropSync; }

void call(HostProfilingInfo *HPI) {
if (HPI)
HPI->start();
Expand Down Expand Up @@ -74,15 +83,19 @@ std::enable_if_t<
detail::check_fn_signature<std::remove_reference_t<FuncT>, void()>::value ||
detail::check_fn_signature<std::remove_reference_t<FuncT>,
void(interop_handle)>::value>
handler::host_task_impl(FuncT &&Func) {
handler::host_task_impl(FuncT &&Func, const property_list &PropList) {
throwIfActionIsCreated();

MNDRDesc.set(range<1>(1));
// Need to copy these rather than move so that we can check associated
// accessors during finalize
MArgs = MAssociatedAccesors;

MHostTask.reset(new detail::HostTask(std::move(Func)));
if constexpr (detail::check_fn_signature<std::remove_reference_t<FuncT>,
void(interop_handle)>::value)
MHostTask.reset(new detail::HostTask(std::move(Func), std::move(PropList)));
else
MHostTask.reset(new detail::HostTask(std::move(Func)));

setType(detail::CG::CodeplayHostTask);
}
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ enum DataLessPropKind {
GraphDependOnAllLeaves = 24,
GraphUpdatable = 25,
GraphEnableProfiling = 26,
HostTaskManualInteropSync = 27,
// Indicates the last known dataless property.
LastKnownDataLessPropKind = 26,
LastKnownDataLessPropKind = 27,
// Exceeding 32 may cause ABI breaking change on some of OSes.
DataLessPropKindSize = 32
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//==-------- host_task_properties.hpp --- SYCL host task properties --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/context.hpp>
#include <sycl/detail/property_helper.hpp>
#include <sycl/properties/property_traits.hpp>

namespace sycl {
inline namespace _V1 {
namespace ext::codeplay::experimental::property::host_task {

class manual_interop_sync : public ::sycl::detail::DataLessProperty<
::sycl::detail::HostTaskManualInteropSync> {};

} // namespace ext::codeplay::experimental::property::host_task

// Forward declaration
class host_task;

template <>
struct is_property<
ext::codeplay::experimental::property::host_task::manual_interop_sync>
: std::true_type {};

template <>
struct is_property_of<
ext::codeplay::experimental::property::host_task::manual_interop_sync,
host_task> : std::true_type {};

} // namespace _V1
} // namespace sycl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ template <> struct interop<backend::ext_oneapi_cuda, device> {
};

template <> struct interop<backend::ext_oneapi_cuda, event> {
using type = CUevent;
using type = std::vector<CUevent>;
using value_type = CUevent;
};

template <> struct interop<backend::ext_oneapi_cuda, queue> {
Expand Down Expand Up @@ -92,7 +93,8 @@ template <> struct BackendInput<backend::ext_oneapi_cuda, event> {
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, event> {
using type = CUevent;
using type = std::vector<CUevent>;
using value_type = CUevent;
};

template <> struct BackendInput<backend::ext_oneapi_cuda, queue> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) {
// Implementation of cuda::has_native_event
inline __SYCL_EXPORT bool has_native_event(event sycl_event) {
if (sycl_event.get_backend() == backend::ext_oneapi_cuda)
return get_native<backend::ext_oneapi_cuda>(sycl_event) != nullptr;
return get_native<backend::ext_oneapi_cuda>(sycl_event).size() &&
get_native<backend::ext_oneapi_cuda>(sycl_event).front() != nullptr;

return false;
}
Expand Down
6 changes: 3 additions & 3 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1866,7 +1866,7 @@ class __SYCL_EXPORT handler {
void()>::value ||
detail::check_fn_signature<std::remove_reference_t<FuncT>,
void(interop_handle)>::value>
host_task_impl(FuncT &&Func);
host_task_impl(FuncT &&Func, const property_list &);

/// @brief Get the command graph if any associated with this handler. It can
/// come from either the associated queue or from being set explicitly through
Expand Down Expand Up @@ -2067,8 +2067,8 @@ class __SYCL_EXPORT handler {
void()>::value ||
detail::check_fn_signature<std::remove_reference_t<FuncT>,
void(interop_handle)>::value>
host_task(FuncT &&Func) {
host_task_impl(Func);
host_task(FuncT &&Func, const property_list PropList = {}) {
host_task_impl(Func, PropList);
}

/// Defines and invokes a SYCL kernel function for the specified range and
Expand Down
73 changes: 71 additions & 2 deletions sycl/include/sycl/interop_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ class DispatchHostTask;
class queue_impl;
class device_impl;
class context_impl;

// Needed for get_native_events to check if backend_return_t<Backend, event> is
// a vector
template <typename T> struct is_std_vector : std::false_type {};
template <typename T> struct is_std_vector<std::vector<T>> : std::true_type {};
} // namespace detail

class queue;
Expand Down Expand Up @@ -182,6 +187,66 @@ class interop_handle {
#endif
}

// Adds events from the native API so that the SYCL runtime can integrate the
// async calls in a native API, with other async operations in the SYCL DAG.
// Using this function removes the requirement that a host task callable must
// synchronize with any asynchronous operations from within the callable.
template <backend Backend = backend::opencl>
void add_native_events(backend_return_t<Backend, event> NativeEvents) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This member function is named ext_oneapi_add_native_events in the extension.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should remove the default backend so the user must always specify which backend they are targeting.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea RE default backend. Will change

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also have it in the extension to use backend_return_t here, but I wonder if this should be backend_input_t instead.

Copy link
Contributor Author

@hdelan hdelan May 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting thought. If we have backend_input_t then maybe this entry point should take a std::vector<backend_input_t<...>>

#ifndef __SYCL_DEVICE_ONLY__
// TODO: replace the exception thrown below with the SYCL 2020 exception
// with the error code 'errc::backend_mismatch' when those new exceptions
// are ready to be used.
if (Backend != get_backend())
throw invalid_object_error("Incorrect backend argument was passed",
PI_ERROR_INVALID_MEM_OBJECT);
// All native events can be cast to void*, we use this as a generic entry
// point to source library
std::vector<pi_native_handle> NativeEventHolders(NativeEvents.size());
for (auto i = 0; i < NativeEvents.size(); ++i)
NativeEventHolders[i] =
reinterpret_cast<pi_native_handle>(NativeEvents[i]);
return addNativeEvents(NativeEventHolders);
#else
// we believe this won't be ever called on device side
return;
#endif
}

// Gets all the native events that the host task depends on, and that are
// still active
template <backend Backend = backend::opencl>
backend_return_t<Backend, event> get_native_events() {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have this as returning a std::vector<backend_return_t<Backend, event>> since there may be more than one SYCL event so more than one sets of native events.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes maybe this is a good idea. The problem is that backend_return_t is already a std::vector for many backends. Allocating a vector of vectors would be unnecessarily bad in terms of perf overhead. So maybe a std::vector<backend_input_t<...>> is better?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We want a vector here regardless of whether backend_return_t already is a vector or not. Using backend_input_t in a return type feels wrong, although I think it would work just fine. A third option would be to use some template magic to make a vector out of backend_return_t if it is not already a vector.

#ifndef __SYCL_DEVICE_ONLY__
// TODO: replace the exception thrown below with the SYCL 2020 exception
// with the error code 'errc::backend_mismatch' when those new exceptions
// are ready to be used.
if (Backend != get_backend())
throw invalid_object_error("Incorrect backend argument was passed",
PI_ERROR_INVALID_MEM_OBJECT);
// All native events can be cast to void*, we use this as a generic entry
// point to source library
std::vector<pi_native_handle> NativeEventHolders = getNativeEvents();
backend_return_t<Backend, event>
RetNativeEvents; // This may be a vector of native events or a single
// native event, depending on the backend
if constexpr (detail::is_std_vector<
backend_return_t<Backend, event>>::value) {
using ValueT = typename backend_return_t<Backend, event>::value_type;
for (auto i = 0; i < NativeEventHolders.size(); ++i)
RetNativeEvents.push_back(
reinterpret_cast<ValueT>(NativeEventHolders[i]));
} else {
RetNativeEvents = reinterpret_cast<backend_return_t<Backend, event>>(
NativeEventHolders[0]);
}
return RetNativeEvents;
#else
// we believe this won't be ever called on device side
return {};
#endif
}

private:
friend class detail::ExecCGCommand;
friend class detail::DispatchHostTask;
Expand All @@ -190,8 +255,9 @@ class interop_handle {
interop_handle(std::vector<ReqToMem> MemObjs,
const std::shared_ptr<detail::queue_impl> &Queue,
const std::shared_ptr<detail::device_impl> &Device,
const std::shared_ptr<detail::context_impl> &Context)
: MQueue(Queue), MDevice(Device), MContext(Context),
const std::shared_ptr<detail::context_impl> &Context,
const std::shared_ptr<detail::event_impl> &Event)
: MQueue(Queue), MDevice(Device), MContext(Context), MEvent(Event),
MMemObjs(std::move(MemObjs)) {}

template <backend Backend, typename DataT, int Dims>
Expand All @@ -215,10 +281,13 @@ class interop_handle {
getNativeQueue(int32_t &NativeHandleDesc) const;
__SYCL_EXPORT pi_native_handle getNativeDevice() const;
__SYCL_EXPORT pi_native_handle getNativeContext() const;
__SYCL_EXPORT void addNativeEvents(std::vector<pi_native_handle> &);
__SYCL_EXPORT std::vector<pi_native_handle> getNativeEvents() const;

std::shared_ptr<detail::queue_impl> MQueue;
std::shared_ptr<detail::device_impl> MDevice;
std::shared_ptr<detail::context_impl> MContext;
std::shared_ptr<detail::event_impl> MEvent;

std::vector<ReqToMem> MMemObjs;
};
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/properties/all_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#pragma once

#include <sycl/ext/codeplay/experimental/fusion_properties.hpp>
#include <sycl/ext/codeplay/experimental/host_task_properties.hpp>
#include <sycl/properties/accessor_properties.hpp>
#include <sycl/properties/buffer_properties.hpp>
#include <sycl/properties/context_properties.hpp>
Expand Down
10 changes: 2 additions & 8 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,8 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 1e9b1b493fe30e6236bf611ae6d82366c9376f6c
# Merge: a011f092 d8500a36
# Author: Kenneth Benzie (Benie) <[email protected]>
# Date: Fri Jun 21 10:22:52 2024 +0100
# Merge pull request #805 from aarongreig/aaron/kernelSetArgIndirectionFix
# Correct level of indirection used in KernelSetArgPointer calls.
set(UNIFIED_RUNTIME_TAG 1e9b1b493fe30e6236bf611ae6d82366c9376f6c)
set(UNIFIED_RUNTIME_REPO "https://github.com/hdelan/unified-runtime.git")
set(UNIFIED_RUNTIME_TAG interop-event-check)

fetch_adapter_source(level_zero
${UNIFIED_RUNTIME_REPO}
Expand Down
11 changes: 10 additions & 1 deletion sycl/source/detail/backend_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,22 @@
#pragma once
#include <cassert>
#include <sycl/backend_types.hpp>
#include <sycl/event.hpp>

namespace sycl {
inline namespace _V1 {
namespace detail {

template <class T> backend getImplBackend(const T &Impl) {
assert(!Impl->is_host() && "Cannot get the backend for host.");
// Experimental host task allows the user to get backend for event impls
if constexpr (std::is_same_v<T, std::shared_ptr<event_impl>>) {
assert((!Impl->is_host() || Impl->backendSet()) &&
"interop_handle::add_native_events must be "
"used in order for a host "
"task event to have a native event");
} else {
assert(!Impl->is_host() && "Cannot get the backend for host.");
}
return Impl->getContextImplPtr()->getBackend();
}

Expand Down
Loading