Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ install(DIRECTORY ${OpenCL_INCLUDE_DIR}/CL
DESTINATION ${SYCL_INCLUDE_DIR}/sycl
COMPONENT OpenCL-Headers)

# Option to enable online kernel fusion via a JIT compiler
option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" OFF)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pvchupin - Do we have a good place to document this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A future PR will add support for fusion in buildbot/configure.py, which would then set the value for this option.

Users using buildbot/configure.py would therefore not need to set this option manually.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, I am okay with waiting with documentation for it until it has an option in buildbot/configure.py. I'll let @pvchupin have the last say in that though.


# Needed for feature_test.hpp
if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
set(SYCL_BUILD_PI_CUDA ON)
Expand Down
7 changes: 6 additions & 1 deletion sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,13 @@ enum DataLessPropKind {
UseDefaultStream = 8,
DiscardEvents = 9,
DeviceReadOnly = 10,
FusionPromotePrivate = 11,
FusionPromoteLocal = 12,
FusionNoBarrier = 13,
FusionEnable = 14,
FusionForce = 15,
// Indicates the last known dataless property.
LastKnownDataLessPropKind = 10,
LastKnownDataLessPropKind = 15,
// Exceeding 32 may cause ABI breaking change on some of OSes.
DataLessPropKindSize = 32
};
Expand Down
108 changes: 108 additions & 0 deletions sycl/include/sycl/ext/codeplay/experimental/fusion_properties.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
//==----------- fusion_properties.hpp --- SYCL fusion properties -----------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/access/access.hpp>
#include <sycl/detail/property_helper.hpp>
#include <sycl/properties/property_traits.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext {
namespace codeplay {
namespace experimental {
namespace property {

class promote_private
: public detail::DataLessProperty<detail::FusionPromotePrivate> {};

class promote_local
: public detail::DataLessProperty<detail::FusionPromoteLocal> {};

class no_barriers : public detail::DataLessProperty<detail::FusionNoBarrier> {};

class force_fusion : public detail::DataLessProperty<detail::FusionForce> {};

namespace queue {
class enable_fusion : public detail::DataLessProperty<detail::FusionEnable> {};
} // namespace queue

} // namespace property
} // namespace experimental
} // namespace codeplay
} // namespace ext

// Forward declarations
template <typename T, int Dimensions, typename AllocatorT, typename Enable>
class buffer;

template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder,
typename PropertyListT>
class accessor;

class queue;

// Property trait specializations.
template <>
struct is_property<ext::codeplay::experimental::property::promote_private>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::promote_local>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::no_barriers>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::force_fusion>
: std::true_type {};

template <>
struct is_property<ext::codeplay::experimental::property::queue::enable_fusion>
: std::true_type {};

// Buffer property trait specializations
template <typename T, int Dimensions, typename AllocatorT>
struct is_property_of<ext::codeplay::experimental::property::promote_private,
buffer<T, Dimensions, AllocatorT, void>>
: std::true_type {};

template <typename T, int Dimensions, typename AllocatorT>
struct is_property_of<ext::codeplay::experimental::property::promote_local,
buffer<T, Dimensions, AllocatorT, void>>
: std::true_type {};

// Accessor property trait specializations
template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder,
typename PropertyListT>
struct is_property_of<ext::codeplay::experimental::property::promote_private,
accessor<DataT, Dimensions, AccessMode, AccessTarget,
IsPlaceholder, PropertyListT>> : std::true_type {
};

template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder,
typename PropertyListT>
struct is_property_of<ext::codeplay::experimental::property::promote_local,
accessor<DataT, Dimensions, AccessMode, AccessTarget,
IsPlaceholder, PropertyListT>> : std::true_type {
};

// Queue property trait specializations
template <>
struct is_property_of<
ext::codeplay::experimental::property::queue::enable_fusion, queue>
: std::true_type {};

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
93 changes: 93 additions & 0 deletions sycl/include/sycl/ext/codeplay/experimental/fusion_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//==---- fusion_wrapper.hpp --- SYCL wrapper for queue for kernel fusion ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/queue.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext {
namespace codeplay {
namespace experimental {

///
/// A wrapper wrapping a sycl::queue to provide access to the kernel fusion API,
/// allowing to manage kernel fusion on the wrapped queue.
class __SYCL_EXPORT fusion_wrapper {

public:
///
/// Wrap a queue to get access to the kernel fusion API.
///
/// @throw sycl::exception with errc::invalid if trying to construct a wrapper
/// on a queue which doesn't support fusion.
explicit fusion_wrapper(queue &q);

///
/// Access the queue wrapped by this fusion wrapper.
queue get_queue() const;

///
/// @brief Check whether the wrapped queue is in fusion mode or not.
bool is_in_fusion_mode() const;

///
/// @brief Set the wrapped queue into "fusion mode". This means that the
/// kernels that are submitted in subsequent calls to queue::submit() are not
/// submitted for execution right away, but rather added to a list of kernels
/// that should be fused.
///
/// @throw sycl::exception with errc::invalid if this operation is called on a
/// queue which is already in fusion mode.
void start_fusion();

///
/// @brief Cancel the fusion and submit all kernels submitted since the last
/// start_fusion() for immediate execution without fusion. The kernels are
/// executed in the same order as they were initially submitted to the wrapped
/// queue.
///
/// This operation is asynchronous, i.e., it may return after the previously
/// submitted kernels have been passed to the scheduler, but before any of the
/// previously submitted kernel starts or completes execution. The events
/// returned by submit() since the last call to start_fusion remain valid and
/// can be used for synchronization.
///
/// The queue is not in "fusion mode" anymore after this calls returns, until
/// the next start_fusion().
void cancel_fusion();

///
/// @brief Complete the fusion: JIT-compile a fused kernel from all kernels
/// submitted to the wrapped queue since the last start_fusion and submit the
/// fused kernel for execution. Inside the fused kernel, the per-work-item
/// effects are executed in the same order as the kernels were initially
/// submitted.
///
/// This operation is asynchronous, i.e., it may return after the JIT
/// compilation is executed and the fused kernel is passed to the scheduler,
/// but before the fused kernel starts or completes execution. The returned
/// event allows to synchronize with the execution of the fused kernel. All
/// events returned by queue::submit since the last call to start_fusion
/// remain valid.
///
/// The wrapped queue is not in "fusion mode" anymore after this calls
/// returns, until the next start_fusion().
///
/// @param properties Properties to take into account when performing fusion.
event complete_fusion(const property_list &propList = {});

private:
std::shared_ptr<detail::queue_impl> MQueue;
};
} // namespace experimental
} // namespace codeplay
} // namespace ext
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
4 changes: 4 additions & 0 deletions sycl/include/sycl/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#if SYCL_BUILD_PI_HIP
#define SYCL_EXT_ONEAPI_BACKEND_HIP 1
#endif
#cmakedefine01 SYCL_ENABLE_KERNEL_FUSION
#if SYCL_ENABLE_KERNEL_FUSION
#define SYCL_EXT_CODEPLAY_KERNEL_FUSION 1
#endif

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
1 change: 1 addition & 0 deletions sycl/include/sycl/properties/all_properties.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <sycl/ext/codeplay/experimental/fusion_properties.hpp>
#include <sycl/properties/accessor_properties.hpp>
#include <sycl/properties/buffer_properties.hpp>
#include <sycl/properties/context_properties.hpp>
Expand Down
8 changes: 8 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,14 @@ class __SYCL_EXPORT queue {
} _CODELOCFW(CodeLoc));
}

/// @brief Returns true if the queue was created with the
/// ext::codeplay::experimental::property::queue::enable_fusion property.
///
/// Equivalent to
/// `has_property<ext::codeplay::experimental::property::queue::enable_fusion>()`.
///
bool ext_codeplay_supports_fusion() const;

// Clean KERNELFUNC macros.
#undef _KERNELFUNCPARAM

Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#if SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO
#include <sycl/ext/oneapi/backend/level_zero.hpp>
#endif
#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>
#include <sycl/ext/oneapi/device_global/device_global.hpp>
#include <sycl/ext/oneapi/device_global/properties.hpp>
#include <sycl/ext/oneapi/experimental/builtins.hpp>
Expand Down
1 change: 1 addition & 0 deletions sycl/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ set(SYCL_SOURCES
"detail/error_handling/enqueue_kernel.cpp"
"detail/event_impl.cpp"
"detail/filter_selector_impl.cpp"
"detail/fusion/fusion_wrapper.cpp"
"detail/global_handler.cpp"
"detail/helpers.cpp"
"detail/handler_proxy.cpp"
Expand Down
55 changes: 55 additions & 0 deletions sycl/source/detail/fusion/fusion_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//==------------ fusion_wrapper.cpp ----------------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <sycl/ext/codeplay/experimental/fusion_wrapper.hpp>

#include <detail/queue_impl.hpp>
#include <detail/scheduler/scheduler.hpp>

namespace sycl {
__SYCL_INLINE_VER_NAMESPACE(_V1) {
namespace ext {
namespace codeplay {
namespace experimental {

fusion_wrapper::fusion_wrapper(queue &Queue)
: MQueue{sycl::detail::getSyclObjImpl(Queue)} {
if (!Queue.ext_codeplay_supports_fusion()) {
throw sycl::exception(
sycl::errc::invalid,
"Cannot wrap a queue for fusion which doesn't support fusion");
}
}

queue fusion_wrapper::get_queue() const {
return sycl::detail::createSyclObjFromImpl<sycl::queue>(MQueue);
}

bool fusion_wrapper::is_in_fusion_mode() const { return false; }

void fusion_wrapper::start_fusion() {
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
}

void fusion_wrapper::cancel_fusion() {
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
}

event fusion_wrapper::complete_fusion(const property_list &PropList) {
(void)PropList;
throw sycl::exception(sycl::errc::feature_not_supported,
"Fusion not yet implemented");
}

} // namespace experimental
} // namespace codeplay
} // namespace ext
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
7 changes: 7 additions & 0 deletions sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <detail/queue_impl.hpp>
#include <sycl/event.hpp>
#include <sycl/exception_list.hpp>
#include <sycl/ext/codeplay/experimental/fusion_properties.hpp>
#include <sycl/handler.hpp>
#include <sycl/queue.hpp>
#include <sycl/stl.hpp>
Expand Down Expand Up @@ -212,5 +213,11 @@ bool queue::device_has(aspect Aspect) const {
// avoid creating sycl object from impl
return impl->getDeviceImplPtr()->has(Aspect);
}

bool queue::ext_codeplay_supports_fusion() const {
return impl->has_property<
ext::codeplay::experimental::property::queue::enable_fusion>();
}

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
8 changes: 8 additions & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -3662,6 +3662,11 @@ _ZN4sycl3_V13ext6oneapi10level_zero12make_contextERKSt6vectorINS0_6deviceESaIS5_
_ZN4sycl3_V13ext6oneapi10level_zero13make_platformEm
_ZN4sycl3_V13ext6oneapi15filter_selectorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
_ZN4sycl3_V13ext6oneapi15filter_selectorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
_ZN4sycl3_V13ext8codeplay12experimental14fusion_wrapper12start_fusionEv
_ZN4sycl3_V13ext8codeplay12experimental14fusion_wrapper13cancel_fusionEv
_ZN4sycl3_V13ext8codeplay12experimental14fusion_wrapper15complete_fusionERKNS0_13property_listE
_ZN4sycl3_V13ext8codeplay12experimental14fusion_wrapperC1ERNS0_5queueE
_ZN4sycl3_V13ext8codeplay12experimental14fusion_wrapperC2ERNS0_5queueE
_ZN4sycl3_V14freeEPvRKNS0_5queueERKNS0_6detail13code_locationE
_ZN4sycl3_V14freeEPvRKNS0_7contextERKNS0_6detail13code_locationE
_ZN4sycl3_V15event13get_wait_listEv
Expand Down Expand Up @@ -4013,6 +4018,8 @@ _ZNK4sycl3_V120accelerator_selectorclERKNS0_6deviceE
_ZNK4sycl3_V13ext6oneapi15filter_selector13select_deviceEv
_ZNK4sycl3_V13ext6oneapi15filter_selector5resetEv
_ZNK4sycl3_V13ext6oneapi15filter_selectorclERKNS0_6deviceE
_ZNK4sycl3_V13ext8codeplay12experimental14fusion_wrapper17is_in_fusion_modeEv
_ZNK4sycl3_V13ext8codeplay12experimental14fusion_wrapper9get_queueEv
_ZNK4sycl3_V15event11get_backendEv
_ZNK4sycl3_V15event15getNativeVectorEv
_ZNK4sycl3_V15event18get_profiling_infoINS0_4info15event_profiling11command_endEEENS0_6detail28is_event_profiling_info_descIT_E11return_typeEv
Expand All @@ -4033,6 +4040,7 @@ _ZNK4sycl3_V15queue12get_propertyINS0_8property5queue16enable_profilingEEET_v
_ZNK4sycl3_V15queue12get_propertyINS0_8property5queue8in_orderEEET_v
_ZNK4sycl3_V15queue12has_propertyINS0_8property5queue16enable_profilingEEEbv
_ZNK4sycl3_V15queue12has_propertyINS0_8property5queue8in_orderEEEbv
_ZNK4sycl3_V15queue28ext_codeplay_supports_fusionEv
_ZNK4sycl3_V15queue3getEv
_ZNK4sycl3_V15queue7is_hostEv
_ZNK4sycl3_V15queue8get_infoINS0_4info5queue15reference_countEEENS0_6detail18is_queue_info_descIT_E11return_typeEv
Expand Down