Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ approved by an Intel representative.
- A new approval is needed if the PR was updated (e.g. during code review).
- Once the PR is approved and all checks have passed, the pull request is
ready for merge.
- Sometimes unrelated failures can be observed in the PR. It is the author's
responsibility to find (or make a best guess at) the reason for these failures
and post a comment in the PR with:
- the possible reason for the failures, ideally with a link to the PR that caused them
- a link to other PR(s) that are expected to fix the failures
- the person who is currently looking into the failures
- a link to an existing open issue
- if the author cannot identify any of these, the minimal action expected is to
open a new [issue](/../../issues)

### Merge

Expand All @@ -89,7 +98,8 @@ and merge] and using PR description as the commit message, replacing all
individual comments made per commit. Authors of the change must ensure PR
description is up to date at the merge stage, as sometimes comments addressed
during code reviews can invalidate original PR description. Feel free to ping
@intel/llvm-gatekeepers if your PR is green and can be merged.
@intel/llvm-gatekeepers if your PR is green and can be merged. Note that
gatekeepers will require an explanation for any failures observed.

Pulldown from LLVM upstream is done through merge commits to preserve hashes of
the original commits pulled from the LLVM community repository.
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Intel LLVM-based projects:
- [oneAPI DPC++ compiler](#oneapi-dpc-compiler)
- [Late-outline OpenMP and OpenMP Offload](#late-outline-openmp-and-openmp-offload)

For the general contribution process, see [CONTRIBUTING.md](./CONTRIBUTING.md).

## oneAPI DPC++ compiler

[![](https://spec.oneapi.io/oneapi-logo-white-scaled.jpg)](https://www.oneapi.io/)
Expand Down
11 changes: 10 additions & 1 deletion buildbot/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ def do_configure(args):
libclc_amd_target_names = ';amdgcn--;amdgcn--amdhsa'
libclc_nvidia_target_names = ';nvptx64--;nvptx64--nvidiacl'

sycl_enable_fusion = "OFF"
if not args.disable_fusion:
llvm_external_projects += ";sycl-fusion"
sycl_enable_fusion = "ON"

if args.llvm_external_projects:
llvm_external_projects += ";" + args.llvm_external_projects.replace(",", ";")

Expand All @@ -32,6 +37,7 @@ def do_configure(args):
xpti_dir = os.path.join(abs_src_dir, "xpti")
xptifw_dir = os.path.join(abs_src_dir, "xptifw")
libdevice_dir = os.path.join(abs_src_dir, "libdevice")
fusion_dir = os.path.join(abs_src_dir, "sycl-fusion")
llvm_targets_to_build = args.host_target
llvm_enable_projects = 'clang;' + llvm_external_projects
libclc_targets_to_build = ''
Expand Down Expand Up @@ -144,6 +150,7 @@ def do_configure(args):
"-DXPTI_SOURCE_DIR={}".format(xpti_dir),
"-DLLVM_EXTERNAL_XPTIFW_SOURCE_DIR={}".format(xptifw_dir),
"-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR={}".format(libdevice_dir),
"-DLLVM_EXTERNAL_SYCL_FUSION_SOURCE_DIR={}".format(fusion_dir),
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
"-DLIBCLC_TARGETS_TO_BUILD={}".format(libclc_targets_to_build),
"-DLIBCLC_GENERATE_REMANGLED_VARIANTS={}".format(libclc_gen_remangled_variants),
Expand All @@ -159,7 +166,8 @@ def do_configure(args):
"-DLLVM_ENABLE_LLD={}".format(llvm_enable_lld),
"-DXPTI_ENABLE_WERROR={}".format(xpti_enable_werror),
"-DSYCL_CLANG_EXTRA_FLAGS={}".format(sycl_clang_extra_flags),
"-DSYCL_ENABLE_PLUGINS={}".format(';'.join(set(sycl_enabled_plugins)))
"-DSYCL_ENABLE_PLUGINS={}".format(';'.join(set(sycl_enabled_plugins))),
"-DSYCL_ENABLE_KERNEL_FUSION={}".format(sycl_enable_fusion)
]

if args.l0_headers and args.l0_loader:
Expand Down Expand Up @@ -238,6 +246,7 @@ def main():
parser.add_argument("--llvm-external-projects", help="Add external projects to build. Add as comma seperated list.")
parser.add_argument("--ci-defaults", action="store_true", help="Enable default CI parameters")
parser.add_argument("--enable-plugin", action='append', help="Enable SYCL plugin")
parser.add_argument("--disable-fusion", action="store_true", help="Disable the kernel fusion JIT compiler")
args = parser.parse_args()

print("args:{}".format(args))
Expand Down
2 changes: 1 addition & 1 deletion sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ install(DIRECTORY ${OpenCL_INCLUDE_DIR}/CL
COMPONENT OpenCL-Headers)

# Option to enable online kernel fusion via a JIT compiler
option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" OFF)
option(SYCL_ENABLE_KERNEL_FUSION "Enable kernel fusion via JIT compiler" ON)

# Needed for feature_test.hpp
if ("cuda" IN_LIST SYCL_ENABLE_PLUGINS)
Expand Down
5 changes: 5 additions & 0 deletions sycl/cmake/modules/AddSYCLUnitTest.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ macro(add_sycl_unittest test_dirname link_variant)
OpenCL-Headers
${SYCL_LINK_LIBS}
)

if(SYCL_ENABLE_KERNEL_FUSION)
target_link_libraries(${test_dirname} PRIVATE sycl-fusion)
endif(SYCL_ENABLE_KERNEL_FUSION)

target_include_directories(${test_dirname}
PRIVATE SYSTEM
${sycl_inc_dir}
Expand Down
9 changes: 9 additions & 0 deletions sycl/doc/GetStartedGuide.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and a wide range of compute accelerators such as GPU and FPGA.
- [Build DPC++ toolchain with support for HIP AMD](#build-dpc-toolchain-with-support-for-hip-amd)
- [Build DPC++ toolchain with support for HIP NVIDIA](#build-dpc-toolchain-with-support-for-hip-nvidia)
- [Build DPC++ toolchain with support for ESIMD CPU Emulation](#build-dpc-toolchain-with-support-for-esimd-emulator)
- [Build DPC++ toolchain with support for runtime kernel fusion](#build-dpc-toolchain-with-support-for-runtime-kernel-fusion)
- [Build Doxygen documentation](#build-doxygen-documentation)
- [Deployment](#deployment)
- [Use DPC++ toolchain](#use-dpc-toolchain)
Expand Down Expand Up @@ -298,6 +299,14 @@ Enabling this flag requires following packages installed.
Currently, this feature has been tested and verified in an Ubuntu 20.04
environment.

### Build DPC++ toolchain with support for runtime kernel fusion

Support for the experimental SYCL extension for user-driven kernel fusion
at runtime is enabled by default.

To disable support for this feature, follow the instructions for the
Linux DPC++ toolchain, but add the `--disable-fusion` flag.

### Build Doxygen documentation

Building Doxygen documentation is similar to building the product itself. First,
Expand Down
4 changes: 4 additions & 0 deletions sycl/doc/developer/ContributeToDPCPP.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Contributing to DPC++

## General guidelines

Read [CONTRIBUTING.md](/CONTRIBUTING.md) first.

## Maintaining stable ABI/API

All changes made to the DPC++ compiler and runtime library should generally
Expand Down
9 changes: 9 additions & 0 deletions sycl/include/CL/__spirv/spirv_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,22 @@ enum class GroupOperation : uint32_t {
ExclusiveScan = 2
};

// MatrixLayout has two mutually exclusive definitions, selected by
// SYCL_EXT_ONEAPI_MATRIX_VERSION:
//  - greater than 1: RowMajor, ColumnMajor, Packed, Dynamic
//  - otherwise (including when the macro is undefined, since an undefined
//    identifier evaluates to 0 in #if): RowMajor, ColumnMajor, PackedA,
//    PackedB, Unused
// The numeric values 2 and 3 name different enumerators in the two variants,
// so code compiled with different macro values does not agree on them.
#if (SYCL_EXT_ONEAPI_MATRIX_VERSION > 1)
enum class MatrixLayout : uint32_t {
RowMajor = 0,
ColumnMajor = 1,
Packed = 2,
Dynamic = 3
};
#else
enum class MatrixLayout : uint32_t {
RowMajor = 0,
ColumnMajor = 1,
PackedA = 2,
PackedB = 3,
Unused = 4
};
#endif

enum class MatrixUse : uint32_t { MatrixA = 0, MatrixB = 1, Accumulator = 2 };

Expand Down
4 changes: 4 additions & 0 deletions sycl/include/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ class CG {

// Returns the value of the MType member by value.
CGTYPE getType() { return MType; }

// Returns a non-const reference to MArgsStorage (a vector of char vectors),
// so callers can inspect or modify the stored data in place.
std::vector<std::vector<char>> &getArgsStorage() { return MArgsStorage; }

// Returns a non-const reference to MAccStorage (a vector of
// detail::AccessorImplPtr), so callers can inspect or modify it in place.
std::vector<detail::AccessorImplPtr> &getAccStorage() { return MAccStorage; }

virtual ~CG() = default;

private:
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/sycl/detail/defines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#endif
#endif

#if __has_attribute(sycl_special_class)
#if __has_attribute(sycl_special_class) && (defined __SYCL_DEVICE_ONLY__)
#define __SYCL_SPECIAL_CLASS __attribute__((sycl_special_class))
#else
#define __SYCL_SPECIAL_CLASS
Expand Down
58 changes: 39 additions & 19 deletions sycl/include/sycl/ext/oneapi/device_global/device_global.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <sycl/exception.hpp>
#include <sycl/ext/oneapi/device_global/properties.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/pointers.hpp>

#ifdef __SYCL_DEVICE_ONLY__
#define __SYCL_HOST_NOT_SUPPORTED(Op)
Expand Down Expand Up @@ -42,9 +43,27 @@ struct HasArrowOperator<
// Primary template of the device_global base class. It holds a pointer
// (usmptr) instead of the value itself and exposes it through get_ptr() and
// get_multi_ptr().
// NOTE(review): usmptr and both get_ptr() overloads appear twice below, once
// with raw `T *` and once with `pointer_t` — presumably the removed and added
// sides of a diff shown together; confirm which set is current.
template <typename T, typename PropertyListT, typename = void>
class device_global_base {
protected:
T *usmptr;
T *get_ptr() noexcept { return usmptr; }
const T *get_ptr() const noexcept { return usmptr; }
// Pointer type taken from decorated_global_ptr<T>.
using pointer_t = typename decorated_global_ptr<T>::pointer;
pointer_t usmptr;
pointer_t get_ptr() noexcept { return usmptr; }
// NOTE(review): `const pointer_t` applies const to pointer_t itself (a
// top-level const on the returned value), not to the pointee, so unlike the
// `const T *` overload above this does not yield a pointer-to-const —
// confirm this is intended.
const pointer_t get_ptr() const noexcept { return usmptr; }

public:
// Returns a multi_ptr to T in the global address space, constructed directly
// from get_ptr(). __SYCL_HOST_NOT_SUPPORTED expands to nothing when
// __SYCL_DEVICE_ONLY__ is defined; its other definition is not visible here.
template <access::decorated IsDecorated>
multi_ptr<T, access::address_space::global_space, IsDecorated>
get_multi_ptr() noexcept {
__SYCL_HOST_NOT_SUPPORTED("get_multi_ptr()")
return multi_ptr<T, access::address_space::global_space, IsDecorated>{
get_ptr()};
}

// Const overload: same construction, but the result is a multi_ptr to
// const T.
template <access::decorated IsDecorated>
multi_ptr<const T, access::address_space::global_space, IsDecorated>
get_multi_ptr() const noexcept {
__SYCL_HOST_NOT_SUPPORTED("get_multi_ptr()")
return multi_ptr<const T, access::address_space::global_space, IsDecorated>{
get_ptr()};
}
};

// Specialization of device_global base class for when device_image_scope is in
Expand All @@ -58,6 +77,23 @@ class device_global_base<
// In this specialization the value is stored inline (value-initialized) and
// get_ptr() returns its address.
T val{};
T *get_ptr() noexcept { return &val; }
const T *get_ptr() const noexcept { return &val; }

public:
// Returns a multi_ptr to T in the global address space, produced by
// address_space_cast on the raw pointer from get_ptr(). IsDecorated selects
// the decoration of the returned multi_ptr.
template <access::decorated IsDecorated>
multi_ptr<T, access::address_space::global_space, IsDecorated>
get_multi_ptr() noexcept {
__SYCL_HOST_NOT_SUPPORTED("get_multi_ptr()")
return address_space_cast<access::address_space::global_space, IsDecorated,
T>(this->get_ptr());
}

// Const overload: returns a multi_ptr to const T in the global address space,
// produced by address_space_cast on the pointer-to-const from get_ptr().
template <access::decorated IsDecorated>
multi_ptr<const T, access::address_space::global_space, IsDecorated>
get_multi_ptr() const noexcept {
__SYCL_HOST_NOT_SUPPORTED("get_multi_ptr()")
return address_space_cast<access::address_space::global_space, IsDecorated,
const T>(this->get_ptr());
}
};
} // namespace detail

Expand Down Expand Up @@ -113,22 +149,6 @@ class
device_global &operator=(const device_global &) = delete;
device_global &operator=(const device_global &&) = delete;

// Returns a multi_ptr to T in the global address space via
// address_space_cast on this->get_ptr(). Unlike the const overload, the
// element type is not passed explicitly to address_space_cast here.
template <access::decorated IsDecorated>
multi_ptr<T, access::address_space::global_space, IsDecorated>
get_multi_ptr() noexcept {
__SYCL_HOST_NOT_SUPPORTED("get_multi_ptr()")
return address_space_cast<access::address_space::global_space, IsDecorated>(
this->get_ptr());
}

// Const overload: returns a multi_ptr to const T in the global address space
// via address_space_cast, with `const T` given explicitly as the element type.
template <access::decorated IsDecorated>
multi_ptr<const T, access::address_space::global_space, IsDecorated>
get_multi_ptr() const noexcept {
__SYCL_HOST_NOT_SUPPORTED("get_multi_ptr()")
return address_space_cast<access::address_space::global_space, IsDecorated,
const T>(this->get_ptr());
}

T &get() noexcept {
__SYCL_HOST_NOT_SUPPORTED("get()")
return *this->get_ptr();
Expand Down
Loading