Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions openmp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ else()
endif()

# Use the current compiler target to determine the appropriate runtime to build.
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn|^nvptx" OR
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn|^nvptx")
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn|^nvptx|^spirv64" OR
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^amdgcn|^nvptx|^spirv64")
add_subdirectory(device)
else()
add_subdirectory(module)
Expand Down
16 changes: 13 additions & 3 deletions openmp/device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^amdgcn" OR
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^nvptx" OR
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^nvptx")
set(target_name "nvptx")
list(APPEND compile_flags --cuda-feature=+ptx63)
list(APPEND compile_options --cuda-feature=+ptx63)
elseif("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^spirv64" OR
"${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^spirv64")
set(target_name "spirv")
endif()

# Trick to combine these into a bitcode file via the linker's LTO pass.
Expand All @@ -110,8 +113,15 @@ target_include_directories(libompdevice PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/../../libc
${CMAKE_CURRENT_SOURCE_DIR}/../../offload/include)
target_compile_options(libompdevice PRIVATE ${compile_options} ${compile_flags})
target_link_options(libompdevice PRIVATE
"-flto" "-r" "-nostdlib" "-Wl,--lto-emit-llvm")
# SPIR-V triples are linked with plain "-nostdlib -emit-llvm"; every other
# target keeps the relocatable LTO link that emits LLVM bitcode.
if("${LLVM_DEFAULT_TARGET_TRIPLE}" MATCHES "^spirv" OR
   "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "^spirv")
  target_link_options(libompdevice PRIVATE
    "-nostdlib"
    "-emit-llvm")
else()
  target_link_options(libompdevice PRIVATE
    "-flto"
    "-r"
    "-nostdlib"
    "-Wl,--lto-emit-llvm")
endif()

if(LLVM_DEFAULT_TARGET_TRIPLE)
target_link_options(libompdevice PRIVATE "--target=${LLVM_DEFAULT_TARGET_TRIPLE}")
endif()
Expand Down
12 changes: 11 additions & 1 deletion openmp/device/include/DeviceTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,17 @@ struct IdentTy {

using __kmpc_impl_lanemask_t = LaneMaskTy;

using ParallelRegionFnTy = void *;
// On SPIR-V, function pointers live in address space 9, while the runtime
// passes them around as plain `void *`. FnPtrTy carries the address-space
// annotation on SPIR-V so these pointers can be used without cast errors at
// compile time; on every other target it is an ordinary `void *`.
#if defined(__SPIRV__)
using FnPtrTy = void [[clang::address_space(9)]] *;
#else
using FnPtrTy = void *;
#endif

using ParallelRegionFnTy = FnPtrTy;

using CriticalNameTy = int32_t[8];

Expand Down
2 changes: 1 addition & 1 deletion openmp/device/include/State.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
__builtin_unreachable();
}

[[gnu::always_inline, gnu::flatten]] inline void *&
[[gnu::always_inline, gnu::flatten]] inline FnPtrTy &
lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
switch (Kind) {
case state::VK_ParallelRegionFn:
Expand Down
13 changes: 13 additions & 0 deletions openmp/device/include/Synchronization.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,20 @@ enum MemScopeTy {
/// Atomically increment \p *Address with wrap-around at \p Val and return the
/// value observed before the update.
///
/// On SPIR-V the operation is emulated with a compare-and-swap retry loop: the
/// new value is 0 when the old value is >= \p Val and old + 1 otherwise. All
/// other targets delegate to __scoped_atomic_fetch_uinc.
template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
V inc(Ty *Address, V Val, atomic::OrderingTy Ordering,
      MemScopeTy MemScope = MemScopeTy::device) {
#if defined(__SPIRV__)
  // The temporaries use V (the declared return type) rather than a fixed
  // uint32_t, so element types wider than 32 bits are not truncated.
  // NOTE(review): Ordering is reused for the plain load and as the CAS failure
  // ordering -- confirm callers never pass an ordering that is invalid there.
  V Old;
  V New;
  do {
    Old = load(Address, Ordering, MemScope);
    New = Old >= Val ? V(0) : static_cast<V>(Old + 1);
  } while (!cas(Address, Old, New, Ordering, Ordering, MemScope));
  return Old;
#else
  return __scoped_atomic_fetch_uinc(Address, Val, Ordering, MemScope);
#endif
}

template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
Expand Down
3 changes: 2 additions & 1 deletion openmp/device/src/Allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ extern "C" {
}
#endif

#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
//#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
#if (defined(__AMDGPU__) || defined(__SPIRV__)) && !defined(OMPTARGET_HAS_LIBC)
[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
#else
Expand Down
10 changes: 5 additions & 5 deletions openmp/device/src/Parallelism.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {

// Invoke an outlined parallel function unwrapping arguments (up to 32).
[[clang::always_inline]] void invokeMicrotask(int32_t global_tid,
int32_t bound_tid, void *fn,
int32_t bound_tid, FnPtrTy fn,
void **args, int64_t nargs) {
switch (nargs) {
#include "generated_microtask_cases.gen"
Expand All @@ -84,7 +84,7 @@ extern "C" {

[[clang::always_inline]] void __kmpc_parallel_spmd(IdentTy *ident,
int32_t num_threads,
void *fn, void **args,
FnPtrTy fn, void **args,
const int64_t nargs) {
uint32_t TId = mapping::getThreadIdInBlock();
uint32_t NumThreads = determineNumberOfThreads(num_threads);
Expand Down Expand Up @@ -142,8 +142,8 @@ extern "C" {

[[clang::always_inline]] void
__kmpc_parallel_60(IdentTy *ident, int32_t, int32_t if_expr,
int32_t num_threads, int proc_bind, void *fn,
void *wrapper_fn, void **args, int64_t nargs,
int32_t num_threads, int proc_bind, FnPtrTy fn,
FnPtrTy wrapper_fn, void **args, int64_t nargs,
int32_t nt_strict) {
uint32_t TId = mapping::getThreadIdInBlock();

Expand Down Expand Up @@ -261,7 +261,7 @@ __kmpc_parallel_60(IdentTy *ident, int32_t, int32_t if_expr,
1u, true, ident,
/*ForceTeamState=*/true);
state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn,
(void *)nullptr, true, ident,
(FnPtrTy) nullptr, true, ident,
/*ForceTeamState=*/true);
state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true, ident,
/*ForceTeamState=*/true);
Expand Down
31 changes: 31 additions & 0 deletions openmp/device/src/Synchronization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,37 @@ void setCriticalLock(omp_lock_t *Lock) { setLock(Lock); }
#endif
///}

#if defined(__SPIRV__)
/// SPIR-V placeholder: named-barrier initialization is not implemented yet and
/// traps if it is ever reached.
void namedBarrierInit() {
  // TODO
  __builtin_trap();
}
/// SPIR-V placeholder: the named barrier is not implemented yet and traps if
/// it is ever reached.
void namedBarrier() {
  // TODO
  __builtin_trap();
}

/// Release \p Lock by atomically storing 0 into its 32-bit lock word with
/// sequentially consistent ordering.
void unsetLock(omp_lock_t *Lock) {
  int32_t *LockWord = (int32_t *)Lock;
  atomic::store(LockWord, 0, atomic::seq_cst);
}
/// Atomically add 0 to the 32-bit lock word of \p Lock (sequentially
/// consistent) and return whatever atomic::add yields.
/// NOTE(review): this only probes the lock word and never tries to take the
/// lock -- confirm that callers expect a probe rather than a try-acquire.
int testLock(omp_lock_t *Lock) {
  int32_t *LockWord = (int32_t *)Lock;
  return atomic::add(LockWord, 0, atomic::seq_cst);
}
/// Initialize \p Lock by putting it into the unlocked state via unsetLock.
void initLock(omp_lock_t *Lock) {
  unsetLock(Lock);
}
/// Destroy \p Lock; on SPIR-V this just resets it to unlocked via unsetLock.
void destroyLock(omp_lock_t *Lock) {
  unsetLock(Lock);
}
/// Acquire \p Lock by spinning until its 32-bit lock word is swapped from 0 to
/// 1. The compare-and-swap is attempted only after a load has observed 0; all
/// accesses use sequentially consistent ordering.
void setLock(omp_lock_t *Lock) {
  int32_t *LockWord = (int32_t *)Lock;
  for (;;) {
    // Reset the expected value on every iteration, as the original loop does.
    int32_t Unlocked = 0;
    if (atomic::load(LockWord, atomic::seq_cst) != Unlocked)
      continue;
    if (atomic::cas(LockWord, Unlocked, 1, atomic::seq_cst, atomic::seq_cst))
      return;
  }
}

/// Release the lock guarding a critical region; forwards to unsetLock.
void unsetCriticalLock(omp_lock_t *Lock) {
  unsetLock(Lock);
}
/// Acquire the lock guarding a critical region; forwards to setLock.
void setCriticalLock(omp_lock_t *Lock) {
  setLock(Lock);
}
/// SPIR-V implementation: forwards to synchronize::threads with \p Ordering.
void syncThreadsAligned(atomic::OrderingTy Ordering) { synchronize::threads(Ordering); }
#endif

} // namespace impl

void synchronize::init(bool IsSPMD) {
Expand Down
Loading