Skip to content

Commit dc37294

Browse files
Merge commit '435558d0d02526aa4892671ca0871070a3c3cd56' into bolt-435558
2 parents 8f81ee5 + 435558d commit dc37294

File tree

107 files changed

+2895
-1641
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

107 files changed

+2895
-1641
lines changed

CMakeLists.txt

+9-5
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ else()
4848
set(OPENMP_TEST_C_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang.exe)
4949
set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++.exe)
5050
endif()
51+
52+
list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LLVM_MAIN_INCLUDE_DIR} ${LLVM_BINARY_DIR}/include)
5153
endif()
5254

5355
# Check and set up common compiler flags.
@@ -78,21 +80,23 @@ if (APPLE OR WIN32 OR NOT OPENMP_HAVE_STD_CPP14_FLAG)
7880
endif()
7981

8082
# Attempt to locate LLVM source, required by libomptarget
81-
if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR)
83+
if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS)
8284
if (LLVM_MAIN_INCLUDE_DIR)
83-
set(LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_INCLUDE_DIR})
85+
list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LLVM_MAIN_INCLUDE_DIR})
8486
elseif (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include)
85-
set(LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include)
87+
list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../llvm/include)
8688
endif()
8789
endif()
8890

89-
if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR)
90-
message(STATUS "Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR, disabling libomptarget")
91+
if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS)
92+
message(STATUS "Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS, disabling libomptarget")
9193
set(ENABLE_LIBOMPTARGET OFF)
9294
endif()
9395

9496
option(OPENMP_ENABLE_LIBOMPTARGET "Enable building libomptarget for offloading."
9597
${ENABLE_LIBOMPTARGET})
98+
option(OPENMP_ENABLE_LIBOMPTARGET_PROFILING "Enable time profiling for libomptarget."
99+
${ENABLE_LIBOMPTARGET})
96100
if (OPENMP_ENABLE_LIBOMPTARGET)
97101
# Check that the library can actually be built.
98102
if (APPLE OR WIN32)

cmake/OpenMPTesting.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -190,14 +190,14 @@ function(add_openmp_testsuite target comment)
190190
${comment}
191191
${ARG_UNPARSED_ARGUMENTS}
192192
EXCLUDE_FROM_CHECK_ALL
193-
DEPENDS clang clang-resource-headers FileCheck ${ARG_DEPENDS}
193+
DEPENDS clang FileCheck ${ARG_DEPENDS}
194194
ARGS ${ARG_ARGS}
195195
)
196196
else()
197197
add_lit_testsuite(${target}
198198
${comment}
199199
${ARG_UNPARSED_ARGUMENTS}
200-
DEPENDS clang clang-resource-headers FileCheck ${ARG_DEPENDS}
200+
DEPENDS clang FileCheck ${ARG_DEPENDS}
201201
ARGS ${ARG_ARGS}
202202
)
203203
endif()

libomptarget/CMakeLists.txt

+14-18
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
##===----------------------------------------------------------------------===##
2-
#
2+
#
33
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
# See https://llvm.org/LICENSE.txt for license information.
55
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6-
#
6+
#
77
##===----------------------------------------------------------------------===##
88
#
99
# Build offloading library and related plugins.
@@ -17,11 +17,12 @@ endif()
1717
# Add cmake directory to search for custom cmake functions.
1818
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules ${CMAKE_MODULE_PATH})
1919

20-
if(OPENMP_STANDALONE_BUILD)
21-
# Build all libraries into a common place so that tests can find them.
22-
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
23-
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
24-
endif()
20+
# Emit every build artifact (static archives, shared libraries, runtime
# binaries) into one common directory, so that the same location can be used
# for testing.
set(LIBOMPTARGET_LIBRARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LIBRARY_DIR}")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIBOMPTARGET_LIBRARY_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${LIBOMPTARGET_LIBRARY_DIR}")
2526

2627
# Message utilities.
2728
include(LibomptargetUtils)
@@ -30,8 +31,8 @@ include(LibomptargetUtils)
3031
include(LibomptargetGetDependencies)
3132

3233
# LLVM source tree is required at build time for libomptarget
33-
if (NOT LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR)
34-
message(FATAL_ERROR "Missing definition for LIBOMPTARGET_LLVM_MAIN_INCLUDE_DIR")
34+
if (NOT LIBOMPTARGET_LLVM_INCLUDE_DIRS)
35+
message(FATAL_ERROR "Missing definition for LIBOMPTARGET_LLVM_INCLUDE_DIRS")
3536
endif()
3637

3738
# This is a list of all the targets that are supported/tested right now.
@@ -59,17 +60,12 @@ if(LIBOMPTARGET_ENABLE_DEBUG)
5960
add_definitions(-DOMPTARGET_DEBUG)
6061
endif()
6162

62-
include_directories(include)
63+
set(LIBOMPTARGET_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
64+
include_directories(${LIBOMPTARGET_INCLUDE_DIR})
6365

6466
# Build target agnostic offloading library.
65-
add_subdirectory(src)
66-
67-
# Retrieve the path to the resulting library so that it can be used for
68-
# testing.
69-
get_target_property(LIBOMPTARGET_LIBRARY_DIR bolt-omptarget LIBRARY_OUTPUT_DIRECTORY)
70-
if(NOT LIBOMPTARGET_LIBRARY_DIR)
71-
set(LIBOMPTARGET_LIBRARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
72-
endif()
67+
set(LIBOMPTARGET_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
68+
add_subdirectory(${LIBOMPTARGET_SRC_DIR})
7369

7470
# Definitions for testing, for reuse when testing libomptarget-nvptx.
7571
if(OPENMP_STANDALONE_BUILD)

libomptarget/deviceRTLs/amdgcn/CMakeLists.txt

-2
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,12 @@ set(cuda_sources
7373

7474
set(h_files
7575
${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
76-
${CMAKE_CURRENT_SOURCE_DIR}/src/hip_atomics.h
7776
${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
7877
${devicertl_base_directory}/common/debug.h
7978
${devicertl_base_directory}/common/device_environment.h
8079
${devicertl_base_directory}/common/omptarget.h
8180
${devicertl_base_directory}/common/omptargeti.h
8281
${devicertl_base_directory}/common/state-queue.h
83-
${devicertl_base_directory}/common/target_atomic.h
8482
${devicertl_base_directory}/common/state-queuei.h
8583
${devicertl_base_directory}/common/support.h)
8684

libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h

+2
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,6 @@
1515
typedef uint64_t __kmpc_impl_lanemask_t;
1616
typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
1717

18+
EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads();
19+
1820
#endif

libomptarget/deviceRTLs/amdgcn/src/amdgcn_locks.hip

+3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
// a SIMD => wavefront mapping once that is implemented.
1515
//
1616
//===----------------------------------------------------------------------===//
17+
#pragma omp declare target
1718

1819
#include "common/debug.h"
1920

@@ -26,3 +27,5 @@ DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *) { warn(); }
2627
DEVICE void __kmpc_impl_set_lock(omp_lock_t *) { warn(); }
2728
DEVICE void __kmpc_impl_unset_lock(omp_lock_t *) { warn(); }
2829
// Stub, like the neighbouring lock entry points: it only calls warn().
// Unlike them it returns int, and the previous body fell off the end of a
// non-void function, which is undefined behaviour. Return 0 explicitly, i.e.
// report that the lock was NOT acquired.
// NOTE(review): 0 is chosen because the stub never actually takes a lock;
// confirm this is the value callers should see on this target.
DEVICE int __kmpc_impl_test_lock(omp_lock_t *lock) {
  (void)lock; // unused: there is no lock state to inspect in the stub
  warn();
  return 0;
}
30+
31+
#pragma omp end declare target

libomptarget/deviceRTLs/amdgcn/src/amdgcn_smid.hip

+3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
#pragma omp declare target
89

910
#include "target_impl.h"
1011

@@ -59,3 +60,5 @@ DEVICE uint32_t __kmpc_impl_smid() {
5960
ENCODE_HWREG(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID));
6061
return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
6162
}
63+
64+
#pragma omp end declare target

libomptarget/deviceRTLs/amdgcn/src/hip_atomics.h

-41
This file was deleted.

libomptarget/deviceRTLs/amdgcn/src/target_impl.h

+29-14
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@
2929
#define SHARED __attribute__((shared))
3030
#define ALIGN(N) __attribute__((aligned(N)))
3131

32-
#include "hip_atomics.h"
33-
3432
////////////////////////////////////////////////////////////////////////////////
3533
// Kernel options
3634
////////////////////////////////////////////////////////////////////////////////
@@ -65,6 +63,10 @@ enum DATA_SHARING_SIZES {
6563
DS_Max_Warp_Number = 16,
6664
};
6765

66+
// Lane mask constant with every bit of __kmpc_impl_lanemask_t set.
// Declared as an enumerator with the lane-mask type as its underlying type.
enum : __kmpc_impl_lanemask_t {
  __kmpc_impl_all_lanes = ~static_cast<__kmpc_impl_lanemask_t>(0)
};
69+
6870
INLINE void __kmpc_impl_unpack(uint64_t val, uint32_t &lo, uint32_t &hi) {
6971
lo = (uint32_t)(val & UINT64_C(0x00000000FFFFFFFF));
7072
hi = (uint32_t)((val & UINT64_C(0xFFFFFFFF00000000)) >> 32);
@@ -74,27 +76,15 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) {
7476
return (((uint64_t)hi) << 32) | (uint64_t)lo;
7577
}
7678

77-
static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes =
78-
UINT64_C(0xffffffffffffffff);
79-
8079
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt();
81-
8280
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt();
83-
8481
DEVICE uint32_t __kmpc_impl_smid();
85-
8682
DEVICE double __kmpc_impl_get_wtick();
87-
8883
DEVICE double __kmpc_impl_get_wtime();
8984

9085
// Returns one plus the index of the least-significant set bit of x, or 0 when
// x is zero. Uses the `long long` builtin so that all 64 bits of x are
// examined even when `long` is only 32 bits wide.
INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsll(x); }
91-
9286
// Returns the number of set bits in x. Uses the `long long` builtin so that
// all 64 bits of x are counted even when `long` is only 32 bits wide.
INLINE uint64_t __kmpc_impl_popc(uint64_t x) {
  return __builtin_popcountll(x);
}
9387

94-
template <typename T> INLINE T __kmpc_impl_min(T x, T y) {
95-
return x < y ? x : y;
96-
}
97-
9888
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask();
9989

10090
DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
@@ -135,6 +125,31 @@ DEVICE int GetNumberOfThreadsInBlock();
135125
DEVICE unsigned GetWarpId();
136126
DEVICE unsigned GetLaneId();
137127

128+
// Atomics
129+
// Atomically adds `val` to `*address` (sequentially consistent ordering) and
// returns the value that was stored at `*address` before the addition.
template <typename T> INLINE T __kmpc_atomic_add(T *address, T val) {
  const T previous = __atomic_fetch_add(address, val, __ATOMIC_SEQ_CST);
  return previous;
}
132+
133+
INLINE uint32_t __kmpc_atomic_inc(uint32_t *address, uint32_t max) {
134+
return __builtin_amdgcn_atomic_inc32(address, max, __ATOMIC_SEQ_CST, "");
135+
}
136+
137+
// Atomically stores max(*address, val) into `*address` (sequentially
// consistent ordering) and returns the value held there beforehand.
template <typename T> INLINE T __kmpc_atomic_max(T *address, T val) {
  const T previous = __atomic_fetch_max(address, val, __ATOMIC_SEQ_CST);
  return previous;
}
140+
141+
template <typename T> INLINE T __kmpc_atomic_exchange(T *address, T val) {
142+
T r;
143+
__atomic_exchange(address, &val, &r, __ATOMIC_SEQ_CST);
144+
return r;
145+
}
146+
147+
// Atomic compare-and-swap: if `*address` equals `compare`, store `val` there.
// Always returns the value observed at `*address`: on success that equals
// `compare`; on failure the builtin writes the current contents into the
// expected-value slot, which is what gets returned.
// Strong (non-weak) exchange; seq_cst on success, relaxed on failure.
template <typename T> INLINE T __kmpc_atomic_cas(T *address, T compare, T val) {
  T observed = compare;
  T desired = val;
  // The boolean success flag is intentionally discarded; callers compare the
  // returned value with `compare` themselves if they need to know.
  (void)__atomic_compare_exchange(address, &observed, &desired,
                                  /*weak=*/false, __ATOMIC_SEQ_CST,
                                  __ATOMIC_RELAXED);
  return observed;
}
152+
138153
// Locks
139154
DEVICE void __kmpc_impl_init_lock(omp_lock_t *lock);
140155
DEVICE void __kmpc_impl_destroy_lock(omp_lock_t *lock);

libomptarget/deviceRTLs/amdgcn/src/target_impl.hip

+8-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// Definitions of target specific functions
1010
//
1111
//===----------------------------------------------------------------------===//
12+
#pragma omp declare target
1213

1314
#include "target_impl.h"
1415

@@ -144,6 +145,12 @@ DEVICE unsigned GetLaneId() {
144145
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
145146
}
146147

148+
// Exported entry point that forwards to GetNumberOfThreadsInBlock() and
// returns its result converted to uint32_t.
EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() {
  const uint32_t num_threads = GetNumberOfThreadsInBlock();
  return num_threads;
}
151+
147152
// Stub implementations
148-
DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr }
153+
DEVICE void *__kmpc_impl_malloc(size_t) { return nullptr; }
149154
DEVICE void __kmpc_impl_free(void *) {}
155+
156+
#pragma omp end declare target

libomptarget/deviceRTLs/common/omptarget.h

+11-10
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ class omptarget_nvptx_SharedArgs {
7474
extern DEVICE SHARED omptarget_nvptx_SharedArgs
7575
omptarget_nvptx_globalArgs;
7676

77+
// Worker slot type which is initialized with the default worker slot
78+
// size of 4*32 bytes.
79+
struct __kmpc_data_sharing_slot {
80+
__kmpc_data_sharing_slot *Next;
81+
__kmpc_data_sharing_slot *Prev;
82+
void *PrevSlotStackPtr;
83+
void *DataEnd;
84+
char Data[DS_Worker_Warp_Slot_Size];
85+
};
86+
7787
// Data structure to keep in shared memory that traces the current slot, stack,
7888
// and frame pointer as well as the active threads that didn't exit the current
7989
// environment.
@@ -83,15 +93,6 @@ struct DataSharingStateTy {
8393
void * volatile FramePtr[DS_Max_Warp_Number];
8494
__kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number];
8595
};
86-
// Additional worker slot type which is initialized with the default worker slot
87-
// size of 4*32 bytes.
88-
struct __kmpc_data_sharing_worker_slot_static {
89-
__kmpc_data_sharing_slot *Next;
90-
__kmpc_data_sharing_slot *Prev;
91-
void *PrevSlotStackPtr;
92-
void *DataEnd;
93-
char Data[DS_Worker_Warp_Slot_Size];
94-
};
9596

9697
extern DEVICE SHARED DataSharingStateTy DataSharingState;
9798

@@ -213,7 +214,7 @@ class omptarget_nvptx_TeamDescr {
213214
workDescrForActiveParallel; // one, ONLY for the active par
214215

215216
ALIGN(16)
216-
__kmpc_data_sharing_worker_slot_static worker_rootS[DS_Max_Warp_Number];
217+
__kmpc_data_sharing_slot worker_rootS[DS_Max_Warp_Number];
217218
};
218219

219220
////////////////////////////////////////////////////////////////////////////////

libomptarget/deviceRTLs/common/omptargeti.h

-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14-
#include "common/target_atomic.h"
15-
1614
////////////////////////////////////////////////////////////////////////////////
1715
// Task Descriptor
1816
////////////////////////////////////////////////////////////////////////////////

libomptarget/deviceRTLs/common/src/cancel.cu

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// Interface to be used in the implementation of OpenMP cancel.
1010
//
1111
//===----------------------------------------------------------------------===//
12+
#pragma omp declare target
1213

1314
#include "interface.h"
1415
#include "common/debug.h"
@@ -26,3 +27,5 @@ EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
2627
// disabled
2728
return 0;
2829
}
30+
31+
#pragma omp end declare target

libomptarget/deviceRTLs/common/src/critical.cu

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// This file contains the implementation of critical with KMPC interface
1010
//
1111
//===----------------------------------------------------------------------===//
12+
#pragma omp declare target
1213

1314
#include "interface.h"
1415
#include "common/debug.h"
@@ -26,3 +27,5 @@ void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
2627
PRINT0(LD_IO, "call to kmpc_end_critical()\n");
2728
omp_unset_lock((omp_lock_t *)lck);
2829
}
30+
31+
#pragma omp end declare target

0 commit comments

Comments
 (0)