29
29
// Shorthand that applies the compiler's `shared` attribute to a declaration.
// NOTE(review): presumably this places the variable in on-chip memory shared
// by the threads of a block — confirm against the toolchain documentation.
#define SHARED __attribute__ ((shared))
30
30
#define ALIGN (N ) __attribute__((aligned(N)))
31
31
32
- #include " hip_atomics.h"
33
-
34
32
// //////////////////////////////////////////////////////////////////////////////
35
33
// Kernel options
36
34
// //////////////////////////////////////////////////////////////////////////////
@@ -65,6 +63,10 @@ enum DATA_SHARING_SIZES {
65
63
DS_Max_Warp_Number = 16 ,
66
64
};
67
65
66
+ enum : __kmpc_impl_lanemask_t {
67
+ __kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t )0
68
+ };
69
+
68
70
INLINE void __kmpc_impl_unpack (uint64_t val, uint32_t &lo, uint32_t &hi) {
69
71
lo = (uint32_t )(val & UINT64_C (0x00000000FFFFFFFF ));
70
72
hi = (uint32_t )((val & UINT64_C (0xFFFFFFFF00000000 )) >> 32 );
@@ -74,27 +76,15 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) {
74
76
return (((uint64_t )hi) << 32 ) | (uint64_t )lo;
75
77
}
76
78
77
- static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes =
78
- UINT64_C (0xffffffffffffffff );
79
-
80
79
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt ();
81
-
82
80
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt ();
83
-
84
81
DEVICE uint32_t __kmpc_impl_smid ();
85
-
86
82
DEVICE double __kmpc_impl_get_wtick ();
87
-
88
83
DEVICE double __kmpc_impl_get_wtime ();
89
84
90
85
// Find-first-set: returns one plus the index of the least significant set bit
// of x, or 0 when x is zero.
// Uses the `long long` builtin so that all 64 bits of the argument are
// examined even on targets where `long` is only 32 bits wide.
INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsll(x); }
91
-
92
86
// Population count: returns the number of set bits in x.
// Uses the `unsigned long long` builtin so that all 64 bits of the argument
// are counted even on targets where `long` is only 32 bits wide.
INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountll(x); }
93
87
94
- template <typename T> INLINE T __kmpc_impl_min (T x, T y) {
95
- return x < y ? x : y;
96
- }
97
-
98
88
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask ();
99
89
100
90
DEVICE int32_t __kmpc_impl_shfl_sync (__kmpc_impl_lanemask_t , int32_t Var,
@@ -135,6 +125,31 @@ DEVICE int GetNumberOfThreadsInBlock();
135
125
DEVICE unsigned GetWarpId ();
136
126
DEVICE unsigned GetLaneId ();
137
127
128
+ // Atomics
129
+ template <typename T> INLINE T __kmpc_atomic_add (T *address, T val) {
130
+ return __atomic_fetch_add (address, val, __ATOMIC_SEQ_CST);
131
+ }
132
+
133
+ INLINE uint32_t __kmpc_atomic_inc (uint32_t *address, uint32_t max) {
134
+ return __builtin_amdgcn_atomic_inc32 (address, max, __ATOMIC_SEQ_CST, " " );
135
+ }
136
+
137
+ template <typename T> INLINE T __kmpc_atomic_max (T *address, T val) {
138
+ return __atomic_fetch_max (address, val, __ATOMIC_SEQ_CST);
139
+ }
140
+
141
+ template <typename T> INLINE T __kmpc_atomic_exchange (T *address, T val) {
142
+ T r;
143
+ __atomic_exchange (address, &val, &r, __ATOMIC_SEQ_CST);
144
+ return r;
145
+ }
146
+
147
+ template <typename T> INLINE T __kmpc_atomic_cas (T *address, T compare, T val) {
148
+ (void )__atomic_compare_exchange (address, &compare, &val, false ,
149
+ __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
150
+ return compare;
151
+ }
152
+
138
153
// Locks
139
154
DEVICE void __kmpc_impl_init_lock (omp_lock_t *lock);
140
155
DEVICE void __kmpc_impl_destroy_lock (omp_lock_t *lock);
0 commit comments