Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/hip/hcc_detail/hip_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ __device__ float __shfl(float input, int lane, int width = warpSize);
// Device-side shuffle declarations for float and double operands.
// In this branch of the conditional, `width` defaults to warpSize, so callers
// may omit it. The branch after #else declares the same functions without a
// default argument.
__device__ float __shfl_up(float input, unsigned int lane_delta, int width = warpSize);
__device__ float __shfl_down(float input, unsigned int lane_delta, int width = warpSize);
__device__ float __shfl_xor(float input, int lane_mask, int width = warpSize);
// Double-precision overloads; parameter lists mirror the float overloads above.
__device__ double __shfl(double input, int lane, int width = warpSize);
__device__ double __shfl_up(double input, unsigned int lane_delta, int width = warpSize);
__device__ double __shfl_down(double input, unsigned int lane_delta, int width = warpSize);
__device__ double __shfl_xor(double input, int lane_mask, int width = warpSize);
#else
// Integer shuffle declarations for the #else branch: no default is supplied
// for `width`, so every argument must be passed explicitly.
__device__ int __shfl(int input, int lane, int width);
__device__ int __shfl_up(int input, unsigned int lane_delta, int width);
Expand All @@ -296,6 +300,10 @@ __device__ float __shfl(float input, int lane, int width);
// Float and double shuffle declarations for the #else branch. Unlike the
// declarations before #else, `width` has no default argument here and must be
// passed explicitly.
__device__ float __shfl_up(float input, unsigned int lane_delta, int width);
__device__ float __shfl_down(float input, unsigned int lane_delta, int width);
__device__ float __shfl_xor(float input, int lane_mask, int width);
// Double-precision overloads; parameter lists mirror the float overloads above.
__device__ double __shfl(double input, int lane, int width);
__device__ double __shfl_up(double input, unsigned int lane_delta, int width);
__device__ double __shfl_down(double input, unsigned int lane_delta, int width);
__device__ double __shfl_xor(double input, int lane_mask, int width);
#endif //__cplusplus

// Device-only declaration taking an int `index` and an unsigned `src` and
// returning an unsigned value. The definition is not visible here.
// NOTE(review): the name suggests a wrapper around a ds_bpermute operation —
// confirm against the implementation before relying on that.
__device__ unsigned __hip_ds_bpermute(int index, unsigned src);
Expand Down
16 changes: 15 additions & 1 deletion src/device_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,20 @@ __device__ float __shfl_xor(float input, int lane_mask, int width) {
return hc::__shfl_xor(input, lane_mask, width);
}

// Double-precision overload of __shfl.
// Passes `input`, `lane` and `width` unchanged to hc::__shfl and returns
// whatever that call produces.
__device__ double __shfl(double input, int lane, int width) {
    const double shuffled = hc::__shfl(input, lane, width);
    return shuffled;
}

// Double-precision overload of __shfl_up.
// Passes `input`, `lane_delta` and `width` unchanged to hc::__shfl_up and
// returns whatever that call produces.
__device__ double __shfl_up(double input, unsigned int lane_delta, int width)
{
    const double shuffled = hc::__shfl_up(input, lane_delta, width);
    return shuffled;
}

// Double-precision overload of __shfl_down.
// Passes `input`, `lane_delta` and `width` unchanged to hc::__shfl_down and
// returns whatever that call produces.
__device__ double __shfl_down(double input, unsigned int lane_delta, int width)
{
    const double shuffled = hc::__shfl_down(input, lane_delta, width);
    return shuffled;
}

// Double-precision overload of __shfl_xor.
// Passes `input`, `lane_mask` and `width` unchanged to hc::__shfl_xor and
// returns whatever that call produces.
__device__ double __shfl_xor(double input, int lane_mask, int width)
{
    const double shuffled = hc::__shfl_xor(input, lane_mask, width);
    return shuffled;
}

// Returns the smaller of two ints (either one when they are equal).
//
// The operands are compared directly as integers. Casting them to float first
// is lossy: float carries a 24-bit significand, so magnitudes above 2^24 are
// rounded and the result may be a value that is neither argument; operands
// near INT_MAX round up to 2^31, which overflows when converted back to int.
//
// arg1, arg2: the values to compare.
__host__ __device__ int min(int arg1, int arg2) {
    return (arg2 < arg1) ? arg2 : arg1;
}
Expand All @@ -333,4 +347,4 @@ __device__ void* __get_dynamicgroupbaseptr() {
// Host-side __get_dynamicgroupbaseptr: always yields a null pointer.
__host__ void* __get_dynamicgroupbaseptr()
{
    void* const hostBasePtr = nullptr;
    return hostBasePtr;
}


// __threadfence_system: issues std::atomic_thread_fence with
// std::memory_order_seq_cst and returns nothing.
// NOTE(review): the two lines below are textually identical. They look like
// the removed and added sides of a diff hunk (possibly an end-of-file newline
// change) rather than two separate definitions — confirm before treating this
// as a redefinition.
__device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); }
__device__ void __threadfence_system(void) { std::atomic_thread_fence(std::memory_order_seq_cst); }