Add hipclang amdgcn functions#515
Conversation
|
So far I have added assertfail, ballot, clock, lanemask, memory, named sync, smid, sync, and trap. These previously existed as .cl and .ll files in https://github.com/RadeonOpenCompute/ROCm-Device-Libs/tree/9b46f23b1795b56600bb39ff2594ce129ab0222e/hip/src |
|
|
||
| // hip.amdgcn.bc - sync threads | ||
| // extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); | ||
| #define CLK_LOCAL_MEM_FENCE 0x01 |
There was a problem hiding this comment.
please prefix with __, since we don't want to pollute users' name space
| // hip.amdgcn.bc - sync threads | ||
| // extern "C" __device__ __attribute__((noduplicate)) void __syncthreads(); | ||
| #define CLK_LOCAL_MEM_FENCE 0x01 | ||
| #define local __attribute__((address_space(3))) |
| #define CLK_LOCAL_MEM_FENCE 0x01 | ||
| #define local __attribute__((address_space(3))) | ||
|
|
||
| typedef unsigned cl_mem_fence_flags; |
|
|
||
| typedef unsigned cl_mem_fence_flags; | ||
|
|
||
| typedef enum memory_scope { |
| } memory_scope; | ||
|
|
||
| // enum values aligned with what clang uses in EmitAtomicExpr() | ||
| typedef enum memory_order |
| } memory_order; | ||
|
|
||
| extern "C" __device__ __attribute__((overloadable)) | ||
| void atomic_work_item_fence(cl_mem_fence_flags, memory_order, memory_scope); |
There was a problem hiding this comment.
we cannot introduce this function name without prefix __. we need either implement hc_work_group_barrier with functions the name of which is prefixed with __, or add alias prefixed with __ to atomic_work_item_fence in the device library.
|
|
||
| __device__ | ||
| inline | ||
| void* __get_dynamicgroupbaseptr() { return get_dynamic_group_segment_base_pointer(); } |
There was a problem hiding this comment.
can we remove get_dynamic_group_segment_base_pointer and implement __get_dynamicgroupbaseptr directly? I think the disabled code in __amdgcn_get_dynamicgroupbaseptr is related to this.
|
|
||
| __device__ | ||
| inline | ||
| static void hc_work_group_barrier(__cl_mem_fence_flags flags, __memory_scope scope) |
There was a problem hiding this comment.
can we rename it to __work_group_barrier?
|
|
||
| __device__ | ||
| inline | ||
| static void hc_barrier(int n) |
There was a problem hiding this comment.
can we rename it as __barrier?
|
I think it is cleaner to create two new header files: llvm_intrinsics.h - containing declarations for wrapper functions for llvm intrinsics like llvm.amdgcn.s.barrier device_library_decls.h - containing declarations for types and functions in device library Other changes in this patch can be moved to device_functions.h |
|
I'm working on separating this into 3 files, llvm_intrinsics.h, device_library_decls.h, and device_functions.h. Thanks @yxsamliu |
f74ce78 to
219e717
Compare
These are moving from hipclang in device library to hip headers. These are required for the functionality of HIPclang project.
Move all Integer Intrinsics, device_functions.cpp definitions and HIP specific device functions into HIP headers. Implement the device functions using llvm_intrinsics and device-libs functions instead of calling hc::__* functions. Remove device_functions.cpp since everything is now defined in header.
Move all __hip_hc_ir_* functions from hip_hc.ll into HIP header as inline asm. Remove hip_hc.ll and build dependencies from HIP.
Fix surface test on HIP clang path.
|
@mangupta I've added a few more changes to allow hip-clang path to pass tests: hipSurfaceObj2D, hip_test_ldg, hip_threadfence_system, and hipNullStream. Thanks! |
| return ballot; | ||
| } | ||
|
|
||
| #ifdef __HIP_DEVICE_COMPILE__ |
There was a problem hiding this comment.
we still need the device functions to be declared for host compilation, therefore the code should be like this
#ifdef __HIP_DEVICE_COMPILE__
__device__
inline
void* __get_dynamicgroupbaseptr()
{
// Get group segment base pointer.
return (char*)__local_to_generic(__to_local(__llvm_amdgcn_groupstaticsize()));
}
#else
__device__
void* __get_dynamicgroupbaseptr();
#endif // __HIP_DEVICE_COMPILE__
There was a problem hiding this comment.
Hi Sam, I've cherry-picked your fixes into this PR related to your review. Thanks!
|
Pleas hold on. We need Brian to review it. Thanks. |
|
@b-sumner - I will make a new commit to remove the old usage of __activelanemake_v4_b64_b1. |
Replace implementation of __any and __all functions using OCKL functions and replaced __ballot implementation to use llvm intrinsic llvm.amdgcn.icmp.i32 instead of calls to __activelanemask_v4_b64_b1 which is not convergent.
|
|
||
| extern "C" __device__ int32_t __ockl_activelane_u32(void); | ||
|
|
||
| extern "C" __device__ uint __ockl_mul24_u32(uint, uint); |
There was a problem hiding this comment.
Please copy the function attributes these functions are given in ocml.h.
| //__device__ void __threadfence_system(void) __attribute__((deprecated("Provided with workaround | ||
| //configuration, see hip_kernel_language.md for details"))); | ||
| __device__ void __threadfence_system(void); | ||
| __device__ inline static int min(int arg1, int arg2) { |
There was a problem hiding this comment.
These can't work. An f32 can not exactly represent every i32. Just use arg1 < arg2 ? arg1 : arg2. The compiler will do the rest.
|
|
||
| #include "hip/hcc_detail/host_defines.h" | ||
|
|
||
| __device__ ulong __llvm_amdgcn_icmp_i32(uint x, uint y, uint z) __asm("llvm.amdgcn.icmp.i32"); |
There was a problem hiding this comment.
This should have the convergent attribute if it is really needed.
| __device__ static inline int __float2int_ru(float x) { return (int)__ocml_ceil_f32(x); } | ||
| __device__ static inline int __float2int_rz(float x) { return (int)__ocml_trunc_f32(x); } | ||
|
|
||
| __device__ static inline long long int __float2ll_rd(float x) { return (long long int)x; } |
There was a problem hiding this comment.
These are not correct. There should at least be a comment indicating that. Are correct implementations needed?
There was a problem hiding this comment.
I copied these conversion functions from old HIP implementation. How can I fix it?
There was a problem hiding this comment.
I think the correct versions cause isel failures in backend. Probably just add ToDo.
|
|
||
| __device__ static inline double __int2double_rn(int x) { return (double)x; } | ||
|
|
||
| __device__ static inline float __int2float_rd(int x) { return (float)x; } |
There was a problem hiding this comment.
These are not correct. Do they need to be?
|
I only noted a few of the conversion function problems. There are several others. |
|
Thanks @b-sumner, I've added the missing attributes, changed llvm.amdgcn.icmp.i32 asm and changed min/max as you suggested. For the conversion functions, I've left a TODO comment for the future. |
Hi Brian, I've revised to your reviews. Let me know if it is resolved. Also we've discussed that we can keep the conversion functions as it is for now with a TODO.
|
Looks good to me. I will work on the conversion functions after this is checked in. |
|
@mangupta ping - we need this merged for internal HIP testing and investigation. |
These are moving from hipclang in device library to hip headers. These are required for the functionality of HIPclang project.