Skip to content

Commit 35e6a9c

Browse files
committed
replace __inline__ with __forceinline__
1 parent f952a14 commit 35e6a9c

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

cuda_helper.h

+13-13
Original file line number | Diff line number | Diff line change
@@ -668,7 +668,7 @@ static __device__ __forceinline__ uint2 operator* (uint2 a, uint2 b)
668668

669669
// uint2 method
670670
#if __CUDA_ARCH__ >= 320 && !defined NOASM
671-
static __device__ __inline__ uint2 ROR2(const uint2 a, const int offset)
671+
static __device__ __forceinline__ uint2 ROR2(const uint2 a, const int offset)
672672
{
673673
uint2 result;
674674
if (offset < 32) {
@@ -682,7 +682,7 @@ static __device__ __inline__ uint2 ROR2(const uint2 a, const int offset)
682682
return result;
683683
}
684684
#else
685-
static __device__ __inline__ uint2 ROR2(const uint2 v, const int n)
685+
static __device__ __forceinline__ uint2 ROR2(const uint2 v, const int n)
686686
{
687687
uint2 result;
688688
if (n <= 32)
@@ -699,26 +699,26 @@ static __device__ __inline__ uint2 ROR2(const uint2 v, const int n)
699699
}
700700
#endif
701701

702-
static __device__ __inline__ uint32_t ROL8(const uint32_t x)
702+
// Rotate a 32-bit word left by 8 bits (one byte position).
//
// x: value to rotate. Returns x rotated left by 8.
//
// When __CUDA_ARCH__ is defined the rotation is a single __byte_perm:
// selector 0x2103 fills result bytes 3..0 with source bytes 2,1,0,3, which
// moves every byte up one position and wraps the top byte to the bottom.
// When __CUDA_ARCH__ is not defined the equivalent shift/or form is used, so
// this non-void function never reaches its closing brace without returning.
static __device__ __forceinline__ uint32_t ROL8(const uint32_t x)
{
#ifdef __CUDA_ARCH__
	return __byte_perm(x, x, 0x2103);
#else
	return (x << 8) | (x >> 24);
#endif
}
708-
static __device__ __inline__ uint32_t ROL16(const uint32_t x)
708+
// Rotate a 32-bit word left by 16 bits (swap the two 16-bit halves).
//
// x: value to rotate. Returns x rotated left by 16.
//
// When __CUDA_ARCH__ is defined the rotation is a single __byte_perm:
// selector 0x1032 fills result bytes 3..0 with source bytes 1,0,3,2, which
// exchanges the upper and lower halfwords.
// When __CUDA_ARCH__ is not defined the equivalent shift/or form is used, so
// this non-void function never reaches its closing brace without returning.
static __device__ __forceinline__ uint32_t ROL16(const uint32_t x)
{
#ifdef __CUDA_ARCH__
	return __byte_perm(x, x, 0x1032);
#else
	return (x << 16) | (x >> 16);
#endif
}
714-
static __device__ __inline__ uint32_t ROL24(const uint32_t x)
714+
// Rotate a 32-bit word left by 24 bits (equivalently, right by 8 bits).
//
// x: value to rotate. Returns x rotated left by 24.
//
// When __CUDA_ARCH__ is defined the rotation is a single __byte_perm:
// selector 0x0321 fills result bytes 3..0 with source bytes 0,3,2,1, which
// moves every byte down one position and wraps the bottom byte to the top.
// When __CUDA_ARCH__ is not defined the equivalent shift/or form is used, so
// this non-void function never reaches its closing brace without returning.
static __device__ __forceinline__ uint32_t ROL24(const uint32_t x)
{
#ifdef __CUDA_ARCH__
	return __byte_perm(x, x, 0x0321);
#else
	return (x << 24) | (x >> 8);
#endif
}
720720

721-
static __device__ __inline__ uint2 ROR8(const uint2 a)
721+
static __device__ __forceinline__ uint2 ROR8(const uint2 a)
722722
{
723723
#ifdef __CUDA_ARCH__
724724
uint2 result;
@@ -729,7 +729,7 @@ static __device__ __inline__ uint2 ROR8(const uint2 a)
729729
#endif
730730
}
731731

732-
static __device__ __inline__ uint2 ROR16(const uint2 a)
732+
static __device__ __forceinline__ uint2 ROR16(const uint2 a)
733733
{
734734
#ifdef __CUDA_ARCH__
735735
uint2 result;
@@ -740,7 +740,7 @@ static __device__ __inline__ uint2 ROR16(const uint2 a)
740740
#endif
741741
}
742742

743-
static __device__ __inline__ uint2 ROR24(const uint2 a)
743+
static __device__ __forceinline__ uint2 ROR24(const uint2 a)
744744
{
745745
#ifdef __CUDA_ARCH__
746746
uint2 result;
@@ -751,7 +751,7 @@ static __device__ __inline__ uint2 ROR24(const uint2 a)
751751
#endif
752752
}
753753

754-
static __device__ __inline__ uint2 ROL8(const uint2 a)
754+
static __device__ __forceinline__ uint2 ROL8(const uint2 a)
755755
{
756756
#ifdef __CUDA_ARCH__
757757
uint2 result;
@@ -762,7 +762,7 @@ static __device__ __inline__ uint2 ROL8(const uint2 a)
762762
#endif
763763
}
764764

765-
static __device__ __inline__ uint2 ROL16(const uint2 a)
765+
static __device__ __forceinline__ uint2 ROL16(const uint2 a)
766766
{
767767
#ifdef __CUDA_ARCH__
768768
uint2 result;
@@ -773,7 +773,7 @@ static __device__ __inline__ uint2 ROL16(const uint2 a)
773773
#endif
774774
}
775775

776-
static __device__ __inline__ uint2 ROL24(const uint2 a)
776+
static __device__ __forceinline__ uint2 ROL24(const uint2 a)
777777
{
778778
#ifdef __CUDA_ARCH__
779779
uint2 result;
@@ -787,7 +787,7 @@ static __device__ __inline__ uint2 ROL24(const uint2 a)
787787
#if __CUDA_ARCH__ >= 320 && !defined NOASM
788788

789789

790-
__inline__ static __device__ uint2 ROL2(const uint2 a, const int offset) {
790+
__forceinline__ static __device__ uint2 ROL2(const uint2 a, const int offset) {
791791
uint2 result;
792792
if (offset >= 32) {
793793
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset));
@@ -800,7 +800,7 @@ __inline__ static __device__ uint2 ROL2(const uint2 a, const int offset) {
800800
return result;
801801
}
802802
#else
803-
__inline__ static __device__ uint2 ROL2(const uint2 v, const int n)
803+
__forceinline__ static __device__ uint2 ROL2(const uint2 v, const int n)
804804
{
805805
uint2 result;
806806
if (n <= 32)

0 commit comments

Comments
 (0)