Skip to content

Commit af625cf

Browse files
committed
[libclc] Move fmin/fmax to the CLC library
Note that the CLC versions of these builtins don't offer the vector/scalar forms, for simplicity; the OpenCL layer converts the vector/scalar form to vector/vector. The CLC builtins use clang's __builtin_elementwise_(min|max), which helps us generate the llvm.(min|max)num intrinsics directly. These intrinsics select the non-NaN input over the NaN input, which adheres to the OpenCL specification. Note that the OpenCL specification doesn't require support for sNaN, so returning qNaN over sNaN is acceptable. Note also that the intrinsics don't differentiate between -0.0 and +0.0; this does not appear to be required, at least going by the OpenCL CTS. These intrinsics maintain the vector types, as opposed to scalarizing, which was previously happening. This commit therefore helps to optimize codegen for targets that benefit from keeping the vector types intact.
1 parent 13a313f commit af625cf

File tree

10 files changed

+96
-52
lines changed

10 files changed

+96
-52
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#include <clc/clcmacro.h>
2+
#include <clc/utils.h>
3+
4+
#ifndef __CLC_BUILTIN
5+
#define __CLC_BUILTIN __CLC_XCONCAT(__clc_, __CLC_FUNCTION)
6+
#endif
7+
8+
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(float, __CLC_FUNCTION, __CLC_BUILTIN,
9+
float, float)
10+
11+
#ifdef cl_khr_fp64
12+
13+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
14+
15+
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(double, __CLC_FUNCTION, __CLC_BUILTIN,
16+
double, double)
17+
18+
#endif
19+
20+
#ifdef cl_khr_fp16
21+
22+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
23+
24+
_CLC_DEFINE_BINARY_BUILTIN_NO_SCALARIZE(half, __CLC_FUNCTION, __CLC_BUILTIN,
25+
half, half)
26+
27+
#endif
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#ifndef __CLC_MATH_CLC_FMAX_H__
2+
#define __CLC_MATH_CLC_FMAX_H__
3+
4+
#define __CLC_FUNCTION __clc_fmax
5+
#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
6+
7+
#include <clc/math/gentype.inc>
8+
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_MATH_CLC_FMAX_H__
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#ifndef __CLC_MATH_CLC_FMIN_H__
2+
#define __CLC_MATH_CLC_FMIN_H__
3+
4+
#define __CLC_FUNCTION __clc_fmin
5+
#define __CLC_BODY <clc/shared/binary_decl_with_scalar_second_arg.inc>
6+
7+
#include <clc/math/gentype.inc>
8+
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_MATH_CLC_FMIN_H__
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
2+
__CLC_GENTYPE y);
3+
4+
#ifndef __CLC_SCALAR
5+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x,
6+
__CLC_SCALAR_GENTYPE y);
7+
#endif
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include <clc/utils.h>
2+
3+
#ifndef __CLC_FUNCTION
4+
#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
5+
#endif
6+
7+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
8+
__CLC_GENTYPE b) {
9+
return __CLC_FUNCTION(FUNCTION)(a, b);
10+
}
11+
12+
#ifndef __CLC_SCALAR
13+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE FUNCTION(__CLC_GENTYPE a,
14+
__CLC_SCALAR_GENTYPE b) {
15+
return __CLC_FUNCTION(FUNCTION)(a, (__CLC_GENTYPE)b);
16+
}
17+
#endif

libclc/clc/lib/generic/SOURCES

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ math/clc_cospi.cl
3434
math/clc_ep_log.cl
3535
math/clc_fabs.cl
3636
math/clc_fma.cl
37-
math/clc_fmod.cl
37+
math/clc_fmax.cl
38+
math/clc_fmin.cl
3839
math/clc_floor.cl
40+
math/clc_fmod.cl
3941
math/clc_frexp.cl
4042
math/clc_hypot.cl
4143
math/clc_ldexp.cl
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <clc/internal/clc.h>
2+
3+
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_fmax
5+
#define __CLC_BUILTIN __builtin_elementwise_max
6+
#include <clc/math/binary_builtin.inc>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#include <clc/internal/clc.h>
2+
3+
#undef __CLC_FUNCTION
4+
#define __CLC_FUNCTION __clc_fmin
5+
#define __CLC_BUILTIN __builtin_elementwise_min
6+
#include <clc/math/binary_builtin.inc>

libclc/generic/lib/math/fmax.cl

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,32 +8,9 @@
88

99
#include <clc/clc.h>
1010
#include <clc/clcmacro.h>
11+
#include <clc/math/clc_fmax.h>
1112

12-
_CLC_DEFINE_BINARY_BUILTIN(float, fmax, __builtin_fmaxf, float, float);
13+
#define FUNCTION fmax
14+
#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
1315

14-
#ifdef cl_khr_fp64
15-
16-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
17-
18-
_CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
19-
20-
#endif
21-
22-
#ifdef cl_khr_fp16
23-
24-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
25-
26-
_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
27-
{
28-
if (isnan(x))
29-
return y;
30-
if (isnan(y))
31-
return x;
32-
return (x < y) ? y : x;
33-
}
34-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
35-
36-
#endif
37-
38-
#define __CLC_BODY <fmax.inc>
3916
#include <clc/math/gentype.inc>

libclc/generic/lib/math/fmin.cl

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,9 @@
88

99
#include <clc/clc.h>
1010
#include <clc/clcmacro.h>
11+
#include <clc/math/clc_fmin.h>
1112

12-
_CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
13+
#define FUNCTION fmin
14+
#define __CLC_BODY <clc/shared/binary_def_with_scalar_second_arg.inc>
1315

14-
#ifdef cl_khr_fp64
15-
16-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
17-
18-
_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
19-
20-
#endif
21-
#ifdef cl_khr_fp16
22-
23-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
24-
25-
_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
26-
{
27-
if (isnan(x))
28-
return y;
29-
if (isnan(y))
30-
return x;
31-
return (y < x) ? y : x;
32-
}
33-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
34-
35-
#endif
36-
37-
#define __CLC_BODY <fmin.inc>
3816
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)