Skip to content

Commit 4e18a86

Browse files
gbaraldigiordano
authored and committed
Change min from llvmcall to intrinsic
1 parent 5255bcc commit 4e18a86

File tree

6 files changed

+85
-8
lines changed

6 files changed

+85
-8
lines changed

Compiler/src/tfuncs.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ add_tfunc(add_float, 2, 2, math_tfunc, 2)
189189
add_tfunc(sub_float, 2, 2, math_tfunc, 2)
190190
add_tfunc(mul_float, 2, 2, math_tfunc, 8)
191191
add_tfunc(div_float, 2, 2, math_tfunc, 10)
192+
add_tfunc(min_float, 2, 2, math_tfunc, 1)
193+
add_tfunc(max_float, 2, 2, math_tfunc, 1)
192194
add_tfunc(fma_float, 3, 3, math_tfunc, 8)
193195
add_tfunc(muladd_float, 3, 3, math_tfunc, 8)
194196

@@ -198,6 +200,8 @@ add_tfunc(add_float_fast, 2, 2, math_tfunc, 2)
198200
add_tfunc(sub_float_fast, 2, 2, math_tfunc, 2)
199201
add_tfunc(mul_float_fast, 2, 2, math_tfunc, 8)
200202
add_tfunc(div_float_fast, 2, 2, math_tfunc, 10)
203+
add_tfunc(min_float_fast, 2, 2, math_tfunc, 1)
204+
add_tfunc(max_float_fast, 2, 2, math_tfunc, 1)
201205

202206
# bitwise operators
203207
# -----------------

base/fastmath.jl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ module FastMath
2828
export @fastmath
2929

3030
import Core.Intrinsics: sqrt_llvm_fast, neg_float_fast,
31-
add_float_fast, sub_float_fast, mul_float_fast, div_float_fast,
31+
add_float_fast, sub_float_fast, mul_float_fast, div_float_fast, min_float_fast, max_float_fast,
3232
eq_float_fast, ne_float_fast, lt_float_fast, le_float_fast
3333
import Base: afoldl
3434

@@ -168,6 +168,9 @@ add_fast(x::T, y::T) where {T<:FloatTypes} = add_float_fast(x, y)
168168
sub_fast(x::T, y::T) where {T<:FloatTypes} = sub_float_fast(x, y)
169169
mul_fast(x::T, y::T) where {T<:FloatTypes} = mul_float_fast(x, y)
170170
div_fast(x::T, y::T) where {T<:FloatTypes} = div_float_fast(x, y)
171+
max_fast(x::T, y::T) where {T<:FloatTypes} = max_float_fast(x, y)
172+
min_fast(x::T, y::T) where {T<:FloatTypes} = min_float_fast(x, y)
173+
minmax_fast(x::T, y::T) where {T<:FloatTypes} = (min_fast(x, y), max_fast(x, y))
171174

172175
@fastmath begin
173176
cmp_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(x==y, 0, ifelse(x<y, -1, +1))
@@ -236,11 +239,6 @@ ComplexTypes = Union{ComplexF32, ComplexF64}
236239

237240
ne_fast(x::T, y::T) where {T<:ComplexTypes} = !(x==y)
238241

239-
# Note: we use the same comparison for min, max, and minmax, so
240-
# that the compiler can convert between them
241-
max_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, y, x)
242-
min_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, x, y)
243-
minmax_fast(x::T, y::T) where {T<:FloatTypes} = ifelse(y > x, (x,y), (y,x))
244242
end
245243

246244
# fall-back implementations and type promotion

src/intrinsics.cpp

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,14 @@ const auto &float_func() {
8383
float_func[sub_float] = true;
8484
float_func[mul_float] = true;
8585
float_func[div_float] = true;
86+
float_func[min_float] = true;
87+
float_func[max_float] = true;
8688
float_func[add_float_fast] = true;
8789
float_func[sub_float_fast] = true;
8890
float_func[mul_float_fast] = true;
8991
float_func[div_float_fast] = true;
92+
float_func[min_float_fast] = true;
93+
float_func[max_float_fast] = true;
9094
float_func[fma_float] = true;
9195
float_func[muladd_float] = true;
9296
float_func[eq_float] = true;
@@ -134,7 +138,7 @@ uint32_t jl_get_LLVM_VERSION_impl(void)
134138
the bitcast function does nothing except change the type tag
135139
of a value. At the user-level, it is perhaps better known as reinterpret.
136140
boxing is delayed until absolutely necessary, and handled at the point
137-
where the box is needed.
141+
where the box is needed.
138142
all intrinsics have a non-compiled implementation, this file contains
139143
the optimizations for handling them unboxed
140144
*/
@@ -1490,6 +1494,34 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, ArrayRef<Va
14901494
case sub_float: return math_builder(ctx)().CreateFSub(x, y);
14911495
case mul_float: return math_builder(ctx)().CreateFMul(x, y);
14921496
case div_float: return math_builder(ctx)().CreateFDiv(x, y);
1497+
case min_float: {
1498+
assert(x->getType() == y->getType());
1499+
FunctionCallee minintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::minimum, ArrayRef<Type*>(t));
1500+
return ctx.builder.CreateCall(minintr, {x, y});
1501+
}
1502+
case max_float: {
1503+
assert(x->getType() == y->getType());
1504+
FunctionCallee maxintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::maximum, ArrayRef<Type*>(t));
1505+
return ctx.builder.CreateCall(maxintr, {x, y});
1506+
}
1507+
case min_float_fast: {
1508+
assert(x->getType() == y->getType());
1509+
FunctionCallee minintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::minimum, ArrayRef<Type*>(t));
1510+
auto call = ctx.builder.CreateCall(minintr, {x, y});
1511+
auto fmf = call->getFastMathFlags();
1512+
fmf.setFast();
1513+
call->copyFastMathFlags(fmf);
1514+
return call;
1515+
}
1516+
case max_float_fast: {
1517+
assert(x->getType() == y->getType());
1518+
FunctionCallee maxintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::maximum, ArrayRef<Type*>(t));
1519+
auto call = ctx.builder.CreateCall(maxintr, {x, y});
1520+
auto fmf = call->getFastMathFlags();
1521+
fmf.setFast();
1522+
call->copyFastMathFlags(fmf);
1523+
return call;
1524+
}
14931525
case add_float_fast: return math_builder(ctx, true)().CreateFAdd(x, y);
14941526
case sub_float_fast: return math_builder(ctx, true)().CreateFSub(x, y);
14951527
case mul_float_fast: return math_builder(ctx, true)().CreateFMul(x, y);

src/intrinsics.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
ADD_I(sub_float, 2) \
1818
ADD_I(mul_float, 2) \
1919
ADD_I(div_float, 2) \
20+
ADD_I(min_float, 2) \
21+
ADD_I(max_float, 2) \
2022
ADD_I(fma_float, 3) \
2123
ADD_I(muladd_float, 3) \
2224
/* fast arithmetic */ \
@@ -25,6 +27,8 @@
2527
ALIAS(sub_float_fast, sub_float) \
2628
ALIAS(mul_float_fast, mul_float) \
2729
ALIAS(div_float_fast, div_float) \
30+
ALIAS(min_float_fast, min_float) \
31+
ALIAS(max_float_fast, max_float) \
2832
/* same-type comparisons */ \
2933
ADD_I(eq_int, 2) \
3034
ADD_I(ne_int, 2) \

src/julia_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1595,6 +1595,8 @@ JL_DLLEXPORT jl_value_t *jl_add_float(jl_value_t *a, jl_value_t *b);
15951595
JL_DLLEXPORT jl_value_t *jl_sub_float(jl_value_t *a, jl_value_t *b);
15961596
JL_DLLEXPORT jl_value_t *jl_mul_float(jl_value_t *a, jl_value_t *b);
15971597
JL_DLLEXPORT jl_value_t *jl_div_float(jl_value_t *a, jl_value_t *b);
1598+
JL_DLLEXPORT jl_value_t *jl_min_float(jl_value_t *a, jl_value_t *b);
1599+
JL_DLLEXPORT jl_value_t *jl_max_float(jl_value_t *a, jl_value_t *b);
15981600
JL_DLLEXPORT jl_value_t *jl_fma_float(jl_value_t *a, jl_value_t *b, jl_value_t *c);
15991601
JL_DLLEXPORT jl_value_t *jl_muladd_float(jl_value_t *a, jl_value_t *b, jl_value_t *c);
16001602

src/runtime_intrinsics.c

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1398,13 +1398,50 @@ bi_iintrinsic_fast(LLVMURem, rem, urem_int, u)
13981398
bi_iintrinsic_fast(jl_LLVMSMod, smod, smod_int, )
13991399
#define frem(a, b) \
14001400
fp_select2(a, b, fmod)
1401-
14021401
un_fintrinsic(neg_float,neg_float)
14031402
bi_fintrinsic(add,add_float)
14041403
bi_fintrinsic(sub,sub_float)
14051404
bi_fintrinsic(mul,mul_float)
14061405
bi_fintrinsic(div,div_float)
14071406

1407+
// Smaller of two floats. If either operand is NaN the result is NaN, and
// -0.0 is treated as smaller than +0.0.
float min_float(float x, float y)
{
    // x - y is NaN as soon as one operand is NaN, so it doubles as the NaN result.
    const float delta = x - y;
    if (isnan(x) || isnan(y))
        return delta;
    // The sign bit of the difference (set for -0.0 as well) marks x as the smaller one.
    return signbit(delta) ? x : y;
}
1414+
1415+
// Smaller of two doubles. If either operand is NaN the result is NaN, and
// -0.0 is treated as smaller than +0.0.
double min_double(double x, double y)
{
    // x - y is NaN as soon as one operand is NaN, so it doubles as the NaN result.
    const double delta = x - y;
    if (isnan(x) || isnan(y))
        return delta;
    // The sign bit of the difference (set for -0.0 as well) marks x as the smaller one.
    return signbit(delta) ? x : y;
}
1422+
1423+
#define _min(a, b) sizeof(a) == sizeof(float) ? min_float(a, b) : min_double(a, b)
1424+
bi_fintrinsic(_min, min_float)
1425+
1426+
// Larger of two floats. If either operand is NaN the result is NaN, and
// +0.0 is treated as larger than -0.0.
float max_float(float x, float y)
{
    // x - y is NaN as soon as one operand is NaN, so it doubles as the NaN result.
    const float delta = x - y;
    if (isnan(x) || isnan(y))
        return delta;
    // The sign bit of the difference (set for -0.0 as well) marks y as the larger one.
    return signbit(delta) ? y : x;
}
1433+
1434+
// Larger of two doubles. If either operand is NaN the result is NaN, and
// +0.0 is treated as larger than -0.0.
double max_double(double x, double y)
{
    double diff = x - y;
    // A set sign bit on x - y (including -0.0, i.e. x == -0.0 and y == +0.0)
    // means y is the larger operand. The previous `? x : y` ordering was the
    // min selection and returned the smaller operand.
    double argmax = signbit(diff) ? y : x;
    int is_nan = isnan(x) || isnan(y);
    // With a NaN operand, diff is itself NaN, so returning it propagates the NaN.
    return is_nan ? diff : argmax;
}
1441+
1442+
#define _max(a, b) sizeof(a) == sizeof(float) ? max_float(a, b) : max_double(a, b)
1443+
bi_fintrinsic(_max, max_float)
1444+
14081445
// ternary operators //
14091446
// runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference.
14101447
#if defined(_OS_WINDOWS_)

0 commit comments

Comments
 (0)