# Patch summary (text before the first "diff --git" header; not part of any hunk).
#
# base/fastmath.jl
#   * The `pow_fast(x, y::Integer)` method that used to accept only `Float64`
#     now accepts any `T <: Base.IEEEFloat`. Its body is unchanged: the exponent
#     is narrowed with `y % Int32`; when the narrowed value still equals `y` the
#     call is forwarded to the `Int32` method, otherwise it falls back to `x^y`.
#   * The dedicated `pow_fast(x::Float32, y::Integer) = x^y` method is removed.
#   * The `Float64`/`Int32` method now names the intrinsic "llvm.powi" instead
#     of "llvm.powi.f64.i32", and matching `Float16` and `Float32` methods with
#     an `Int32` exponent are added.
#     NOTE(review): this presumably relies on the `llvmcall` path selecting the
#     overload of "llvm.powi" from the declared argument types - confirm.
#
# test/fastmath.jl
#   * "pow_fast inlining for literal powers": for Float16, Float32 and Float64,
#     prints the LLVM IR of `f(x) = @fastmath x^2` and checks that the text
#     contains "fmul" and does not contain "power_by_squaring".
#   * "pow_fast correctness": for Float16 and Float32 only, compares
#     `@fastmath x^p` with plain `x^p` via `≈` at x = 2.5 for p = 2, 10, -3 and
#     for `Int64(2)^40`, which is outside the Int32 range.
#     NOTE(review): Float64 is not part of this loop although its method is
#     also rewritten above - confirm it is covered by existing tests.
#     NOTE(review): with x = 2.5 and exponent 2^40 both sides presumably
#     overflow to Inf, so that assertion can only tell the `x^y` fallback apart
#     from a truncated exponent - confirm that is the intent.
#
# Both test comments reference issue #60639.
diff --git a/base/fastmath.jl b/base/fastmath.jl
index ed686fb92bf34..8bfbbf4e2e23b 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -297,12 +297,13 @@ exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
 
 # builtins
 
-@inline function pow_fast(x::Float64, y::Integer)
+@inline function pow_fast(x::T, y::Integer) where T <: Base.IEEEFloat
     z = y % Int32
     z == y ? pow_fast(x, z) : x^y
 end
-pow_fast(x::Float32, y::Integer) = x^y
-pow_fast(x::Float64, y::Int32) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y)
+pow_fast(x::Float16, y::Int32) = ccall("llvm.powi", llvmcall, Float16, (Float16, Int32), x, y)
+pow_fast(x::Float32, y::Int32) = ccall("llvm.powi", llvmcall, Float32, (Float32, Int32), x, y)
+pow_fast(x::Float64, y::Int32) = ccall("llvm.powi", llvmcall, Float64, (Float64, Int32), x, y)
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 
diff --git a/test/fastmath.jl b/test/fastmath.jl
index efca5b85c6642..d3d82f40f9f1a 100644
--- a/test/fastmath.jl
+++ b/test/fastmath.jl
@@ -303,6 +303,31 @@ end
     @test @fastmath (1 + 1 / n) ^ 4503599627370496 ≈ ℯ
 end
 
+# Test that x^2 is inlined to fmul for all float types (issue #60639)
+@testset "pow_fast inlining for literal powers" begin
+    for T in (Float16, Float32, Float64)
+        f(x) = @fastmath x^2
+        llvm = sprint(code_llvm, f, (T,))
+        # Should be inlined to fmul, not call power_by_squaring
+        @test occursin("fmul", llvm)
+        @test !occursin("power_by_squaring", llvm)
+    end
+end
+
+# Test correctness of pow_fast for Float32/Float16 with various exponents (issue #60639)
+@testset "pow_fast correctness" begin
+    for T in (Float16, Float32)
+        x = T(2.5)
+        # Exponents that fit in Int32
+        @test (@fastmath x^2) ≈ x^2
+        @test (@fastmath x^10) ≈ x^10
+        @test (@fastmath x^(-3)) ≈ x^(-3)
+        # Exponents that don't fit in Int32
+        big_exp = Int64(2)^40
+        @test (@fastmath x^big_exp) ≈ x^big_exp
+    end
+end
+
@testset "sincos fall-backs" begin struct FloatWrapper inner::Float64