diff --git a/Project.toml b/Project.toml
index 270353467..a7894dcd4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "LoopVectorization"
 uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
 authors = ["Chris Elrod "]
-version = "0.12.163"
+version = "0.12.164"
 
 [deps]
diff --git a/benchmark/looptests.jl b/benchmark/looptests.jl
index a5f417afb..feb9a09ce 100644
--- a/benchmark/looptests.jl
+++ b/benchmark/looptests.jl
@@ -76,12 +76,12 @@ function jgemm!(𝐂, 𝐀ᵀ::Adjoint, 𝐁ᵀ::Adjoint)
   end
 end
 gemmavx!(𝐂, 𝐀, 𝐁) = @turbo for m ∈ indices((𝐀, 𝐂), 1), n ∈ indices((𝐁, 𝐂), 2)
-    𝐂ₘₙ = zero(eltype(𝐂))
-    for k ∈ indices((𝐀, 𝐁), (2, 1))
-      𝐂ₘₙ += 𝐀[m, k] * 𝐁[k, n]
-    end
-    𝐂[m, n] = 𝐂ₘₙ
+  𝐂ₘₙ = zero(eltype(𝐂))
+  for k ∈ indices((𝐀, 𝐁), (2, 1))
+    𝐂ₘₙ += 𝐀[m, k] * 𝐁[k, n]
   end
+  𝐂[m, n] = 𝐂ₘₙ
+end
 function gemmavx!(
   Cc::AbstractMatrix{Complex{T}},
   Ac::AbstractMatrix{Complex{T}},
@@ -102,12 +102,12 @@ function gemmavx!(
   end
 end
 gemmavxt!(𝐂, 𝐀, 𝐁) = @tturbo for m ∈ indices((𝐀, 𝐂), 1), n ∈ indices((𝐁, 𝐂), 2)
-    𝐂ₘₙ = zero(eltype(𝐂))
-    for k ∈ indices((𝐀, 𝐁), (2, 1))
-      𝐂ₘₙ += 𝐀[m, k] * 𝐁[k, n]
-    end
-    𝐂[m, n] = 𝐂ₘₙ
+  𝐂ₘₙ = zero(eltype(𝐂))
+  for k ∈ indices((𝐀, 𝐁), (2, 1))
+    𝐂ₘₙ += 𝐀[m, k] * 𝐁[k, n]
   end
+  𝐂[m, n] = 𝐂ₘₙ
+end
 function gemmavxt!(
   Cc::AbstractMatrix{Complex{T}},
   Ac::AbstractMatrix{Complex{T}},
@@ -204,11 +204,11 @@ function jdot3avx(x, A, y)
   s
 end
 jvexp!(b, a) = @inbounds for i ∈ eachindex(a)
-    b[i] = exp(a[i])
-  end
+  b[i] = exp(a[i])
+end
 jvexpavx!(b, a) = @turbo for i ∈ eachindex(a)
-    b[i] = exp(a[i])
-  end
+  b[i] = exp(a[i])
+end
 function jsvexp(a)
   s = zero(eltype(a))
   @inbounds for i ∈ eachindex(a)
@@ -242,12 +242,12 @@ function jgemv!(𝐲, 𝐀ᵀ::Adjoint, 𝐱)
   end
 end
 jgemvavx!(𝐲, 𝐀, 𝐱) = @turbo for i ∈ eachindex(𝐲)
-    𝐲ᵢ = zero(eltype(𝐲))
-    for j ∈ eachindex(𝐱)
-      𝐲ᵢ += 𝐀[i, j] * 𝐱[j]
-    end
-    𝐲[i] = 𝐲ᵢ
+  𝐲ᵢ = zero(eltype(𝐲))
+  for j ∈ eachindex(𝐱)
+    𝐲ᵢ += 𝐀[i, j] * 𝐱[j]
   end
+  𝐲[i] = 𝐲ᵢ
+end
 function jvar!(𝐬², 𝐀, x̄)
   @. s² = zero(eltype(𝐬²))
   @inbounds @fastmath for i ∈ 1:size(𝐀, 2)
@@ -258,14 +258,14 @@ function jvar!(𝐬², 𝐀, x̄)
   end
 end
 jvaravx!(𝐬², 𝐀, x̄) = @turbo for j ∈ eachindex(𝐬²)
-    𝐬²ⱼ = zero(eltype(𝐬²))
-    x̄ⱼ = x̄[j]
-    for i ∈ 1:size(𝐀, 2)
-      δ = 𝐀[j, i] - x̄ⱼ
-      𝐬²ⱼ += δ * δ
-    end
-    𝐬²[j] = 𝐬²ⱼ
+  𝐬²ⱼ = zero(eltype(𝐬²))
+  x̄ⱼ = x̄[j]
+  for i ∈ 1:size(𝐀, 2)
+    δ = 𝐀[j, i] - x̄ⱼ
+    𝐬²ⱼ += δ * δ
   end
+  𝐬²[j] = 𝐬²ⱼ
+end
 japlucBc!(D, a, B, c) = @. D = a + B * c';
 japlucBcavx!(D, a, B, c) = @turbo @. D = a + B * c';
diff --git a/benchmark/plotbenchmarks.jl b/benchmark/plotbenchmarks.jl
index e9f984505..125f435cc 100644
--- a/benchmark/plotbenchmarks.jl
+++ b/benchmark/plotbenchmarks.jl
@@ -29,7 +29,8 @@ else
   # const COLOR_MAP = Dict{String,RGB{Float64}}()
   # const COLOR_MAP = Dict{String,RGB{Colors.N0f8}}()
   const COLOR_MAP64 = Dict{String,RGB{Float64}}()
-  getcolor(s::String) = get!(COLOR_MAP64, s) do
+  getcolor(s::String) =
+    get!(COLOR_MAP64, s) do
       COLORS[length(COLOR_MAP64)+1]
     end
   replace_and(str) = replace(str, '&' => "with")
diff --git a/ext/ForwardDiffExt.jl b/ext/ForwardDiffExt.jl
index 00dfc5565..26227f694 100644
--- a/ext/ForwardDiffExt.jl
+++ b/ext/ForwardDiffExt.jl
@@ -157,8 +157,8 @@ end
   end
 end
 
-@generated function ifelse(
-  m::AbstractMask,
+@generated function _ifelse(
+  m::Union{AbstractMask,VecUnroll{<:Any,<:Any,Bit,<:AbstractMask}},
   x::ForwardDiff.Dual{TAG,V,P},
   y::ForwardDiff.Dual{TAG,V,P}
 ) where {TAG,V,P}
@@ -171,8 +171,8 @@ end
     ForwardDiff.Dual{$TAG}(z, ForwardDiff.Partials(p))
   end
 end
-@generated function ifelse(
-  m::AbstractMask,
+@generated function _ifelse(
+  m::Union{AbstractMask,VecUnroll{<:Any,<:Any,Bit,<:AbstractMask}},
   x::Number,
   y::ForwardDiff.Dual{TAG,V,P}
 ) where {TAG,V,P}
@@ -184,8 +184,8 @@ end
     ForwardDiff.Dual{$TAG}(z, ForwardDiff.Partials(p))
   end
 end
-@generated function ifelse(
-  m::AbstractMask,
+@generated function _ifelse(
+  m::Union{AbstractMask,VecUnroll{<:Any,<:Any,Bit,<:AbstractMask}},
   x::ForwardDiff.Dual{TAG,V,P},
   y::Number
 ) where {TAG,V,P}
@@ -197,6 +197,29 @@ end
     ForwardDiff.Dual{$TAG}(z, ForwardDiff.Partials(p))
   end
 end
+@inline ifelse(m::AbstractMask, x::ForwardDiff.Dual, y::Number) =
+  _ifelse(m, x, y)
+@inline ifelse(m::AbstractMask, x::ForwardDiff.Dual, y::ForwardDiff.Dual) =
+  _ifelse(m, x, y)
+@inline ifelse(m::AbstractMask, y::Number, x::ForwardDiff.Dual) =
+  _ifelse(m, y, x)
+
+@inline ifelse(
+  m::VecUnroll{<:Any,<:Any,Bit,<:AbstractMask},
+  x::ForwardDiff.Dual,
+  y::Number
+) = _ifelse(m, x, y)
+@inline ifelse(
+  m::VecUnroll{<:Any,<:Any,Bit,<:AbstractMask},
+  x::ForwardDiff.Dual,
+  y::ForwardDiff.Dual
+) = _ifelse(m, x, y)
+@inline ifelse(
+  m::VecUnroll{<:Any,<:Any,Bit,<:AbstractMask},
+  y::Number,
+  x::ForwardDiff.Dual
+) = _ifelse(m, y, x)
+
 @inline function SLEEFPirates.softplus(x::ForwardDiff.Dual{TAG}) where {TAG}
   val = ForwardDiff.value(x)
   expx = exp(val)
diff --git a/src/LoopVectorization.jl b/src/LoopVectorization.jl
index 7856fcc6c..d8033d307 100644
--- a/src/LoopVectorization.jl
+++ b/src/LoopVectorization.jl
@@ -108,7 +108,8 @@ using VectorizationBase:
   contract_or,
   collapse_or,
   max_mask,
-  maybestaticsize,zero_mask
+  maybestaticsize,
+  zero_mask
 
 using HostCPUFeatures:
   pick_vector_width,
diff --git a/src/codegen/split_loops.jl b/src/codegen/split_loops.jl
index af1eafba8..95978a0f5 100644
--- a/src/codegen/split_loops.jl
+++ b/src/codegen/split_loops.jl
@@ -76,7 +76,8 @@ function add_operation!(
   opnew
 end
 
-append_if_included!(vnew, vold, included) = for (i, v) ∈ vold
+append_if_included!(vnew, vold, included) =
+  for (i, v) ∈ vold
     id = included[i]
     iszero(id) || push!(vnew, (id, v))
   end
diff --git a/src/modeling/costs.jl b/src/modeling/costs.jl
index 8c2c2bbb5..0d2b40771 100644
--- a/src/modeling/costs.jl
+++ b/src/modeling/costs.jl
@@ -13,7 +13,8 @@ struct Instruction
 end
 # lower(instr::Instruction) = Expr(:(.), instr.mod, QuoteNode(instr.instr))
 # Base.convert(::Type{Expr}, instr::Instruction) = Expr(:(.), instr.mod, QuoteNode(instr.instr))
-callexpr(instr::Instruction) = if instr.mod === :LoopVectorization
+callexpr(instr::Instruction) =
+  if instr.mod === :LoopVectorization
     Expr(:call, lv(instr.instr))
   else#if instr.mod === :Main
     Expr(:call, instr.instr)
@@ -563,7 +564,8 @@ function reduction_to_single_vector(x::Float64)
     throw("Reduction not found.")
   end
 end
-reduce_to_onevecunroll(x::Float64) = if x == ADDITIVE_IN_REDUCTIONS
+reduce_to_onevecunroll(x::Float64) =
+  if x == ADDITIVE_IN_REDUCTIONS
     :+
   elseif x == MULTIPLICATIVE_IN_REDUCTIONS
     :*
@@ -578,7 +580,8 @@ reduce_to_onevecunroll(x::Float64) = if x == ADDITIVE_IN_REDUCTIONS
   else
     throw("Reduction not found.")
   end
-reduce_number_of_vectors(x::Float64) = if x == ADDITIVE_IN_REDUCTIONS
+reduce_number_of_vectors(x::Float64) =
+  if x == ADDITIVE_IN_REDUCTIONS
     :contract_add
   elseif x == MULTIPLICATIVE_IN_REDUCTIONS
     :contract_mul
@@ -593,7 +596,8 @@ reduce_number_of_vectors(x::Float64) = if x == ADDITIVE_IN_REDUCTIONS
   else
     throw("Reduction not found.")
   end
-reduction_to_scalar(x::Float64) = if x == ADDITIVE_IN_REDUCTIONS
+reduction_to_scalar(x::Float64) =
+  if x == ADDITIVE_IN_REDUCTIONS
     :vsum
   elseif x == MULTIPLICATIVE_IN_REDUCTIONS
     :vprod
diff --git a/src/predicates.jl b/src/predicates.jl
index 69af70c8b..48291974d 100644
--- a/src/predicates.jl
+++ b/src/predicates.jl
@@ -11,7 +11,7 @@ isscopedname(:(Base.Checked.checked_add), (:Base, :Checked), :checked_add)
 function isscopedname(ex, modpath, name::Symbol)
   isexpr(ex, :(.), 2) &&
     (a = ex.args[2]; isa(a, QuoteNode) && a.value === name) &&
-        hasscope(ex.args[1], modpath)
+    hasscope(ex.args[1], modpath)
 end
 hasscope(modex, mod::Symbol) = modex === mod
 hasscope(modex, mod::Tuple{Symbol}) = hasscope(modex, mod[1])
diff --git a/src/reconstruct_loopset.jl b/src/reconstruct_loopset.jl
index 000ffc208..d044fbeb3 100644
--- a/src/reconstruct_loopset.jl
+++ b/src/reconstruct_loopset.jl
@@ -27,7 +27,7 @@ Base.promote_rule(
   ::Type{UpperBoundedInteger{N,T}},
   ::Type{T}
 ) where {N,T<:Base.BitInteger} = T
-Base.convert(::Type{T}, i::UpperBoundedInteger) where {T<:Number} =
+Base.convert(::Type{T}, i::UpperBoundedInteger) where {T<:Integer} =
   convert(T, i.i)
 Base.convert(
   ::Type{UpperBoundedInteger{N,T}},
diff --git a/src/simdfunctionals/mapreduce.jl b/src/simdfunctionals/mapreduce.jl
index 01b93014e..47393abcc 100644
--- a/src/simdfunctionals/mapreduce.jl
+++ b/src/simdfunctionals/mapreduce.jl
@@ -115,7 +115,8 @@ end
 Vectorized version of `sum`. Providing a function as the first argument
 will apply the function to each element of `A` before summing.
 """
-@inline vsum(f::F, A::AbstractArray{T}) where {F,T<:NativeTypes} = vmapreduce(f, +, A)
+@inline vsum(f::F, A::AbstractArray{T}) where {F,T<:NativeTypes} =
+  vmapreduce(f, +, A)
 @inline vsum(A::AbstractArray{T}) where {T<:NativeTypes} = vsum(identity, A)
 
 length_one_axis(::Base.OneTo) = Base.OneTo(1)
diff --git a/test/Project.toml b/test/Project.toml
index e57e06bb3..dabb95f63 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,8 +1,10 @@
 [deps]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
+ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -12,4 +14,5 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
diff --git a/test/forwarddiffext.jl b/test/forwarddiffext.jl
new file mode 100644
index 000000000..90b167e33
--- /dev/null
+++ b/test/forwarddiffext.jl
@@ -0,0 +1,37 @@
+
+using NNlib, LoopVectorization, VectorizationBase, ForwardDiff, Test
+randnvec() = Vec(ntuple(_ -> randn(), pick_vector_width(Float64))...)
+
+tovec(x::Vec{W,T}) where {W,T} = T[Tuple(x)...]
+tovec(x::VecUnroll) = reduce(vcat, map(tovec, VectorizationBase.data(x)))
+function tovec(x::ForwardDiff.Dual{T,V,N}) where {T,V,N}
+  v = tovec(ForwardDiff.value(x))
+  dv = map(tovec, Tuple(ForwardDiff.partials(x)))
+  D = ForwardDiff.Dual{T,eltype(v),N}
+  ret = Vector{D}(undef, length(v))
+  for i in eachindex(v)
+    ret[i] = ForwardDiff.Dual(v[i], map(Base.Fix2(Base.getindex, i), dv)...)
+  end
+  return ret
+end
+
+
+vx0 = randnvec()
+vx1 = randnvec()
+vx2 = randnvec()
+vx3 = randnvec()
+vx4 = randnvec()
+vx5 = randnvec()
+
+vd0 = ForwardDiff.Dual(vx0, vx1, vx2, vx3, vx4, vx5)
+
+vu0 = VecUnroll((vx0, vx1))
+vu1 = VecUnroll((vx2, vx3))
+vu2 = VecUnroll((vx4, vx5))
+
+vud = ForwardDiff.Dual(vu0, vu1, vu2)
+
+@test reinterpret(Float64, tovec(NNlib.leakyrelu(vd0))) ≈
+      reinterpret(Float64, NNlib.leakyrelu.(tovec(vd0)))
+@test reinterpret(Float64, tovec(NNlib.leakyrelu(vud))) ≈
+      reinterpret(Float64, NNlib.leakyrelu.(tovec(vud)))
diff --git a/test/grouptests.jl b/test/grouptests.jl
index e97a629b2..7e7c78908 100644
--- a/test/grouptests.jl
+++ b/test/grouptests.jl
@@ -116,6 +116,7 @@ const START_TIME = time()
    Pkg.activate(joinpath(precompiledir, "LVUser"))
    @time include(joinpath(precompiledir, "precompile.jl"))
    Pkg.activate(cproj)
+   @time include("forwarddiffext.jl")
   end
 end