From 0d629549f22595189fae7249a0e203d24e983031 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Wed, 25 May 2022 11:58:10 -0400 Subject: [PATCH] fix integer outer reduct initialization on simd_integer_register_size < register_size arches --- Project.toml | 2 +- src/codegen/lowering.jl | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 367700aa9..82202603f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LoopVectorization" uuid = "bdcacae8-1622-11e9-2a5c-532679323890" authors = ["Chris Elrod "] -version = "0.12.111" +version = "0.12.112" [deps] ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" diff --git a/src/codegen/lowering.jl b/src/codegen/lowering.jl index 9d011783e..df26ef26d 100644 --- a/src/codegen/lowering.jl +++ b/src/codegen/lowering.jl @@ -515,20 +515,28 @@ function pointerremcomparison( end end -@generated function of_same_size(::Type{T}, ::Type{S}) where {T,S} +@generated function of_same_size(::Type{T}, ::Type{S}, ::StaticInt{R}) where {T,S,R} sizeof_S = sizeof(S) + if T <: Integer && sizeof(T) == 8 + # sizeof(T) == 8 && max(..., 4) to maybe demote Int64 -> Int32 + # but otherwise, we're giving up too much with the demotion. + sizeof_S *= max(8 ÷ R, 4) + end sizeof(T) == sizeof_S && return T # Tfloat = T <: Union{Float32,Float64} if T <: Union{Float32,Float64} sizeof_S ≥ 8 ? Float64 : Float32 elseif T <: Signed - Symbol(:Int, 8sizeof_S) + Symbol(:Int, sizeof_S) elseif (T <: Unsigned) | (T === Bool) - Symbol(:UInt, 8sizeof_S) + Symbol(:UInt, sizeof_S) else S end end +@inline function of_same_size(::Type{T}, ::Type{S}) where {T,S} + of_same_size(T, S, VectorizationBase.register_size() ÷ VectorizationBase.simd_integer_register_size()) +end function outer_reduction_zero( op::Operation, u₁u::Bool,