From badbe6e0b9162e7613e44288fda6922b5999ae2e Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Thu, 13 Feb 2020 10:02:20 +0100 Subject: [PATCH] rename LLVM module to Intrinsics --- src/LLVM_intrinsics.jl | 17 +--- src/arrayops.jl | 12 +-- src/simdvec.jl | 199 +++++++++++++++++++++-------------------- 3 files changed, 107 insertions(+), 121 deletions(-) diff --git a/src/LLVM_intrinsics.jl b/src/LLVM_intrinsics.jl index 202da78..fada669 100644 --- a/src/LLVM_intrinsics.jl +++ b/src/LLVM_intrinsics.jl @@ -1,5 +1,5 @@ # LLVM operations and intrinsics -module LLVM +module Intrinsics # TODO: fastmath flags @@ -100,21 +100,6 @@ end ) end -@generated function xor(x::LVec{N, T}) where {N, T <: IntegerTypes} - ff = llvm_name(:xor, N, T) - shfl = join((string(d[T], " ", T == Bool ? 1 : -1) for i in 1:N), ", ") - s = """ - %res = xor <$N x $(d[T])> %0, <$shfl> - ret <$N x $(d[T])> %res - """ - return :( - $(Expr(:meta, :inline)); - Base.llvmcall($s, LVec{N, T}, Tuple{LVec{N, T}}, x) - ) -end - - - ##################### # Binary operators # ##################### diff --git a/src/arrayops.jl b/src/arrayops.jl index b9155ec..c23594f 100644 --- a/src/arrayops.jl +++ b/src/arrayops.jl @@ -42,9 +42,9 @@ FastContiguousArray{T,N} = Union{DenseArray{T,N}, Base.FastContiguousSubArray{T, @inline function vload(::Type{Vec{N, T}}, ptr::Ptr{T}, mask::Union{Nothing, Vec{N, Bool}}=nothing, ::Val{Aligned}=Val(false), ::Val{Nontemporal}=Val(false)) where {N, T, Aligned, Nontemporal} if mask === nothing - Vec(LLVM.load(LLVM.LVec{N, T}, ptr, Val(Aligned), Val(Nontemporal))) + Vec(Intrinsics.load(Intrinsics.LVec{N, T}, ptr, Val(Aligned), Val(Nontemporal))) else - Vec(LLVM.maskedload(ptr, mask.data, Val(Aligned), Val(Nontemporal))) + Vec(Intrinsics.maskedload(ptr, mask.data, Val(Aligned), Val(Nontemporal))) end end @@ -63,9 +63,9 @@ end @inline function vstore(x::Vec{N, T}, ptr::Ptr{T}, mask::Union{Nothing, Vec{N, Bool}}=nothing, ::Val{Aligned}=Val(false), ::Val{Nontemporal}=Val(false)) where {N, T, Aligned, Nontemporal} if mask === nothing - LLVM.store(x.data, ptr, Val(Aligned), Val(Nontemporal)) + Intrinsics.store(x.data, ptr, Val(Aligned), Val(Nontemporal)) else - LLVM.maskedstore(x.data, ptr, mask.data, Val(Aligned), Val(Nontemporal)) + Intrinsics.maskedstore(x.data, ptr, mask.data, Val(Aligned), Val(Nontemporal)) end end @inline function vstore(x::Vec{N, T}, a::FastContiguousArray{T,1}, i::Integer, mask=nothing, @@ -117,7 +117,7 @@ end @inline vgather(ptrs::Vec{N,Ptr{T}}, mask::Vec{N,Bool}=one(Vec{N,Bool}), ::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned} = - return Vec(LLVM.maskedgather(ptrs.data, mask.data)) + return Vec(Intrinsics.maskedgather(ptrs.data, mask.data)) @propagate_inbounds function vgather(a::FastContiguousArray{T,1}, idx::Vec{N, Int}, mask::Vec{N,Bool}=one(Vec{N,Bool}), ::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned} @@ -138,7 +138,7 @@ end @propagate_inbounds vscatter(x::Vec{N,T}, ptrs::Vec{N,Ptr{T}}, mask::Vec{N,Bool}, ::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned} = - LLVM.maskedscatter(x.data, ptrs.data, mask.data) + Intrinsics.maskedscatter(x.data, ptrs.data, mask.data) @propagate_inbounds function vscatter(x::Vec{N,T}, a::FastContiguousArray{T,1}, idx::Vec{N, Int}, mask::Vec{N,Bool}=one(Vec{N, Bool}), ::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned} diff --git a/src/simdvec.jl b/src/simdvec.jl index 0085627..617c55d 100644 --- a/src/simdvec.jl +++ b/src/simdvec.jl @@ -13,7 +13,7 @@ Base.copy(v::Vec) = v @inline _unsafe_convert(::Type{T}, v) where {T <: IntegerTypes} = v % T @inline _unsafe_convert(::Type{T}, v) where {T <: VecTypes} = convert(T, v) @inline constantvector(v::T1, ::Type{Vec{N, T2}}) where {N, T1, T2} = - Vec(LLVM.constantvector(_unsafe_convert(T2, v), LLVM.LVec{N, T2})) + Vec(Intrinsics.constantvector(_unsafe_convert(T2, v), Intrinsics.LVec{N, T2})) @inline Vec{N, T}(v::Vec{N, T}) where {N, T<:VecTypes} = v @inline Vec{N, T}(v::Vec{N, T}) where {N, T<:FloatingTypes} = v @@ -26,30 +26,30 @@ Base.copy(v::Vec) = v if T1 <: Union{IntegerTypes, Ptr} if T2 <: Union{IntegerTypes, Ptr} if sizeof(T1) < sizeof(T2) - return Vec(LLVM.trunc(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.trunc(Intrinsics.LVec{N, T1}, v.data)) elseif sizeof(T1) == sizeof(T2) - return Vec(LLVM.bitcast(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.bitcast(Intrinsics.LVec{N, T1}, v.data)) else - return Vec(LLVM.sext(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.sext(Intrinsics.LVec{N, T1}, v.data)) end elseif T2 <: FloatingTypes if T1 <: UIntTypes - return Vec(LLVM.fptoui(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.fptoui(Intrinsics.LVec{N, T1}, v.data)) elseif T1 <: IntTypes - return Vec(LLVM.fptosi(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.fptosi(Intrinsics.LVec{N, T1}, v.data)) end end end if T1 <: FloatingTypes if T2 <: UIntTypes - return Vec(LLVM.uitofp(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.uitofp(Intrinsics.LVec{N, T1}, v.data)) elseif T2 <: IntTypes - return Vec(LLVM.sitofp(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.sitofp(Intrinsics.LVec{N, T1}, v.data)) elseif T2 <: FloatingTypes if sizeof(T1) < sizeof(T2) - return Vec(LLVM.fptrunc(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.fptrunc(Intrinsics.LVec{N, T1}, v.data)) else - return Vec(LLVM.fpext(LLVM.LVec{N, T1}, v.data)) + return Vec(Intrinsics.fpext(Intrinsics.LVec{N, T1}, v.data)) end end end @@ -83,12 +83,12 @@ end function Base.getindex(v::Vec, i::IntegerTypes) @boundscheck checkbounds(v, i) - return LLVM.extractelement(v.data, i-1) + return Intrinsics.extractelement(v.data, i-1) end @inline function Base.setindex(v::Vec{N,T}, x, i::IntegerTypes) where {N,T} @boundscheck checkbounds(v, i) - Vec(LLVM.insertelement(v.data, _unsafe_convert(T, x), i-1)) + Vec(Intrinsics.insertelement(v.data, _unsafe_convert(T, x), i-1)) end Base.zero(::Type{Vec{N,T}}) where {N, T} = Vec{N,T}(zero(T)) @@ -96,9 +96,9 @@ Base.zero(::Vec{N,T}) where {N, T} = zero(Vec{N, T}) Base.one(::Type{Vec{N,T}}) where {N, T} = Vec{N, T}(one(T)) Base.one(::Vec{N,T}) where {N, T} = one(Vec{N, T}) -Base.reinterpret(::Type{Vec{N, T}}, v::Vec) where {T, N} = Vec(LLVM.bitcast(LLVM.LVec{N, T}, v.data)) -Base.reinterpret(::Type{Vec{N, T}}, v::ScalarTypes) where {T, N} = Vec(LLVM.bitcast(LLVM.LVec{N, T}, v)) -Base.reinterpret(::Type{T}, v::Vec) where {T} = LLVM.bitcast(T, v.data) +Base.reinterpret(::Type{Vec{N, T}}, v::Vec) where {T, N} = Vec(Intrinsics.bitcast(Intrinsics.LVec{N, T}, v.data)) +Base.reinterpret(::Type{Vec{N, T}}, v::ScalarTypes) where {T, N} = Vec(Intrinsics.bitcast(Intrinsics.LVec{N, T}, v)) +Base.reinterpret(::Type{T}, v::Vec) where {T} = Intrinsics.bitcast(T, v.data) ################### @@ -106,28 +106,27 @@ Base.reinterpret(::Type{T}, v::Vec) where {T} = LLVM.bitcast(T, v.data) ################### const UNARY_OPS = [ - (:sqrt , FloatingTypes , LLVM.sqrt) , - (:sin , FloatingTypes , LLVM.sin) , - (:trunc , FloatingTypes , LLVM.trunc) , - (:cos , FloatingTypes , LLVM.cos) , - (:exp , FloatingTypes , LLVM.exp) , - (:exp2 , FloatingTypes , LLVM.exp2) , - (:log , FloatingTypes , LLVM.log) , - (:log10 , FloatingTypes , LLVM.log10) , - (:log2 , FloatingTypes , LLVM.log2) , - (:abs , FloatingTypes , LLVM.fabs) , - (:floor , FloatingTypes , LLVM.floor) , - (:ceil , FloatingTypes , LLVM.ceil) , - # (:rint , FloatingTypes , LLVM) , - # (:nearbyint , FloatingTypes , LLVM) , - (:round , FloatingTypes , LLVM.round) , - - # (:bitreverse , IntegerTypes , LLVM.bitreverse) , - (:bswap , IntegerTypes , LLVM.bswap) , - (:count_ones , IntegerTypes , LLVM.ctpop) , - (:leading_zeros , IntegerTypes , LLVM.ctlz) , - (:trailing_zeros , IntegerTypes , LLVM.cttz) , - (:~ , IntegerTypes , LLVM.xor) + (:sqrt , FloatingTypes , Intrinsics.sqrt) , + (:sin , FloatingTypes , Intrinsics.sin) , + (:trunc , FloatingTypes , Intrinsics.trunc) , + (:cos , FloatingTypes , Intrinsics.cos) , + (:exp , FloatingTypes , Intrinsics.exp) , + (:exp2 , FloatingTypes , Intrinsics.exp2) , + (:log , FloatingTypes , Intrinsics.log) , + (:log10 , FloatingTypes , Intrinsics.log10) , + (:log2 , FloatingTypes , Intrinsics.log2) , + (:abs , FloatingTypes , Intrinsics.fabs) , + (:floor , FloatingTypes , Intrinsics.floor) , + (:ceil , FloatingTypes , Intrinsics.ceil) , + # (:rint , FloatingTypes , Intrinsics) , + # (:nearbyint , FloatingTypes , Intrinsics) , + (:round , FloatingTypes , Intrinsics.round) , + + # (:bitreverse , IntegerTypes , Intrinsics.bitreverse) , + (:bswap , IntegerTypes , Intrinsics.bswap) , + (:count_ones , IntegerTypes , Intrinsics.ctpop) , + (:leading_zeros , IntegerTypes , Intrinsics.ctlz) , + (:trailing_zeros , IntegerTypes , Intrinsics.cttz) , ] for (op, constraint, llvmop) in UNARY_OPS @@ -137,7 +136,9 @@ end Base.:+(v::Vec) = v Base.:-(v::Vec{<:Any, <:IntegerTypes}) = zero(v) - v -Base.:-(v::Vec{<:Any, <:FloatingTypes}) = Vec(LLVM.fneg(v.data)) +Base.:-(v::Vec{<:Any, <:FloatingTypes}) = Vec(Intrinsics.fneg(v.data)) +Base.:~(v::Vec{N, T}) where {N, T<:IntegerTypes} = Vec(Intrinsics.xor(v.data, Vec{N, T}(-1).data)) +Base.:~(v::Vec{N, Bool}) where {N} = Vec(Intrinsics.xor(v.data, Vec{N, Bool}(true).data)) Base.abs(v::Vec{N, T}) where {N, T} = Vec(vifelse(v < zero(T), -v, v)) Base.:!(v1::Vec{N,Bool}) where {N} = ~v1 Base.inv(v::Vec{N, T}) where {N, T<:FloatingTypes} = one(T) / v @@ -172,46 +173,46 @@ end #################### const BINARY_OPS = [ - (:+ , IntegerTypes , LLVM.add) - (:- , IntegerTypes , LLVM.sub) - (:* , IntegerTypes , LLVM.mul) - (:div , UIntTypes , LLVM.udiv) - (:div , IntTypes , LLVM.sdiv) - (:rem , UIntTypes , LLVM.urem) - (:rem , IntTypes , LLVM.srem) - - (:+ , FloatingTypes , LLVM.fadd) - (:- , FloatingTypes , LLVM.fsub) - (:* , FloatingTypes , LLVM.fmul) - (:^ , FloatingTypes , LLVM.pow) - (:/ , FloatingTypes , LLVM.fdiv) - (:rem , FloatingTypes , LLVM.frem) - (:min , FloatingTypes , LLVM.minnum) - (:max , FloatingTypes , LLVM.maxnum) - (:copysign , FloatingTypes , LLVM.copysign) - - (:~ , IntegerTypes , LLVM.xor) - (:& , IntegerTypes , LLVM.and) - (:| , IntegerTypes , LLVM.or) - (:⊻ , IntegerTypes , LLVM.xor) - - (:(==) , IntegerTypes , LLVM.icmp_eq) - (:(!=) , IntegerTypes , LLVM.icmp_ne) - (:(>) , IntTypes , LLVM.icmp_sgt) - (:(>=) , IntTypes , LLVM.icmp_sge) - (:(<) , IntTypes , LLVM.icmp_slt) - (:(<=) , IntTypes , LLVM.icmp_sle) - (:(>) , UIntTypes , LLVM.icmp_ugt) - (:(>=) , UIntTypes , LLVM.icmp_uge) - (:(<) , UIntTypes , LLVM.icmp_ult) - (:(<=) , UIntTypes , LLVM.icmp_ule) - - (:(==) , FloatingTypes , LLVM.fcmp_oeq) - (:(!=) , FloatingTypes , LLVM.fcmp_une) - (:(>) , FloatingTypes , LLVM.fcmp_ogt) - (:(>=) , FloatingTypes , LLVM.fcmp_oge) - (:(<) , FloatingTypes , LLVM.fcmp_olt) - (:(<=) , FloatingTypes , LLVM.fcmp_ole) + (:+ , IntegerTypes , Intrinsics.add) + (:- , IntegerTypes , Intrinsics.sub) + (:* , IntegerTypes , Intrinsics.mul) + (:div , UIntTypes , Intrinsics.udiv) + (:div , IntTypes , Intrinsics.sdiv) + (:rem , UIntTypes , Intrinsics.urem) + (:rem , IntTypes , Intrinsics.srem) + + (:+ , FloatingTypes , Intrinsics.fadd) + (:- , FloatingTypes , Intrinsics.fsub) + (:* , FloatingTypes , Intrinsics.fmul) + (:^ , FloatingTypes , Intrinsics.pow) + (:/ , FloatingTypes , Intrinsics.fdiv) + (:rem , FloatingTypes , Intrinsics.frem) + (:min , FloatingTypes , Intrinsics.minnum) + (:max , FloatingTypes , Intrinsics.maxnum) + (:copysign , FloatingTypes , Intrinsics.copysign) + + (:~ , IntegerTypes , Intrinsics.xor) + (:& , IntegerTypes , Intrinsics.and) + (:| , IntegerTypes , Intrinsics.or) + (:⊻ , IntegerTypes , Intrinsics.xor) + + (:(==) , IntegerTypes , Intrinsics.icmp_eq) + (:(!=) , IntegerTypes , Intrinsics.icmp_ne) + (:(>) , IntTypes , Intrinsics.icmp_sgt) + (:(>=) , IntTypes , Intrinsics.icmp_sge) + (:(<) , IntTypes , Intrinsics.icmp_slt) + (:(<=) , IntTypes , Intrinsics.icmp_sle) + (:(>) , UIntTypes , Intrinsics.icmp_ugt) + (:(>=) , UIntTypes , Intrinsics.icmp_uge) + (:(<) , UIntTypes , Intrinsics.icmp_ult) + (:(<=) , UIntTypes , Intrinsics.icmp_ule) + + (:(==) , FloatingTypes , Intrinsics.fcmp_oeq) + (:(!=) , FloatingTypes , Intrinsics.fcmp_une) + (:(>) , FloatingTypes , Intrinsics.fcmp_ogt) + (:(>=) , FloatingTypes , Intrinsics.fcmp_oge) + (:(<) , FloatingTypes , Intrinsics.fcmp_olt) + (:(<=) , FloatingTypes , Intrinsics.fcmp_ole) ] for (op, constraint, llvmop) in BINARY_OPS @@ -228,7 +229,7 @@ end # Pow @inline Base.:^(x::Vec{N,T}, y::IntegerTypes) where {N,T<:FloatingTypes} = - Vec(LLVM.powi(x.data, y)) + Vec(Intrinsics.powi(x.data, y)) # Do what Base does for HWNumber: @inline Base.literal_pow(::typeof(^), x::Vec, ::Val{0}) = one(typeof(x)) @inline Base.literal_pow(::typeof(^), x::Vec, ::Val{1}) = x @@ -266,19 +267,19 @@ end @inline function shl_int(x::Vec{N, T1}, y::Vec{N, T2}) where {N, T1<:IntegerTypes, T2<:IntegerTypes} vifelse(y > sizeof(T1) * 8, zero(Vec{N, T1}), - Vec(LLVM.shl(x.data, convert(Vec{N,T1}, y).data))) + Vec(Intrinsics.shl(x.data, convert(Vec{N,T1}, y).data))) end @inline function lshr_int(x::Vec{N, T1}, y::Vec{N, T2}) where {N, T1<:IntegerTypes, T2<:IntegerTypes} vifelse(y > sizeof(T1) * 8, zero(Vec{N, T1}), - Vec(LLVM.lshr(x.data, convert(Vec{N,T1}, y).data))) + Vec(Intrinsics.lshr(x.data, convert(Vec{N,T1}, y).data))) end @inline function ashr_int(x::Vec{N, T1}, y::Vec{N, T2}) where {N, T1<:IntegerTypes, T2<:IntegerTypes} vifelse(y > sizeof(T1) * 8, - Vec(LLVM.ashr(x.data, Vec{N,T1}(sizeof(T1)*8-1).data)), - Vec(LLVM.ashr(x.data, Vec{N,T1}(y).data))) + Vec(Intrinsics.ashr(x.data, Vec{N,T1}(sizeof(T1)*8-1).data)), + Vec(Intrinsics.ashr(x.data, Vec{N,T1}(y).data))) end # See https://github.com/JuliaLang/julia/blob/a211abcdfacc05cb93c15774a59ce8961c16dac4/base/int.jl#L422-L435 @@ -341,12 +342,12 @@ end @inline vifelse(v::Bool, v1::T, v2::T) where {T} = ifelse(v, v1, v2) @inline vifelse(v::Vec{N, Bool}, v1::Vec{N, T}, v2::Vec{N, T}) where {N, T} = - Vec(LLVM.select(v.data, v1.data, v2.data)) + Vec(Intrinsics.select(v.data, v1.data, v2.data)) @inline vifelse(v::Vec{N, Bool}, v1::T2, v2::Vec{N, T}) where {N, T, T2 <:ScalarTypes} = vifelse(v, Vec{N, T}(v1), v2) @inline vifelse(v::Vec{N, Bool}, v1::Vec{N, T}, v2::T2) where {N, T, T2 <:ScalarTypes} = vifelse(v, v1, Vec{N, T}(v2)) # fma, muladd and vectorization of these -for (op, llvmop) in [(:fma, LLVM.fma), (:muladd, LLVM.fmuladd)] +for (op, llvmop) in [(:fma, Intrinsics.fma), (:muladd, Intrinsics.fmuladd)] @eval begin @inline Base.$op(a::Vec{N, T}, b::Vec{N, T}, c::Vec{N, T}) where {N,T<:FloatingTypes} = Vec($llvmop(a.data, b.data, c.data)) @@ -370,18 +371,18 @@ end # Reductions # ############## const HORZ_REDUCTION_OPS = [ - (& , IntegerTypes , LLVM.reduce_and) - (| , IntegerTypes , LLVM.reduce_or) - (max , IntTypes , LLVM.reduce_smax) - (max , UIntTypes , LLVM.reduce_umax) - (max , FloatingTypes , LLVM.reduce_fmax) - (min , IntTypes , LLVM.reduce_smin) - (min , UIntTypes , LLVM.reduce_umin) - (min , FloatingTypes , LLVM.reduce_fmin) - (+ , IntegerTypes , LLVM.reduce_add) - (* , IntegerTypes , LLVM.reduce_mul) - (+ , FloatingTypes , LLVM.reduce_fadd) - (* , FloatingTypes , LLVM.reduce_fmul) + (& , IntegerTypes , Intrinsics.reduce_and) + (| , IntegerTypes , Intrinsics.reduce_or) + (max , IntTypes , Intrinsics.reduce_smax) + (max , UIntTypes , Intrinsics.reduce_umax) + (max , FloatingTypes , Intrinsics.reduce_fmax) + (min , IntTypes , Intrinsics.reduce_smin) + (min , UIntTypes , Intrinsics.reduce_umin) + (min , FloatingTypes , Intrinsics.reduce_fmin) + (+ , IntegerTypes , Intrinsics.reduce_add) + (* , IntegerTypes , Intrinsics.reduce_mul) + (+ , FloatingTypes , Intrinsics.reduce_fadd) + (* , FloatingTypes , Intrinsics.reduce_fmul) ] for (op, constraint, llvmop) in HORZ_REDUCTION_OPS @@ -402,8 +403,8 @@ Base.reduce(F::Any, v::Vec) = error("reduction not defined for SIMD.Vec on $F") ############ @inline function shufflevector(x::Vec{N, T}, ::Val{I}) where {N, T, I} - Vec(LLVM.shufflevector(x.data, Val(I))) + Vec(Intrinsics.shufflevector(x.data, Val(I))) end @inline function shufflevector(x::Vec{N, T}, y::Vec{N, T}, ::Val{I}) where {N, T, I} - Vec(LLVM.shufflevector(x.data, y.data, Val(I))) + Vec(Intrinsics.shufflevector(x.data, y.data, Val(I))) end