Skip to content

Commit

Permalink
rename LLVM module to Intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
KristofferC committed Feb 13, 2020
1 parent e9bc504 commit badbe6e
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 121 deletions.
17 changes: 1 addition & 16 deletions src/LLVM_intrinsics.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# LLVM operations and intrinsics
module LLVM
module Intrinsics

# TODO: fastmath flags

Expand Down Expand Up @@ -100,21 +100,6 @@ end
)
end

@generated function xor(x::LVec{N, T}) where {N, T <: IntegerTypes}
ff = llvm_name(:xor, N, T)
shfl = join((string(d[T], " ", T == Bool ? 1 : -1) for i in 1:N), ", ")
s = """
%res = xor <$N x $(d[T])> %0, <$shfl>
ret <$N x $(d[T])> %res
"""
return :(
$(Expr(:meta, :inline));
Base.llvmcall($s, LVec{N, T}, Tuple{LVec{N, T}}, x)
)
end



#####################
# Binary operators #
#####################
Expand Down
12 changes: 6 additions & 6 deletions src/arrayops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ FastContiguousArray{T,N} = Union{DenseArray{T,N}, Base.FastContiguousSubArray{T,
@inline function vload(::Type{Vec{N, T}}, ptr::Ptr{T}, mask::Union{Nothing, Vec{N, Bool}}=nothing,
::Val{Aligned}=Val(false), ::Val{Nontemporal}=Val(false)) where {N, T, Aligned, Nontemporal}
if mask === nothing
Vec(LLVM.load(LLVM.LVec{N, T}, ptr, Val(Aligned), Val(Nontemporal)))
Vec(Intrinsics.load(Intrinsics.LVec{N, T}, ptr, Val(Aligned), Val(Nontemporal)))
else
Vec(LLVM.maskedload(ptr, mask.data, Val(Aligned), Val(Nontemporal)))
Vec(Intrinsics.maskedload(ptr, mask.data, Val(Aligned), Val(Nontemporal)))
end
end

Expand All @@ -63,9 +63,9 @@ end
@inline function vstore(x::Vec{N, T}, ptr::Ptr{T}, mask::Union{Nothing, Vec{N, Bool}}=nothing,
::Val{Aligned}=Val(false), ::Val{Nontemporal}=Val(false)) where {N, T, Aligned, Nontemporal}
if mask === nothing
LLVM.store(x.data, ptr, Val(Aligned), Val(Nontemporal))
Intrinsics.store(x.data, ptr, Val(Aligned), Val(Nontemporal))
else
LLVM.maskedstore(x.data, ptr, mask.data, Val(Aligned), Val(Nontemporal))
Intrinsics.maskedstore(x.data, ptr, mask.data, Val(Aligned), Val(Nontemporal))
end
end
@inline function vstore(x::Vec{N, T}, a::FastContiguousArray{T,1}, i::Integer, mask=nothing,
Expand Down Expand Up @@ -117,7 +117,7 @@ end
@inline vgather(ptrs::Vec{N,Ptr{T}},
mask::Vec{N,Bool}=one(Vec{N,Bool}),
::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned} =
return Vec(LLVM.maskedgather(ptrs.data, mask.data))
return Vec(Intrinsics.maskedgather(ptrs.data, mask.data))
@propagate_inbounds function vgather(a::FastContiguousArray{T,1}, idx::Vec{N, Int},
mask::Vec{N,Bool}=one(Vec{N,Bool}),
::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned}
Expand All @@ -138,7 +138,7 @@ end

@propagate_inbounds vscatter(x::Vec{N,T}, ptrs::Vec{N,Ptr{T}},
mask::Vec{N,Bool}, ::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned} =
LLVM.maskedscatter(x.data, ptrs.data, mask.data)
Intrinsics.maskedscatter(x.data, ptrs.data, mask.data)
@propagate_inbounds function vscatter(x::Vec{N,T}, a::FastContiguousArray{T,1}, idx::Vec{N, Int},
mask::Vec{N,Bool}=one(Vec{N, Bool}),
::Val{Aligned}=Val(false)) where {N, T<:ScalarTypes, Aligned}
Expand Down
199 changes: 100 additions & 99 deletions src/simdvec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Base.copy(v::Vec) = v
@inline _unsafe_convert(::Type{T}, v) where {T <: IntegerTypes} = v % T
@inline _unsafe_convert(::Type{T}, v) where {T <: VecTypes} = convert(T, v)
@inline constantvector(v::T1, ::Type{Vec{N, T2}}) where {N, T1, T2} =
Vec(LLVM.constantvector(_unsafe_convert(T2, v), LLVM.LVec{N, T2}))
Vec(Intrinsics.constantvector(_unsafe_convert(T2, v), Intrinsics.LVec{N, T2}))

@inline Vec{N, T}(v::Vec{N, T}) where {N, T<:VecTypes} = v
@inline Vec{N, T}(v::Vec{N, T}) where {N, T<:FloatingTypes} = v
Expand All @@ -26,30 +26,30 @@ Base.copy(v::Vec) = v
if T1 <: Union{IntegerTypes, Ptr}
if T2 <: Union{IntegerTypes, Ptr}
if sizeof(T1) < sizeof(T2)
return Vec(LLVM.trunc(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.trunc(Intrinsics.LVec{N, T1}, v.data))
elseif sizeof(T1) == sizeof(T2)
return Vec(LLVM.bitcast(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.bitcast(Intrinsics.LVec{N, T1}, v.data))
else
return Vec(LLVM.sext(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.sext(Intrinsics.LVec{N, T1}, v.data))
end
elseif T2 <: FloatingTypes
if T1 <: UIntTypes
return Vec(LLVM.fptoui(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.fptoui(Intrinsics.LVec{N, T1}, v.data))
elseif T1 <: IntTypes
return Vec(LLVM.fptosi(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.fptosi(Intrinsics.LVec{N, T1}, v.data))
end
end
end
if T1 <: FloatingTypes
if T2 <: UIntTypes
return Vec(LLVM.uitofp(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.uitofp(Intrinsics.LVec{N, T1}, v.data))
elseif T2 <: IntTypes
return Vec(LLVM.sitofp(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.sitofp(Intrinsics.LVec{N, T1}, v.data))
elseif T2 <: FloatingTypes
if sizeof(T1) < sizeof(T2)
return Vec(LLVM.fptrunc(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.fptrunc(Intrinsics.LVec{N, T1}, v.data))
else
return Vec(LLVM.fpext(LLVM.LVec{N, T1}, v.data))
return Vec(Intrinsics.fpext(Intrinsics.LVec{N, T1}, v.data))
end
end
end
Expand Down Expand Up @@ -83,51 +83,50 @@ end

function Base.getindex(v::Vec, i::IntegerTypes)
@boundscheck checkbounds(v, i)
return LLVM.extractelement(v.data, i-1)
return Intrinsics.extractelement(v.data, i-1)
end

@inline function Base.setindex(v::Vec{N,T}, x, i::IntegerTypes) where {N,T}
@boundscheck checkbounds(v, i)
Vec(LLVM.insertelement(v.data, _unsafe_convert(T, x), i-1))
Vec(Intrinsics.insertelement(v.data, _unsafe_convert(T, x), i-1))
end

Base.zero(::Type{Vec{N,T}}) where {N, T} = Vec{N,T}(zero(T))
Base.zero(::Vec{N,T}) where {N, T} = zero(Vec{N, T})
Base.one(::Type{Vec{N,T}}) where {N, T} = Vec{N, T}(one(T))
Base.one(::Vec{N,T}) where {N, T} = one(Vec{N, T})

Base.reinterpret(::Type{Vec{N, T}}, v::Vec) where {T, N} = Vec(LLVM.bitcast(LLVM.LVec{N, T}, v.data))
Base.reinterpret(::Type{Vec{N, T}}, v::ScalarTypes) where {T, N} = Vec(LLVM.bitcast(LLVM.LVec{N, T}, v))
Base.reinterpret(::Type{T}, v::Vec) where {T} = LLVM.bitcast(T, v.data)
Base.reinterpret(::Type{Vec{N, T}}, v::Vec) where {T, N} = Vec(Intrinsics.bitcast(Intrinsics.LVec{N, T}, v.data))
Base.reinterpret(::Type{Vec{N, T}}, v::ScalarTypes) where {T, N} = Vec(Intrinsics.bitcast(Intrinsics.LVec{N, T}, v))
Base.reinterpret(::Type{T}, v::Vec) where {T} = Intrinsics.bitcast(T, v.data)


###################
# Unary operators #
###################

const UNARY_OPS = [
(:sqrt , FloatingTypes , LLVM.sqrt) ,
(:sin , FloatingTypes , LLVM.sin) ,
(:trunc , FloatingTypes , LLVM.trunc) ,
(:cos , FloatingTypes , LLVM.cos) ,
(:exp , FloatingTypes , LLVM.exp) ,
(:exp2 , FloatingTypes , LLVM.exp2) ,
(:log , FloatingTypes , LLVM.log) ,
(:log10 , FloatingTypes , LLVM.log10) ,
(:log2 , FloatingTypes , LLVM.log2) ,
(:abs , FloatingTypes , LLVM.fabs) ,
(:floor , FloatingTypes , LLVM.floor) ,
(:ceil , FloatingTypes , LLVM.ceil) ,
# (:rint , FloatingTypes , LLVM) ,
# (:nearbyint , FloatingTypes , LLVM) ,
(:round , FloatingTypes , LLVM.round) ,

# (:bitreverse , IntegerTypes , LLVM.bitreverse) ,
(:bswap , IntegerTypes , LLVM.bswap) ,
(:count_ones , IntegerTypes , LLVM.ctpop) ,
(:leading_zeros , IntegerTypes , LLVM.ctlz) ,
(:trailing_zeros , IntegerTypes , LLVM.cttz) ,
(:~ , IntegerTypes , LLVM.xor)
(:sqrt , FloatingTypes , Intrinsics.sqrt) ,
(:sin , FloatingTypes , Intrinsics.sin) ,
(:trunc , FloatingTypes , Intrinsics.trunc) ,
(:cos , FloatingTypes , Intrinsics.cos) ,
(:exp , FloatingTypes , Intrinsics.exp) ,
(:exp2 , FloatingTypes , Intrinsics.exp2) ,
(:log , FloatingTypes , Intrinsics.log) ,
(:log10 , FloatingTypes , Intrinsics.log10) ,
(:log2 , FloatingTypes , Intrinsics.log2) ,
(:abs , FloatingTypes , Intrinsics.fabs) ,
(:floor , FloatingTypes , Intrinsics.floor) ,
(:ceil , FloatingTypes , Intrinsics.ceil) ,
# (:rint , FloatingTypes , Intrinsics) ,
# (:nearbyint , FloatingTypes , Intrinsics) ,
(:round , FloatingTypes , Intrinsics.round) ,

# (:bitreverse , IntegerTypes , Intrinsics.bitreverse) ,
(:bswap , IntegerTypes , Intrinsics.bswap) ,
(:count_ones , IntegerTypes , Intrinsics.ctpop) ,
(:leading_zeros , IntegerTypes , Intrinsics.ctlz) ,
(:trailing_zeros , IntegerTypes , Intrinsics.cttz) ,
]

for (op, constraint, llvmop) in UNARY_OPS
Expand All @@ -137,7 +136,9 @@ end

Base.:+(v::Vec) = v
Base.:-(v::Vec{<:Any, <:IntegerTypes}) = zero(v) - v
Base.:-(v::Vec{<:Any, <:FloatingTypes}) = Vec(LLVM.fneg(v.data))
Base.:-(v::Vec{<:Any, <:FloatingTypes}) = Vec(Intrinsics.fneg(v.data))
Base.:~(v::Vec{N, T}) where {N, T<:IntegerTypes} = Vec(Intrinsics.xor(v.data, Vec{N, T}(-1).data))
Base.:~(v::Vec{N, Bool}) where {N} = Vec(Intrinsics.xor(v.data, Vec{N, Bool}(true).data))
Base.abs(v::Vec{N, T}) where {N, T} = Vec(vifelse(v < zero(T), -v, v))
Base.:!(v1::Vec{N,Bool}) where {N} = ~v1
Base.inv(v::Vec{N, T}) where {N, T<:FloatingTypes} = one(T) / v
Expand Down Expand Up @@ -172,46 +173,46 @@ end
####################

const BINARY_OPS = [
(:+ , IntegerTypes , LLVM.add)
(:- , IntegerTypes , LLVM.sub)
(:* , IntegerTypes , LLVM.mul)
(:div , UIntTypes , LLVM.udiv)
(:div , IntTypes , LLVM.sdiv)
(:rem , UIntTypes , LLVM.urem)
(:rem , IntTypes , LLVM.srem)

(:+ , FloatingTypes , LLVM.fadd)
(:- , FloatingTypes , LLVM.fsub)
(:* , FloatingTypes , LLVM.fmul)
(:^ , FloatingTypes , LLVM.pow)
(:/ , FloatingTypes , LLVM.fdiv)
(:rem , FloatingTypes , LLVM.frem)
(:min , FloatingTypes , LLVM.minnum)
(:max , FloatingTypes , LLVM.maxnum)
(:copysign , FloatingTypes , LLVM.copysign)

(:~ , IntegerTypes , LLVM.xor)
(:& , IntegerTypes , LLVM.and)
(:| , IntegerTypes , LLVM.or)
(:⊻ , IntegerTypes , LLVM.xor)

(:(==) , IntegerTypes , LLVM.icmp_eq)
(:(!=) , IntegerTypes , LLVM.icmp_ne)
(:(>) , IntTypes , LLVM.icmp_sgt)
(:(>=) , IntTypes , LLVM.icmp_sge)
(:(<) , IntTypes , LLVM.icmp_slt)
(:(<=) , IntTypes , LLVM.icmp_sle)
(:(>) , UIntTypes , LLVM.icmp_ugt)
(:(>=) , UIntTypes , LLVM.icmp_uge)
(:(<) , UIntTypes , LLVM.icmp_ult)
(:(<=) , UIntTypes , LLVM.icmp_ule)

(:(==) , FloatingTypes , LLVM.fcmp_oeq)
(:(!=) , FloatingTypes , LLVM.fcmp_une)
(:(>) , FloatingTypes , LLVM.fcmp_ogt)
(:(>=) , FloatingTypes , LLVM.fcmp_oge)
(:(<) , FloatingTypes , LLVM.fcmp_olt)
(:(<=) , FloatingTypes , LLVM.fcmp_ole)
(:+ , IntegerTypes , Intrinsics.add)
(:- , IntegerTypes , Intrinsics.sub)
(:* , IntegerTypes , Intrinsics.mul)
(:div , UIntTypes , Intrinsics.udiv)
(:div , IntTypes , Intrinsics.sdiv)
(:rem , UIntTypes , Intrinsics.urem)
(:rem , IntTypes , Intrinsics.srem)

(:+ , FloatingTypes , Intrinsics.fadd)
(:- , FloatingTypes , Intrinsics.fsub)
(:* , FloatingTypes , Intrinsics.fmul)
(:^ , FloatingTypes , Intrinsics.pow)
(:/ , FloatingTypes , Intrinsics.fdiv)
(:rem , FloatingTypes , Intrinsics.frem)
(:min , FloatingTypes , Intrinsics.minnum)
(:max , FloatingTypes , Intrinsics.maxnum)
(:copysign , FloatingTypes , Intrinsics.copysign)

(:~ , IntegerTypes , Intrinsics.xor)
(:& , IntegerTypes , Intrinsics.and)
(:| , IntegerTypes , Intrinsics.or)
(:⊻ , IntegerTypes , Intrinsics.xor)

(:(==) , IntegerTypes , Intrinsics.icmp_eq)
(:(!=) , IntegerTypes , Intrinsics.icmp_ne)
(:(>) , IntTypes , Intrinsics.icmp_sgt)
(:(>=) , IntTypes , Intrinsics.icmp_sge)
(:(<) , IntTypes , Intrinsics.icmp_slt)
(:(<=) , IntTypes , Intrinsics.icmp_sle)
(:(>) , UIntTypes , Intrinsics.icmp_ugt)
(:(>=) , UIntTypes , Intrinsics.icmp_uge)
(:(<) , UIntTypes , Intrinsics.icmp_ult)
(:(<=) , UIntTypes , Intrinsics.icmp_ule)

(:(==) , FloatingTypes , Intrinsics.fcmp_oeq)
(:(!=) , FloatingTypes , Intrinsics.fcmp_une)
(:(>) , FloatingTypes , Intrinsics.fcmp_ogt)
(:(>=) , FloatingTypes , Intrinsics.fcmp_oge)
(:(<) , FloatingTypes , Intrinsics.fcmp_olt)
(:(<=) , FloatingTypes , Intrinsics.fcmp_ole)
]

for (op, constraint, llvmop) in BINARY_OPS
Expand All @@ -228,7 +229,7 @@ end

# Pow
@inline Base.:^(x::Vec{N,T}, y::IntegerTypes) where {N,T<:FloatingTypes} =
Vec(LLVM.powi(x.data, y))
Vec(Intrinsics.powi(x.data, y))
# Do what Base does for HWNumber:
@inline Base.literal_pow(::typeof(^), x::Vec, ::Val{0}) = one(typeof(x))
@inline Base.literal_pow(::typeof(^), x::Vec, ::Val{1}) = x
Expand Down Expand Up @@ -266,19 +267,19 @@ end
@inline function shl_int(x::Vec{N, T1}, y::Vec{N, T2}) where {N, T1<:IntegerTypes, T2<:IntegerTypes}
vifelse(y > sizeof(T1) * 8,
zero(Vec{N, T1}),
Vec(LLVM.shl(x.data, convert(Vec{N,T1}, y).data)))
Vec(Intrinsics.shl(x.data, convert(Vec{N,T1}, y).data)))
end

@inline function lshr_int(x::Vec{N, T1}, y::Vec{N, T2}) where {N, T1<:IntegerTypes, T2<:IntegerTypes}
vifelse(y > sizeof(T1) * 8,
zero(Vec{N, T1}),
Vec(LLVM.lshr(x.data, convert(Vec{N,T1}, y).data)))
Vec(Intrinsics.lshr(x.data, convert(Vec{N,T1}, y).data)))
end

@inline function ashr_int(x::Vec{N, T1}, y::Vec{N, T2}) where {N, T1<:IntegerTypes, T2<:IntegerTypes}
vifelse(y > sizeof(T1) * 8,
Vec(LLVM.ashr(x.data, Vec{N,T1}(sizeof(T1)*8-1).data)),
Vec(LLVM.ashr(x.data, Vec{N,T1}(y).data)))
Vec(Intrinsics.ashr(x.data, Vec{N,T1}(sizeof(T1)*8-1).data)),
Vec(Intrinsics.ashr(x.data, Vec{N,T1}(y).data)))
end

# See https://github.com/JuliaLang/julia/blob/a211abcdfacc05cb93c15774a59ce8961c16dac4/base/int.jl#L422-L435
Expand Down Expand Up @@ -341,12 +342,12 @@ end

@inline vifelse(v::Bool, v1::T, v2::T) where {T} = ifelse(v, v1, v2)
@inline vifelse(v::Vec{N, Bool}, v1::Vec{N, T}, v2::Vec{N, T}) where {N, T} =
Vec(LLVM.select(v.data, v1.data, v2.data))
Vec(Intrinsics.select(v.data, v1.data, v2.data))
@inline vifelse(v::Vec{N, Bool}, v1::T2, v2::Vec{N, T}) where {N, T, T2 <:ScalarTypes} = vifelse(v, Vec{N, T}(v1), v2)
@inline vifelse(v::Vec{N, Bool}, v1::Vec{N, T}, v2::T2) where {N, T, T2 <:ScalarTypes} = vifelse(v, v1, Vec{N, T}(v2))

# fma, muladd and vectorization of these
for (op, llvmop) in [(:fma, LLVM.fma), (:muladd, LLVM.fmuladd)]
for (op, llvmop) in [(:fma, Intrinsics.fma), (:muladd, Intrinsics.fmuladd)]
@eval begin
@inline Base.$op(a::Vec{N, T}, b::Vec{N, T}, c::Vec{N, T}) where {N,T<:FloatingTypes} =
Vec($llvmop(a.data, b.data, c.data))
Expand All @@ -370,18 +371,18 @@ end
# Reductions #
##############
const HORZ_REDUCTION_OPS = [
(& , IntegerTypes , LLVM.reduce_and)
(| , IntegerTypes , LLVM.reduce_or)
(max , IntTypes , LLVM.reduce_smax)
(max , UIntTypes , LLVM.reduce_umax)
(max , FloatingTypes , LLVM.reduce_fmax)
(min , IntTypes , LLVM.reduce_smin)
(min , UIntTypes , LLVM.reduce_umin)
(min , FloatingTypes , LLVM.reduce_fmin)
(+ , IntegerTypes , LLVM.reduce_add)
(* , IntegerTypes , LLVM.reduce_mul)
(+ , FloatingTypes , LLVM.reduce_fadd)
(* , FloatingTypes , LLVM.reduce_fmul)
(& , IntegerTypes , Intrinsics.reduce_and)
(| , IntegerTypes , Intrinsics.reduce_or)
(max , IntTypes , Intrinsics.reduce_smax)
(max , UIntTypes , Intrinsics.reduce_umax)
(max , FloatingTypes , Intrinsics.reduce_fmax)
(min , IntTypes , Intrinsics.reduce_smin)
(min , UIntTypes , Intrinsics.reduce_umin)
(min , FloatingTypes , Intrinsics.reduce_fmin)
(+ , IntegerTypes , Intrinsics.reduce_add)
(* , IntegerTypes , Intrinsics.reduce_mul)
(+ , FloatingTypes , Intrinsics.reduce_fadd)
(* , FloatingTypes , Intrinsics.reduce_fmul)
]

for (op, constraint, llvmop) in HORZ_REDUCTION_OPS
Expand All @@ -402,8 +403,8 @@ Base.reduce(F::Any, v::Vec) = error("reduction not defined for SIMD.Vec on $F")
############

@inline function shufflevector(x::Vec{N, T}, ::Val{I}) where {N, T, I}
Vec(LLVM.shufflevector(x.data, Val(I)))
Vec(Intrinsics.shufflevector(x.data, Val(I)))
end
@inline function shufflevector(x::Vec{N, T}, y::Vec{N, T}, ::Val{I}) where {N, T, I}
Vec(LLVM.shufflevector(x.data, y.data, Val(I)))
Vec(Intrinsics.shufflevector(x.data, y.data, Val(I)))
end

0 comments on commit badbe6e

Please sign in to comment.