diff --git a/NEWS.md b/NEWS.md index e34b72f9f435f..daf8c5b36dc31 100644 --- a/NEWS.md +++ b/NEWS.md @@ -233,6 +233,9 @@ This section lists changes that do not have deprecation warnings. * All command line arguments passed via `-e`, `-E`, and `-L` will be executed in the order given on the command line ([#23665]). + * The return type of `reinterpret` has changed to `ReinterpretArray`. `reinterpret` on sparse + arrays has been discontinued. + Library improvements -------------------- @@ -300,6 +303,10 @@ Library improvements * New function `equalto(x)`, which returns a function that compares its argument to `x` using `isequal` ([#23812]). + * `reinterpret` now works on any AbstractArray using the new `ReinterpretArray` type. + This supersedes the old behavior of reinterpret on Arrays. As a result, reinterpreting + arrays with different alignment requirements (removed in 0.6) is once again allowed ([#23750]). + Compiler/Runtime improvements ----------------------------- @@ -498,6 +505,11 @@ Deprecated or removed * `find` functions now operate only on booleans by default. To look for non-zeros, use `x->x!=0` or `!iszero` ([#23120]). + * The ability of `reinterpret` to yield `Array`s of different type than the underlying storage + has been removed. The `reinterpret` function is still available, but now returns a + `ReinterpretArray`. The three argument form of `reinterpret` that implicitly reshapes + has been deprecated ([#23750]). + Command-line option changes --------------------------- diff --git a/base/array.jl b/base/array.jl index 91e9f800f8cb4..b8ebcb909ec91 100644 --- a/base/array.jl +++ b/base/array.jl @@ -218,33 +218,6 @@ original. """ copy(a::T) where {T<:Array} = ccall(:jl_array_copy, Ref{T}, (Any,), a) -function reinterpret(::Type{T}, a::Array{S,1}) where T where S - nel = Int(div(length(a) * sizeof(S), sizeof(T))) - # TODO: maybe check that remainder is zero? - return reinterpret(T, a, (nel,)) -end - -function reinterpret(::Type{T}, a::Array{S}) where T where S - if sizeof(S) != sizeof(T) - throw(ArgumentError("result shape not specified")) - end - reinterpret(T, a, size(a)) -end - -function reinterpret(::Type{T}, a::Array{S}, dims::NTuple{N,Int}) where T where S where N - function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U} - @_noinline_meta - throw(ArgumentError("cannot reinterpret Array{$(S)} to ::Type{Array{$(T)}}, type $(U) is not a bits type")) - end - isbits(T) || throwbits(S, T, T) - isbits(S) || throwbits(S, T, S) - nel = div(length(a) * sizeof(S), sizeof(T)) - if prod(dims) != nel - _throw_dmrsa(dims, nel) - end - ccall(:jl_reshape_array, Array{T,N}, (Any, Any, Any), Array{T,N}, a, dims) -end - # reshaping to same # of dimensions function reshape(a::Array{T,N}, dims::NTuple{N,Int}) where T where N if prod(dims) != length(a) diff --git a/base/deprecated.jl b/base/deprecated.jl index 552d7f45d2343..cb55ef5edbf62 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -1871,6 +1871,10 @@ end # also remove deprecation warnings in find* functions in array.jl, sparse/sparsematrix.jl, # and sparse/sparsevector.jl. 
+# issue #22849 +@deprecate reinterpret(::Type{T}, a::Array{S}, dims::NTuple{N,Int}) where {T, S, N} reshape(reinterpret(T, vec(a)), dims) +@deprecate reinterpret(::Type{T}, a::SparseMatrixCSC{S}, dims::NTuple{N,Int}) where {T, S, N} reinterpret(T, reshape(a, dims)) + # END 0.7 deprecations # BEGIN 1.0 deprecations diff --git a/base/essentials.jl b/base/essentials.jl index 450058d81322c..d58bdf5a9c87c 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -321,20 +321,12 @@ unsafe_convert(::Type{P}, x::Ptr) where {P<:Ptr} = convert(P, x) reinterpret(type, A) Change the type-interpretation of a block of memory. -For arrays, this constructs an array with the same binary data as the given +For arrays, this constructs a view of the array with the same binary data as the given array, but with the specified element type. For example, `reinterpret(Float32, UInt32(7))` interprets the 4 bytes corresponding to `UInt32(7)` as a [`Float32`](@ref). -!!! warning - - It is not allowed to `reinterpret` an array to an element type with a larger alignment then - the alignment of the array. For a normal `Array`, this is the alignment of its element type. - For a reinterpreted array, this is the alignment of the `Array` it was reinterpreted from. - For example, `reinterpret(UInt32, UInt8[0, 0, 0, 0])` is not allowed but - `reinterpret(UInt32, reinterpret(UInt8, Float32[1.0]))` is allowed. - # Examples ```jldoctest julia> reinterpret(Float32, UInt32(7)) diff --git a/base/inference.jl b/base/inference.jl index e4dce78d1af54..775c3baa9bd52 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -504,6 +504,8 @@ add_tfunc(sdiv_int, 2, 2, math_tfunc, 30) add_tfunc(udiv_int, 2, 2, math_tfunc, 30) add_tfunc(srem_int, 2, 2, math_tfunc, 30) add_tfunc(urem_int, 2, 2, math_tfunc, 30) +add_tfunc(add_ptr, 2, 2, math_tfunc, 1) +add_tfunc(sub_ptr, 2, 2, math_tfunc, 1) add_tfunc(neg_float, 1, 1, math_tfunc, 1) add_tfunc(add_float, 2, 2, math_tfunc, 1) add_tfunc(sub_float, 2, 2, math_tfunc, 1) diff --git a/base/io.jl b/base/io.jl index 4b3a74e30e264..f64273602ebba 100644 --- a/base/io.jl +++ b/base/io.jl @@ -267,15 +267,16 @@ readlines(s=STDIN; chomp::Bool=true) = collect(eachline(s, chomp=chomp)) ## byte-order mark, ntoh & hton ## -let endian_boms = reinterpret(UInt8, UInt32[0x01020304]) +let a = UInt32[0x01020304] + endian_bom = @gc_preserve a unsafe_load(convert(Ptr{UInt8}, pointer(a))) global ntoh, hton, ltoh, htol - if endian_boms == UInt8[1:4;] + if endian_bom == 0x01 ntoh(x) = x hton(x) = x ltoh(x) = bswap(x) htol(x) = bswap(x) const global ENDIAN_BOM = 0x01020304 - elseif endian_boms == UInt8[4:-1:1;] + elseif endian_bom == 0x04 ntoh(x) = bswap(x) hton(x) = bswap(x) ltoh(x) = x diff --git a/base/linalg/factorization.jl b/base/linalg/factorization.jl index 9acaef101ccaa..6ab24cfc42fa9 100644 --- a/base/linalg/factorization.jl +++ b/base/linalg/factorization.jl @@ -56,9 +56,9 @@ Base.isequal(F::T, G::T) where {T<:Factorization} = all(f -> isequal(getfield(F, # With a real lhs and complex rhs with the same precision, we can reinterpret # the complex rhs as a real rhs with twice the number of columns function (\)(F::Factorization{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal - c2r = reshape(transpose(reinterpret(T, B, (2, length(B)))), size(B, 1), 2*size(B, 2)) + c2r = reshape(transpose(reinterpret(T, reshape(B, (1, length(B))))), size(B, 1), 2*size(B, 2)) x = A_ldiv_B!(F, c2r) - return reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)), _ret_size(F, B)) + return 
reshape(collect(reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)))), _ret_size(F, B)) end for (f1, f2) in ((:\, :A_ldiv_B!), diff --git a/base/linalg/lq.jl b/base/linalg/lq.jl index b878468617cf8..356b3e99b9228 100644 --- a/base/linalg/lq.jl +++ b/base/linalg/lq.jl @@ -267,10 +267,10 @@ end # With a real lhs and complex rhs with the same precision, we can reinterpret # the complex rhs as a real rhs with twice the number of columns function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal - c2r = reshape(transpose(reinterpret(T, B, (2, length(B)))), size(B, 1), 2*size(B, 2)) + c2r = reshape(transpose(reinterpret(T, reshape(B, (1, length(B))))), size(B, 1), 2*size(B, 2)) x = A_ldiv_B!(F, c2r) - return reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)), - isa(B, AbstractVector) ? (size(F,2),) : (size(F,2), size(B,2))) + return reshape(collect(reinterpret(Complex{T}, transpose(reshape(x, div(length(x), 2), 2)))), + isa(B, AbstractVector) ? (size(F,2),) : (size(F,2), size(B,2))) end diff --git a/base/linalg/matmul.jl b/base/linalg/matmul.jl index 0ca518a58fb41..88885be32dcf5 100644 --- a/base/linalg/matmul.jl +++ b/base/linalg/matmul.jl @@ -90,7 +90,7 @@ A_mul_B!(y::StridedVector{T}, A::StridedVecOrMat{T}, x::StridedVector{T}) where for elty in (Float32,Float64) @eval begin function A_mul_B!(y::StridedVector{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, x::StridedVector{$elty}) - Afl = reinterpret($elty,A,(2size(A,1),size(A,2))) + Afl = reinterpret($elty,A) yfl = reinterpret($elty,y) gemv!(yfl,'N',Afl,x) return y @@ -148,8 +148,8 @@ A_mul_B!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}) wher for elty in (Float32,Float64) @eval begin function A_mul_B!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, B::StridedVecOrMat{$elty}) - Afl = reinterpret($elty, A, (2size(A,1), size(A,2))) - Cfl = reinterpret($elty, C, (2size(C,1), size(C,2))) + Afl = reinterpret($elty, A) + Cfl = reinterpret($elty, C) gemm_wrapper!(Cfl, 'N', 'N', Afl, B) return C end @@ -190,8 +190,8 @@ A_mul_Bt!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T}) whe for elty in (Float32,Float64) @eval begin function A_mul_Bt!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, B::StridedVecOrMat{$elty}) - Afl = reinterpret($elty, A, (2size(A,1), size(A,2))) - Cfl = reinterpret($elty, C, (2size(C,1), size(C,2))) + Afl = reinterpret($elty, A) + Cfl = reinterpret($elty, C) gemm_wrapper!(Cfl, 'N', 'T', Afl, B) return C end diff --git a/base/linalg/qr.jl b/base/linalg/qr.jl index 859ccd659bed9..0090880dbe027 100644 --- a/base/linalg/qr.jl +++ b/base/linalg/qr.jl @@ -923,7 +923,7 @@ function (\)(A::Union{QR{T},QRCompactWY{T},QRPivoted{T}}, BIn::VecOrMat{Complex{ # |z2|z4| -> |y1|y2|y3|y4| -> |x2|y2| -> |x2|y2|x4|y4| # |x3|y3| # |x4|y4| - B = reshape(transpose(reinterpret(T, BIn, (2, length(BIn)))), size(BIn, 1), 2*size(BIn, 2)) + B = reshape(transpose(reinterpret(T, reshape(BIn, (1, length(BIn))))), size(BIn, 1), 2*size(BIn, 2)) X = A_ldiv_B!(A, _append_zeros(B, T, n)) @@ -931,7 +931,7 @@ function (\)(A::Union{QR{T},QRCompactWY{T},QRPivoted{T}}, BIn::VecOrMat{Complex{ # |z2|z4| <- |y1|y2|y3|y4| <- |x2|y2| <- |x2|y2|x4|y4| # |x3|y3| # |x4|y4| - XX = reinterpret(Complex{T}, transpose(reshape(X, div(length(X), 2), 2)), _ret_size(A, BIn)) + XX = reshape(collect(reinterpret(Complex{T}, transpose(reshape(X, div(length(X), 2), 2)))), _ret_size(A, BIn)) return _cut_B(XX, 1:n) end diff --git a/base/pointer.jl b/base/pointer.jl 
index b2197d21db8c0..2daa2e4a4408a 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -147,8 +147,8 @@ eltype(::Type{Ptr{T}}) where {T} = T isless(x::Ptr, y::Ptr) = isless(UInt(x), UInt(y)) -(x::Ptr, y::Ptr) = UInt(x) - UInt(y) -+(x::Ptr, y::Integer) = oftype(x, (UInt(x) + (y % UInt) % UInt)) --(x::Ptr, y::Integer) = oftype(x, (UInt(x) - (y % UInt) % UInt)) ++(x::Ptr, y::Integer) = oftype(x, Intrinsics.add_ptr(UInt(x), (y % UInt) % UInt)) +-(x::Ptr, y::Integer) = oftype(x, Intrinsics.sub_ptr(UInt(x), (y % UInt) % UInt)) +(x::Integer, y::Ptr) = y + x """ diff --git a/base/random/dSFMT.jl b/base/random/dSFMT.jl index 2061cc54f9741..d4ae974dbe9fc 100644 --- a/base/random/dSFMT.jl +++ b/base/random/dSFMT.jl @@ -104,7 +104,8 @@ function dsfmt_jump(s::DSFMT_state, jp::AbstractString) val = s.val nval = length(val) index = val[nval - 1] - work = zeros(UInt64, JN32 >> 1) + work = zeros(Int32, JN32) + rwork = reinterpret(UInt64, work) dsfmt = Vector{UInt64}(nval >> 1) ccall(:memcpy, Ptr{Void}, (Ptr{UInt64}, Ptr{Int32}, Csize_t), dsfmt, val, (nval - 1) * sizeof(Int32)) @@ -113,17 +114,17 @@ function dsfmt_jump(s::DSFMT_state, jp::AbstractString) for c in jp bits = parse(UInt8,c,16) for j in 1:4 - (bits & 0x01) != 0x00 && dsfmt_jump_add!(work, dsfmt) + (bits & 0x01) != 0x00 && dsfmt_jump_add!(rwork, dsfmt) bits = bits >> 0x01 dsfmt_jump_next_state!(dsfmt) end end - work[end] = index - return DSFMT_state(reinterpret(Int32, work)) + rwork[end] = index + return DSFMT_state(work) end -function dsfmt_jump_add!(dest::Vector{UInt64}, src::Vector{UInt64}) +function dsfmt_jump_add!(dest::AbstractVector{UInt64}, src::Vector{UInt64}) dp = dest[end] >> 1 sp = src[end] >> 1 diff = ((sp - dp + N) % N) diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl new file mode 100644 index 0000000000000..688b67308e57f --- /dev/null +++ b/base/reinterpretarray.jl @@ -0,0 +1,134 @@ +""" +Gives a reinterpreted view (of element type T) of the underlying array (of element type S). +If the size of `T` differs from the size of `S`, the array will be compressed/expanded in +the first dimension. +""" +struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N} + parent::A + function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}} + function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U} + @_noinline_meta + throw(ArgumentError("cannot reinterpret `$(S)` `$(T)`, type `$(U)` is not a bits type")) + end + function throwsize0(::Type{S}, ::Type{T}) + @_noinline_meta + throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a different size")) + end + function thrownonint(::Type{S}, ::Type{T}, dim) + @_noinline_meta + throw(ArgumentError(""" + cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`. + The resulting array would have non-integral first dimension. + """)) + end + isbits(T) || throwbits(S, T, T) + isbits(S) || throwbits(S, T, S) + (N != 0 || sizeof(T) == sizeof(S)) || throwsize0(S, T) + if N != 0 && sizeof(S) != sizeof(T) + dim = size(a)[1] + rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim) + end + new{T, N, S, A}(a) + end +end + +parent(a::ReinterpretArray) = a.parent + +eltype(a::ReinterpretArray{T}) where {T} = T +function size(a::ReinterpretArray{T,N,S} where {N}) where {T,S} + psize = size(a.parent) + size1 = div(psize[1]*sizeof(S), sizeof(T)) + tuple(size1, tail(psize)...) 
+end + +unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent)) + +@inline @propagate_inbounds getindex(a::ReinterpretArray{T,0}) where {T} = reinterpret(T, a.parent[]) +@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1] + +@inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S} + # Make sure to match the scalar reinterpret if that is applicable + if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 + return reinterpret(T, a.parent[inds...]) + else + ind_start, sidx = divrem((inds[1]-1)*sizeof(T), sizeof(S)) + t = Ref{T}() + s = Ref{S}() + @gc_preserve t s begin + tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t)) + sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s)) + i = 1 + nbytes_copied = 0 + # This is a bit complicated to deal with partial elements + # at both the start and the end. LLVM will fold as appropriate, + # once it knows the data layout + while nbytes_copied < sizeof(T) + s[] = a.parent[ind_start + i, tail(inds)...] + while nbytes_copied < sizeof(T) && sidx < sizeof(S) + unsafe_store!(tptr, unsafe_load(sptr, sidx + 1), nbytes_copied + 1) + sidx += 1 + nbytes_copied += 1 + end + sidx = 0 + i += 1 + end + end + return t[] + end +end + +@inline @propagate_inbounds setindex!(a::ReinterpretArray{T,0,S} where T, v) where {S} = (a.parent[] = reinterpret(S, v)) +@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v) + +@inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S} + v = convert(T, v)::T + # Make sure to match the scalar reinterpret if that is applicable + if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0 + return setindex!(a.parent, reinterpret(S, v), inds...) + else + ind_start, sidx = divrem((inds[1]-1)*sizeof(T), sizeof(S)) + t = Ref{T}(v) + s = Ref{S}() + @gc_preserve t s begin + tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t)) + sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s)) + nbytes_copied = 0 + i = 1 + # Deal with any partial elements at the start. We'll have to copy in the + # element from the original array and overwrite the relevant parts + if sidx != 0 + s[] = a.parent[ind_start + i, tail(inds)...] + while nbytes_copied < sizeof(T) && sidx < sizeof(S) + unsafe_store!(sptr, unsafe_load(tptr, nbytes_copied + 1), sidx + 1) + sidx += 1 + nbytes_copied += 1 + end + a.parent[ind_start + i, tail(inds)...] = s[] + i += 1 + sidx = 0 + end + # Deal with the main body of elements + while nbytes_copied < sizeof(T) && (sizeof(T) - nbytes_copied) > sizeof(S) + while nbytes_copied < sizeof(T) && sidx < sizeof(S) + unsafe_store!(sptr, unsafe_load(tptr, nbytes_copied + 1), sidx + 1) + sidx += 1 + nbytes_copied += 1 + end + a.parent[ind_start + i, tail(inds)...] = s[] + i += 1 + sidx = 0 + end + # Deal with trailing partial elements + if nbytes_copied < sizeof(T) + s[] = a.parent[ind_start + i, tail(inds)...] + while nbytes_copied < sizeof(T) && sidx < sizeof(S) + unsafe_store!(sptr, unsafe_load(tptr, nbytes_copied + 1), sidx + 1) + sidx += 1 + nbytes_copied += 1 + end + a.parent[ind_start + i, tail(inds)...] 
= s[] + end + end + end + return a +end diff --git a/base/show.jl b/base/show.jl index ce9f3eb17dff4..65c91412ebd7b 100644 --- a/base/show.jl +++ b/base/show.jl @@ -1888,6 +1888,12 @@ function showarg(io::IO, r::ReshapedArray, toplevel) toplevel && print(io, " with eltype ", eltype(r)) end +function showarg(io::IO, r::ReinterpretArray{T}, toplevel) where {T} + print(io, "reinterpret($T, ") + showarg(io, parent(r), false) + print(io, ')') +end + # n-dimensional arrays function show_nd(io::IO, a::AbstractArray, print_matrix, label_slices) limit::Bool = get(io, :limit, false) diff --git a/base/sparse/abstractsparse.jl b/base/sparse/abstractsparse.jl index e17d3be97dbcb..8c84f33b7e18c 100644 --- a/base/sparse/abstractsparse.jl +++ b/base/sparse/abstractsparse.jl @@ -21,3 +21,10 @@ issparse(S::UpperTriangular{<:Any,<:AbstractSparseMatrix}) = true issparse(S::LinAlg.UnitUpperTriangular{<:Any,<:AbstractSparseMatrix}) = true indtype(S::AbstractSparseArray{<:Any,Ti}) where {Ti} = Ti + +function Base.reinterpret(::Type, A::AbstractSparseArray) + error(""" + `reinterpret` on sparse arrays is discontinued. + Try reinterpreting the value itself instead. + """) +end diff --git a/base/sparse/sparse.jl b/base/sparse/sparse.jl index abe6289b18070..45f1dfc0d556a 100644 --- a/base/sparse/sparse.jl +++ b/base/sparse/sparse.jl @@ -22,7 +22,7 @@ import Base: @get!, acos, acosd, acot, acotd, acsch, asech, asin, asind, asinh, broadcast, ceil, complex, cond, conj, convert, copy, copy!, adjoint, diagm, exp, expm1, factorize, find, findmax, findmin, findnz, float, full, getindex, vcat, hcat, hvcat, cat, imag, indmax, ishermitian, kron, length, log, log1p, max, min, - maximum, minimum, norm, one, promote_eltype, real, reinterpret, reshape, rot180, + maximum, minimum, norm, one, promote_eltype, real, reshape, rot180, rotl90, rotr90, round, scale!, setindex!, similar, size, transpose, tril, triu, vec, permute!, map, map! 
diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl index 09255dacdaefa..589047a9fe893 100644 --- a/base/sparse/sparsematrix.jl +++ b/base/sparse/sparsematrix.jl @@ -210,18 +210,7 @@ function Base.show(io::IOContext, S::SparseMatrixCSC) end end -## Reinterpret and Reshape - -function reinterpret(::Type{T}, a::SparseMatrixCSC{Tv}) where {T,Tv} - if sizeof(T) != sizeof(Tv) - throw(ArgumentError("SparseMatrixCSC reinterpret is only supported for element types of the same size")) - end - mA, nA = size(a) - colptr = copy(a.colptr) - rowval = copy(a.rowval) - nzval = reinterpret(T, a.nzval) - return SparseMatrixCSC(mA, nA, colptr, rowval, nzval) -end +## Reshape function sparse_compute_reshaped_colptr_and_rowval(colptrS::Vector{Ti}, rowvalS::Vector{Ti}, mS::Int, nS::Int, colptrA::Vector{Ti}, @@ -257,25 +246,6 @@ function sparse_compute_reshaped_colptr_and_rowval(colptrS::Vector{Ti}, rowvalS: end end -function reinterpret(::Type{T}, a::SparseMatrixCSC{Tv,Ti}, dims::NTuple{N,Int}) where {T,Tv,Ti,N} - if sizeof(T) != sizeof(Tv) - throw(ArgumentError("SparseMatrixCSC reinterpret is only supported for element types of the same size")) - end - if prod(dims) != length(a) - throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(a))")) - end - mS,nS = dims - mA,nA = size(a) - numnz = nnz(a) - colptr = Vector{Ti}(nS+1) - rowval = similar(a.rowval) - nzval = reinterpret(T, a.nzval) - - sparse_compute_reshaped_colptr_and_rowval(colptr, rowval, mS, nS, a.colptr, a.rowval, mA, nA) - - return SparseMatrixCSC(mS, nS, colptr, rowval, nzval) -end - function copy(ra::ReshapedArray{<:Any,2,<:SparseMatrixCSC}) mS,nS = size(ra) a = parent(ra) diff --git a/base/sparse/spqr.jl b/base/sparse/spqr.jl index d752712027760..e1bf8119ec133 100644 --- a/base/sparse/spqr.jl +++ b/base/sparse/spqr.jl @@ -341,14 +341,14 @@ function (\)(F::QRSparse{Float64}, B::VecOrMat{Complex{Float64}}) # |z2|z4| -> |y1|y2|y3|y4| -> |x2|y2| -> |x2|y2|x4|y4| # |x3|y3| # |x4|y4| - c2r = reshape(transpose(reinterpret(Float64, B, (2, length(B)))), size(B, 1), 2*size(B, 2)) + c2r = reshape(transpose(reinterpret(Float64, reshape(B, (1, length(B))))), size(B, 1), 2*size(B, 2)) x = F\c2r # |z1|z3| reinterpret |x1|x2|x3|x4| transpose |x1|y1| reshape |x1|y1|x3|y3| # |z2|z4| <- |y1|y2|y3|y4| <- |x2|y2| <- |x2|y2|x4|y4| # |x3|y3| # |x4|y4| - return reinterpret(Complex{Float64}, transpose(reshape(x, (length(x) >> 1), 2)), _ret_size(F, B)) + return collect(reshape(reinterpret(Complex{Float64}, transpose(reshape(x, (length(x) >> 1), 2))), _ret_size(F, B))) end function _ldiv_basic(F::QRSparse, B::StridedVecOrMat) diff --git a/base/sysimg.jl b/base/sysimg.jl index 7609ab098fad2..15307bee62fff 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -121,6 +121,7 @@ include("indices.jl") include("array.jl") include("abstractarray.jl") include("subarray.jl") +include("reinterpretarray.jl") # Array convenience converting constructors Array{T}(m::Integer) where {T} = Array{T,1}(Int(m)) @@ -182,15 +183,16 @@ using .Iterators: Flatten, product # for generators # Definition of StridedArray StridedReshapedArray{T,N,A<:Union{DenseArray,FastContiguousSubArray}} = ReshapedArray{T,N,A} +StridedReinterpretArray{T,N,A<:Union{DenseArray,FastContiguousSubArray}} = ReinterpretArray{T,N,S,A} where S StridedArray{T,N,A<:Union{DenseArray,StridedReshapedArray}, I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}} = - Union{DenseArray{T,N}, SubArray{T,N,A,I}, StridedReshapedArray{T,N}} + Union{DenseArray{T,N}, 
SubArray{T,N,A,I}, StridedReshapedArray{T,N}, StridedReinterpretArray{T,N,A}} StridedVector{T,A<:Union{DenseArray,StridedReshapedArray}, I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}} = - Union{DenseArray{T,1}, SubArray{T,1,A,I}, StridedReshapedArray{T,1}} + Union{DenseArray{T,1}, SubArray{T,1,A,I}, StridedReshapedArray{T,1}, StridedReinterpretArray{T,1,A}} StridedMatrix{T,A<:Union{DenseArray,StridedReshapedArray}, I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}} = - Union{DenseArray{T,2}, SubArray{T,2,A,I}, StridedReshapedArray{T,2}} + Union{DenseArray{T,2}, SubArray{T,2,A,I}, StridedReshapedArray{T,2}, StridedReinterpretArray{T,2,A}} StridedVecOrMat{T} = Union{StridedVector{T}, StridedMatrix{T}} # For OS specific stuff diff --git a/deps/llvm.mk b/deps/llvm.mk index 7f44868aba359..07b77b15298e1 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -460,6 +460,7 @@ $(eval $(call LLVM_PATCH,llvm-D32593)) $(eval $(call LLVM_PATCH,llvm-D33179)) $(eval $(call LLVM_PATCH,llvm-PR29010-i386-xmm)) # Remove for 4.0 $(eval $(call LLVM_PATCH,llvm-3.9.0-D37576-NVPTX-sm_70)) # NVPTX, Remove for 6.0 +$(eval $(call LLVM_PATCH,llvm-D37939-Mem2Reg-Also-handle-memcpy)) else ifeq ($(LLVM_VER_SHORT),4.0) # Cygwin and openSUSE still use win32-threads mingw, https://llvm.org/bugs/show_bug.cgi?id=26365 $(eval $(call LLVM_PATCH,llvm-4.0.0_threads)) diff --git a/deps/patches/llvm-D37939-Mem2Reg-Also-handle-memcpy.patch b/deps/patches/llvm-D37939-Mem2Reg-Also-handle-memcpy.patch new file mode 100644 index 0000000000000..b8753b0439ba0 --- /dev/null +++ b/deps/patches/llvm-D37939-Mem2Reg-Also-handle-memcpy.patch @@ -0,0 +1,365 @@ +From da4504b2d3c6629fbd58634bf76f1b85939d07cf Mon Sep 17 00:00:00 2001 +From: Keno Fischer +Date: Fri, 15 Sep 2017 18:30:59 -0400 +Subject: [PATCH] [Mem2Reg] Also handle memcpy + +Summary: +In julia, when we know we're moving data between two memory locations, +we always emit that as a memcpy rather than a load/store pair. However, +this can give worse optimization results in certain cases because some +optimizations that can handle load/store pairs cannot handle memcpys. +Mem2reg is one of these optimizations. This patch adds rudamentary +support for mem2reg for recognizing memcpys that cover the whole alloca +we're promoting. While several more sophisticated passes (SROA, GVN) +can get similar optimizations, it is preferable to have these kinds +of cases caught early to expose optimization opportunities before +getting to these later passes. The approach taken here is to split +the memcpy into a load/store pair early (after legality analysis) +and retain the rest of the analysis only on loads/stores. It would +be possible of course to leave the memcpy as is and generate the +left over load or store only on demand. However, that would entail +a significantly larger patch for unclear benefit. 
+ +Reviewers: chandlerc, dberlin + +Subscribers: llvm-commits + +Differential Revision: https://reviews.llvm.org/D37939 +--- + lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 166 ++++++++++++++++++++--- + test/Transforms/Mem2Reg/memcpy.ll | 101 ++++++++++++++ + 2 files changed, 251 insertions(+), 16 deletions(-) + create mode 100644 test/Transforms/Mem2Reg/memcpy.ll + +diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +index ac28f59..b08a0a1 100644 +--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp ++++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +@@ -49,6 +49,58 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); + STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); + STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); + ++static bool isSplittableMemCpy(const MemCpyInst *MCI, const AllocaInst *AI) { ++ // Punt if this alloca is an array allocation ++ if (AI->isArrayAllocation()) ++ return false; ++ if (MCI->isVolatile()) ++ return false; ++ Value *Length = MCI->getLength(); ++ if (!isa(Length)) ++ return false; ++ // Anything less than the full alloca, we leave for SROA ++ const DataLayout &DL = AI->getModule()->getDataLayout(); ++ size_t AIElSize = DL.getTypeAllocSize(AI->getAllocatedType()); ++ if (cast(Length)->getZExtValue() != AIElSize) ++ return false; ++ // If the other argument is also an alloca, we need to be sure that either ++ // the types are bitcastable, or the other alloca is not eligible for ++ // promotion (e.g. because the memcpy is for less than the whole size of ++ // that alloca), otherwise we risk turning an allocatable alloca into a ++ // non-allocatable one when splitting the memcpy. ++ AllocaInst *OtherAI = dyn_cast( ++ AI == MCI->getSource() ? MCI->getDest() : MCI->getSource()); ++ if (OtherAI) { ++ if (!CastInst::isBitCastable(AI->getAllocatedType(), ++ OtherAI->getAllocatedType()) && ++ DL.getTypeAllocSize(OtherAI->getAllocatedType()) == AIElSize) ++ return false; ++ } ++ return true; ++} ++ ++/// Look at the result of a bitcast and see if it's only used by lifetime ++/// intrinsics or splittable memcpys. This is needed, because IRBuilder ++/// will always insert a bitcast to i8* for these intrinsics. ++static bool onlyHasCanonicalizableUsers(const AllocaInst *AI, const Value *V) { ++ for (const User *U : V->users()) { ++ const IntrinsicInst *II = dyn_cast(U); ++ if (!II) ++ return false; ++ ++ if (isa(II)) { ++ if (!isSplittableMemCpy(cast(II), AI)) ++ return false; ++ continue; ++ } ++ ++ if (II->getIntrinsicID() != Intrinsic::lifetime_start && ++ II->getIntrinsicID() != Intrinsic::lifetime_end) ++ return false; ++ } ++ return true; ++} ++ + bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // FIXME: If the memory unit is of pointer or integer type, we can permit + // assignments to subsections of the memory unit. +@@ -68,6 +120,9 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // not have any meaning for a local alloca. 
+ if (SI->isVolatile()) + return false; ++ } else if (const MemCpyInst *MCI = dyn_cast(U)) { ++ if (!isSplittableMemCpy(MCI, AI)) ++ return false; + } else if (const IntrinsicInst *II = dyn_cast(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) +@@ -75,7 +130,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { + } else if (const BitCastInst *BCI = dyn_cast(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + return false; +- if (!onlyUsedByLifetimeMarkers(BCI)) ++ if (!onlyHasCanonicalizableUsers(AI, BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) +@@ -181,7 +235,13 @@ public: + /// This code only looks at accesses to allocas. + static bool isInterestingInstruction(const Instruction *I) { ++ if (isa(I)) { ++ const MemCpyInst *MCI = cast(I); ++ return isa(MCI->getSource()) || ++ isa(MCI->getDest()); ++ } else { + return (isa(I) && isa(I->getOperand(0))) || + (isa(I) && isa(I->getOperand(1))); + } ++ } + + /// Get or calculate the index of the specified instruction. +@@ -208,6 +264,25 @@ public: + return It->second; + } + ++ // When we split a memcpy intrinsic, we need to update the numbering in this ++ // struct. To make sure the relative ordering remains the same, we give both ++ // the LI and the SI the number that the MCI used to have (if they are both ++ // interesting). This means that they will have equal numbers, which usually ++ // can't happen. However, since they can never reference the same alloca ++ // (since memcpy operands may not overlap), this is fine, because we will ++ // never compare instruction indices for instructions that operate on distinct ++ // allocas. ++ void splitMemCpy(MemCpyInst *MCI, LoadInst *LI, StoreInst *SI) { ++ DenseMap::iterator It = ++ InstNumbers.find(MCI); ++ if (It == InstNumbers.end()) ++ return; ++ unsigned MemCpyNumber = It->second; ++ InstNumbers[LI] = MemCpyNumber; ++ InstNumbers[SI] = MemCpyNumber; ++ deleteValue(MCI); ++ } ++ + void deleteValue(const Instruction *I) { InstNumbers.erase(I); } + + void clear() { InstNumbers.clear(); } +@@ -305,9 +380,58 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { + AC->registerAssumption(CI); + } + +-static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { +- // Knowing that this alloca is promotable, we know that it's safe to kill all +- // instructions except for load and store. ++/// Split a memcpy instruction into the corresponding load/store. It is a little ++/// more complicated than one might imagine, because we need to deal with the ++/// fact that the side of the copy we're not currently processing might also ++/// be a promotable alloca. We need to be careful to not break the promotable ++/// predicate for that other alloca (if any). 
++static void doMemCpySplit(LargeBlockInfo &LBI, MemCpyInst *MCI, ++ AllocaInst *AI) { ++ AAMDNodes AA; ++ MCI->getAAMetadata(AA); ++ Value *MCISrc = MCI->getSource(); ++ Type *LoadType = AI->getAllocatedType(); ++ AllocaInst *SrcAI = dyn_cast(MCISrc); ++ if (SrcAI && SrcAI->getType() != AI->getType()) { ++ if (CastInst::isBitCastable(SrcAI->getAllocatedType(), LoadType)) ++ LoadType = SrcAI->getAllocatedType(); ++ } ++ if (cast(MCISrc->getType())->getElementType() != LoadType) ++ MCISrc = CastInst::Create( ++ Instruction::BitCast, MCISrc, ++ LoadType->getPointerTo( ++ cast(MCISrc->getType())->getAddressSpace()), ++ "", MCI); ++ // This might add to the end of the use list, but that's fine. At worst, ++ // we'd not visit the instructions we insert here, but we don't care ++ // about them in this loop anyway. ++ LoadInst *LI = new LoadInst(LoadType, MCISrc, "", MCI->isVolatile(), ++ MCI->getAlignment(), MCI); ++ Value *Val = LI; ++ Value *MCIDest = MCI->getDest(); ++ AllocaInst *DestAI = dyn_cast(MCIDest); ++ Type *DestElTy = DestAI ? DestAI->getAllocatedType() : AI->getAllocatedType(); ++ if (LI->getType() != DestElTy && ++ CastInst::isBitCastable(LI->getType(), DestElTy)) ++ Val = CastInst::Create(Instruction::BitCast, Val, DestElTy, "", MCI); ++ if (cast(MCIDest->getType())->getElementType() != Val->getType()) ++ MCIDest = CastInst::Create( ++ Instruction::BitCast, MCIDest, ++ Val->getType()->getPointerTo( ++ cast(MCIDest->getType())->getAddressSpace()), ++ "", MCI); ++ StoreInst *SI = ++ new StoreInst(Val, MCIDest, MCI->isVolatile(), MCI->getAlignment(), MCI); ++ LI->setAAMetadata(AA); ++ SI->setAAMetadata(AA); ++ LBI.splitMemCpy(MCI, LI, SI); ++ MCI->eraseFromParent(); ++} ++ ++static void canonicalizeUsers(LargeBlockInfo &LBI, AllocaInst *AI) { ++ // Knowing that this alloca is promotable, we know that it's safe to split ++ // MTIs into load/store and to kill all other instructions except for ++ // load and store. + + for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { + Instruction *I = cast(*UI); +@@ -315,14 +439,24 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + if (isa(I) || isa(I)) + continue; + ++ if (isa(I)) { ++ MemCpyInst *MCI = cast(I); ++ doMemCpySplit(LBI, MCI, AI); ++ continue; ++ } ++ + if (!I->getType()->isVoidTy()) { +- // The only users of this bitcast/GEP instruction are lifetime intrinsics. +- // Follow the use/def chain to erase them now instead of leaving it for +- // dead code elimination later. ++ // The only users of this bitcast/GEP instruction are lifetime/memcpy ++ // intrinsics. Split memcpys and delete lifetime intrinsics. + for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { + Instruction *Inst = cast(*UUI); + ++UUI; +- Inst->eraseFromParent(); ++ if (isa(Inst)) { ++ doMemCpySplit(LBI, cast(Inst), AI); ++ } else { ++ // Must be a lifetime intrinsic ++ Inst->eraseFromParent(); ++ } + } + } + I->eraseFromParent(); +@@ -542,7 +676,7 @@ void PromoteMem2Reg::run() { + assert(AI->getParent()->getParent() == &F && + "All allocas should be in the same function, which is same as DF!"); + +- removeLifetimeIntrinsicUsers(AI); ++ canonicalizeUsers(LBI, AI); + + if (AI->use_empty()) { + // If there are no uses of the alloca, just delete it now. 
+diff --git a/test/Transforms/Mem2Reg/memcpy.ll b/test/Transforms/Mem2Reg/memcpy.ll +new file mode 100644 +index 0000000..fbc4096 +--- /dev/null ++++ b/test/Transforms/Mem2Reg/memcpy.ll +@@ -0,0 +1,101 @@ ++; RUN: opt < %s -mem2reg -S | FileCheck %s ++ ++target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ++ ++declare void @llvm.memcpy.p0i128.p0i64.i32(i128 *, i64 *, i32, i32, i1) ++declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *, i8 *, i32, i32, i1) ++declare void @llvm.memcpy.p0i64.p0i64.i32(i64 *, i64 *, i32, i32, i1) ++declare void @llvm.memcpy.p0f64.p0i64.i32(double *, i64 *, i32, i32, i1) ++ ++define i128 @test_cpy_different(i64) { ++; CHECK-LABEL: @test_cpy_different ++; CHECK-NOT: alloca i64 ++; CHECK: store i64 %0 ++ %a = alloca i64 ++ %b = alloca i128 ++ store i128 0, i128 *%b ++ store i64 %0, i64 *%a ++ call void @llvm.memcpy.p0i128.p0i64.i32(i128 *%b, i64 *%a, i32 8, i32 0, i1 0) ++ %loaded = load i128, i128 *%b ++ ret i128 %loaded ++} ++ ++define i64 @test_cpy_same(i64) { ++; CHECK-LABEL: @test_cpy_same ++; CHECK-NOT: alloca ++; CHECK: ret i64 %0 ++ %a = alloca i64 ++ %b = alloca i64 ++ store i64 %0, i64 *%a ++ call void @llvm.memcpy.p0i64.p0i64.i32(i64 *%b, i64 *%a, i32 8, i32 0, i1 0) ++ %loaded = load i64, i64 *%b ++ ret i64 %loaded ++} ++ ++define double @test_cpy_different_type(i64) { ++; CHECK-LABEL: @test_cpy_different_type ++; CHECK-NOT: alloca ++; CHECK: bitcast i64 %0 to double ++ %a = alloca i64 ++ %b = alloca double ++ store i64 %0, i64 *%a ++ call void @llvm.memcpy.p0f64.p0i64.i32(double *%b, i64 *%a, i32 8, i32 0, i1 0) ++ %loaded = load double, double *%b ++ ret double %loaded ++} ++ ++define i128 @test_cpy_differenti8(i64) { ++; CHECK-LABEL: @test_cpy_differenti8 ++; CHECK-NOT: alloca i64 ++; CHECK: store i64 %0 ++ %a = alloca i64 ++ %b = alloca i128 ++ store i128 0, i128 *%b ++ store i64 %0, i64 *%a ++ %acast = bitcast i64* %a to i8* ++ %bcast = bitcast i128* %b to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%bcast, i8 *%acast, i32 8, i32 0, i1 0) ++ %loaded = load i128, i128 *%b ++ ret i128 %loaded ++} ++ ++define i64 @test_cpy_samei8(i64) { ++; CHECK-LABEL: @test_cpy_samei8 ++; CHECK-NOT: alloca ++; CHECK: ret i64 %0 ++ %a = alloca i64 ++ %b = alloca i64 ++ store i64 %0, i64 *%a ++ %acast = bitcast i64* %a to i8* ++ %bcast = bitcast i64* %b to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%bcast, i8 *%acast, i32 8, i32 0, i1 0) ++ %loaded = load i64, i64 *%b ++ ret i64 %loaded ++} ++ ++define double @test_cpy_different_typei8(i64) { ++; CHECK-LABEL: @test_cpy_different_typei8 ++; CHECK-NOT: alloca ++; CHECK: bitcast i64 %0 to double ++ %a = alloca i64 ++ %b = alloca double ++ store i64 %0, i64 *%a ++ %acast = bitcast i64* %a to i8* ++ %bcast = bitcast double* %b to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%bcast, i8 *%acast, i32 8, i32 0, i1 0) ++ %loaded = load double, double *%b ++ ret double %loaded ++} ++ ++define i64 @test_cpy_differenti8_reverse(i128) { ++; CHECK-LABEL: @test_cpy_differenti8_reverse ++; CHECK-NOT: alloca i64 ++ %a = alloca i64 ++ %b = alloca i128 ++ store i128 %0, i128 *%b ++ %acast = bitcast i64* %a to i8* ++ %bcast = bitcast i128* %b to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%acast, i8 *%bcast, i32 8, i32 0, i1 0) ++ %loaded = load i64, i64 *%a ++ ret i64 %loaded ++} +-- +2.9.3 + diff --git a/src/array.c b/src/array.c index 0810ab9348958..e519415a6cca0 100644 --- a/src/array.c +++ b/src/array.c @@ -180,6 +180,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, size_t ndims = 
jl_nfields(_dims); assert(is_ntuple_long(_dims)); size_t *dims = (size_t*)_dims; + assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype))); int ndimwords = jl_array_ndimwords(ndims); int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index af9f99826528a..c6f2006974043 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -235,7 +235,7 @@ static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V) #else Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); #endif - return Call; + return ctx.builder.CreatePtrToInt(Call, T_size); } // --- emitting pointers directly into code --- @@ -368,6 +368,12 @@ static Value *emit_bitcast(jl_codectx_t &ctx, Value *v, Type *jl_value) } } +static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) { + if (to != V->getType()) + return emit_bitcast(ctx, V, to); + return V; +} + static Value *julia_binding_gv(jl_codectx_t &ctx, Value *bv) { Value *offset = ConstantInt::get(T_size, offsetof(jl_binding_t, value) / sizeof(size_t)); @@ -1250,8 +1256,8 @@ static void typed_store(jl_codectx_t &ctx, } else { data = ptr; } - Instruction *store = ctx.builder.CreateAlignedStore(r, ctx.builder.CreateGEP(data, - idx_0based), isboxed ? alignment : julia_alignment(jltype, alignment)); + Instruction *store = ctx.builder.CreateAlignedStore(r, idx_0based ? ctx.builder.CreateGEP(data, + idx_0based) : data, isboxed ? alignment : julia_alignment(jltype, alignment)); if (tbaa) tbaa_decorate(tbaa, store); } @@ -1267,7 +1273,7 @@ static Value *julia_bool(jl_codectx_t &ctx, Value *cond) // --- accessing the representations of built-in data types --- static Constant *julia_const_to_llvm(jl_value_t *e); -static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x, Type *astype = T_ppjlvalue) +static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x) { Value *data = x.V; if (x.constant) { @@ -1279,9 +1285,7 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x, Type *astype data = boxed(ctx, x); } } - if (astype && data->getType() != astype) - data = emit_bitcast(ctx, data, astype); - return decay_derived(data); + return data; } static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, Value *src, @@ -1342,7 +1346,7 @@ static Value *get_value_ptr(jl_codectx_t&, Value *ptr) static Value *get_value_ptr(jl_codectx_t &ctx, const jl_cgval_t &v) { - return data_pointer(ctx, v, nullptr); + return data_pointer(ctx, v); } template @@ -1372,7 +1376,9 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, Value *fld = tbaa_decorate(strct.tbaa, maybe_mark_load_dereferenceable( ctx.builder.CreateLoad( - ctx.builder.CreateBitCast(ctx.builder.CreateGEP(decay_derived(data_pointer(ctx, strct)), idx), + ctx.builder.CreateBitCast( + ctx.builder.CreateGEP(decay_derived( + emit_bitcast(ctx, data_pointer(ctx, strct), T_pprjlvalue)), idx), PointerType::get(T_prjlvalue, AddressSpace::Derived))), maybe_null, minimum_field_size)); if (maybe_null) @@ -1384,11 +1390,11 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx, assert(nfields > 0); // nf == 0 trapped by all_pointers case jl_value_t *jt = jl_field_type(stt, 0); idx = emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(T_size, nfields), inbounds); - Value *ptr = data_pointer(ctx, strct); + Value *ptr = decay_derived(data_pointer(ctx, strct)); if (!stt->mutabl) { // just compute the pointer and let user load it when necessary Type *fty = 
julia_type_to_llvm(jt); - Value *addr = ctx.builder.CreateGEP(emit_bitcast(ctx, decay_derived(ptr), PointerType::get(fty,0)), idx); + Value *addr = ctx.builder.CreateGEP(emit_bitcast(ctx, ptr, PointerType::get(fty,0)), idx); *ret = mark_julia_slot(addr, jt, NULL, strct.tbaa); ret->isimmutable = strct.isimmutable; return true; @@ -1441,28 +1447,34 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st return ghostValue(jfty); Value *fldv = NULL; if (strct.ispointer()) { - Value *addr; + Value *addr = decay_derived(data_pointer(ctx, strct)); bool isboxed; Type *lt = julia_type_to_llvm((jl_value_t*)jt, &isboxed); if (isboxed) { - Value *ptr = decay_derived(data_pointer(ctx, strct, T_pint8)); - Value *llvm_idx = ConstantInt::get(T_size, jl_field_offset(jt, idx)); - addr = ctx.builder.CreateGEP(ptr, llvm_idx); + size_t byte_offset = jl_field_offset(jt, idx); + // byte_offset == 0 is an important special case here, e.g. + // for single field wrapper types. Introducing the bitcast + // can pessimize mem2reg + if (byte_offset > 0) { + addr = ctx.builder.CreateGEP( + emit_bitcast(ctx, addr, T_pint8), + ConstantInt::get(T_size, byte_offset)); + } } else { if (VectorType *vlt = dyn_cast(lt)) { // doesn't have the struct wrapper, so this must have been a VecElement // cast to the element type so that it can be addressed with GEP lt = vlt->getElementType(); - Value *ptr = data_pointer(ctx, strct, lt->getPointerTo()); + Value *ptr = emit_bitcast(ctx, addr, lt->getPointerTo()); Value *llvm_idx = ConstantInt::get(T_size, idx); addr = ctx.builder.CreateGEP(lt, ptr, llvm_idx); } else if (lt->isSingleValueType()) { - addr = data_pointer(ctx, strct, lt->getPointerTo()); + addr = emit_bitcast(ctx, addr, lt->getPointerTo()); } else { - Value *ptr = data_pointer(ctx, strct, lt->getPointerTo()); + Value *ptr = emit_bitcast(ctx, addr, lt->getPointerTo()); addr = ctx.builder.CreateStructGEP(lt, ptr, idx); } } @@ -1503,7 +1515,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st fieldval.isimmutable = strct.isimmutable; return fieldval; } - return typed_load(ctx, addr, ConstantInt::get(T_size, 0), jfty, strct.tbaa, true, align); + return typed_load(ctx, addr, NULL, jfty, strct.tbaa, true, align); } else if (isa(strct.V)) { return jl_cgval_t(); @@ -2152,13 +2164,15 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, const jl_cgval_t &src emit_unbox(ctx, store_ty, src, typ, dest, isVolatile); } else { - Value *src_ptr = data_pointer(ctx, src, T_pint8); - if (dest->getType() != T_pint8) - dest = emit_bitcast(ctx, dest, T_pint8); - if (skip) // copy dest -> dest to simulate an undef value / conditional copy - src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); + Value *src_ptr = data_pointer(ctx, src); + unsigned nb = jl_datatype_size(typ); unsigned alignment = julia_alignment(typ, 0); - emit_memcpy(ctx, dest, src_ptr, jl_datatype_size(typ), alignment, isVolatile, tbaa); + Value *nbytes = ConstantInt::get(T_size, nb); + if (skip) // copy dest -> dest to simulate an undef value / conditional copy + nbytes = ctx.builder.CreateSelect(skip, + ConstantInt::get(T_size, 0), + nbytes); + emit_memcpy(ctx, dest, src_ptr, nbytes, alignment, isVolatile, tbaa); } } } @@ -2166,9 +2180,8 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, const jl_cgval_t &src Value *tindex = ctx.builder.CreateAnd(src.TIndex, ConstantInt::get(T_int8, 0x7f)); if (skip) tindex = ctx.builder.CreateSelect(skip, ConstantInt::get(T_int8, 0), tindex); - Value *src_ptr = 
data_pointer(ctx, src, T_pint8); - if (dest->getType() != T_pint8) - dest = emit_bitcast(ctx, dest, T_pint8); + Value *src_ptr = maybe_bitcast(ctx, data_pointer(ctx, src), T_pint8); + dest = maybe_bitcast(ctx, dest, T_pint8); BasicBlock *defaultBB = BasicBlock::Create(jl_LLVMContext, "union_move_skip", ctx.f); SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB); BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_union_move", ctx.f); @@ -2288,8 +2301,13 @@ static void emit_setfield(jl_codectx_t &ctx, { if (sty->mutabl || !checked) { assert(strct.ispointer()); - Value *addr = ctx.builder.CreateGEP(data_pointer(ctx, strct, T_pint8), - ConstantInt::get(T_size, jl_field_offset(sty, idx0))); + size_t byte_offset = jl_field_offset(sty, idx0); + Value *addr = data_pointer(ctx, strct); + if (byte_offset > 0) { + addr = ctx.builder.CreateGEP( + emit_bitcast(ctx, decay_derived(addr), T_pint8), + ConstantInt::get(T_size, byte_offset)); + } jl_value_t *jfty = jl_svecref(sty->types, idx0); if (jl_field_isptr(sty, idx0)) { Value *r = maybe_decay_untracked(boxed(ctx, rhs)); // don't need a temporary gcroot since it'll be rooted by strct @@ -2306,7 +2324,7 @@ static void emit_setfield(jl_codectx_t &ctx, return; Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty); tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1)); - Value *ptindex = ctx.builder.CreateGEP(T_int8, emit_bitcast(ctx, addr, T_pint8), ConstantInt::get(T_size, fsz - 1)); + Value *ptindex = ctx.builder.CreateGEP(T_int8, emit_bitcast(ctx, decay_derived(addr), T_pint8), ConstantInt::get(T_size, fsz - 1)); ctx.builder.CreateStore(tindex, ptindex); // copy data if (!rhs.isghost) { @@ -2315,8 +2333,9 @@ static void emit_setfield(jl_codectx_t &ctx, } else { unsigned align = jl_field_align(sty, idx0); - typed_store(ctx, addr, ConstantInt::get(T_size, 0), rhs, jfty, - strct.tbaa, data_pointer(ctx, strct, T_pjlvalue), align); + typed_store(ctx, addr, NULL, rhs, jfty, + strct.tbaa, maybe_bitcast(ctx, + data_pointer(ctx, strct), T_pjlvalue), align); } } else { @@ -2416,12 +2435,13 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg Value *strct = emit_allocobj(ctx, jl_datatype_size(sty), literal_pointer_val(ctx, (jl_value_t*)ty)); jl_cgval_t strctinfo = mark_julia_type(ctx, strct, true, ty); + strct = decay_derived(strct); for (size_t i = 0; i < nf; i++) { if (jl_field_isptr(sty, i)) { tbaa_decorate(strctinfo.tbaa, ctx.builder.CreateStore( ConstantPointerNull::get(cast(T_prjlvalue)), emit_bitcast(ctx, - ctx.builder.CreateGEP(emit_bitcast(ctx, decay_derived(strct), T_pint8), + ctx.builder.CreateGEP(emit_bitcast(ctx, strct, T_pint8), ConstantInt::get(T_size, jl_field_offset(sty, i))), T_pprjlvalue))); } diff --git a/src/codegen.cpp b/src/codegen.cpp index e1a3f99e146d3..7f13e04fcec9d 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2135,16 +2135,16 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const if (sz > 512 && !((jl_datatype_t*)arg1.typ)->layout->haspadding) { Value *answer = ctx.builder.CreateCall(prepare_call(memcmp_derived_func), { - data_pointer(ctx, arg1, T_pint8), - data_pointer(ctx, arg2, T_pint8), + maybe_bitcast(ctx, decay_derived(data_pointer(ctx, arg1)), T_pint8), + maybe_bitcast(ctx, decay_derived(data_pointer(ctx, arg2)), T_pint8), ConstantInt::get(T_size, sz) }); return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(T_int32, 0)); } else { Type *atp = at->getPointerTo(); - Value *varg1 = data_pointer(ctx, arg1, 
atp); - Value *varg2 = data_pointer(ctx, arg2, atp); + Value *varg1 = maybe_bitcast(ctx, decay_derived(data_pointer(ctx, arg1)), atp); + Value *varg2 = maybe_bitcast(ctx, decay_derived(data_pointer(ctx, arg2)), atp); jl_svec_t *types = ((jl_datatype_t*)arg1.typ)->types; Value *answer = ConstantInt::get(T_int1, 1); for (size_t i = 0, l = jl_svec_len(types); i < l; i++) { @@ -2645,7 +2645,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj)), jl_true); } - Value *ptr = data_pointer(ctx, obj); + Value *ptr = decay_derived(data_pointer(ctx, obj)); *ret = typed_load(ctx, ptr, vidx, jt, obj.tbaa, false); return true; } @@ -2836,7 +2836,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, } else { size_t offs = jl_field_offset(stt, fieldidx); - Value *ptr = data_pointer(ctx, obj, T_pint8); + Value *ptr = emit_bitcast(ctx, decay_derived(data_pointer(ctx, obj)), T_pint8); Value *llvm_idx = ConstantInt::get(T_size, offs); Value *addr = ctx.builder.CreateGEP(ptr, llvm_idx); // emit this using the same type as emit_getfield_knownidx @@ -2926,7 +2926,8 @@ static jl_cgval_t emit_call_function_object(jl_method_instance_t *li, jl_llvm_fu // can lazy load on demand, no copy needed assert(at == PointerType::get(et, AddressSpace::Derived)); assert(arg.ispointer()); - argvals[idx] = decay_derived(data_pointer(ctx, arg, at)); + argvals[idx] = decay_derived(maybe_bitcast(ctx, + data_pointer(ctx, arg), at)); } else { assert(at == et); @@ -3433,9 +3434,15 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu tbaa = NULL; if (vi.pTIndex == NULL) { assert(jl_is_leaf_type(vi.value.typ)); - Value *copy_bytes = ConstantInt::get(T_int32, jl_datatype_size(vi.value.typ)); - emit_memcpy(ctx, vi.value.V, rval_info, copy_bytes, - jl_datatype_align(rval_info.typ), vi.isVolatile, tbaa); + // Sometimes we can get into situations where the LHS and RHS + // are the same slot. We're not allowed to memcpy in that case + // under penalty of undefined behavior. This check should catch + // the relevant situations. 
+ if (vi.value.V != rval_info.V) { + Value *copy_bytes = ConstantInt::get(T_int32, jl_datatype_size(vi.value.typ)); + emit_memcpy(ctx, vi.value.V, rval_info, copy_bytes, + jl_datatype_align(rval_info.typ), vi.isVolatile, tbaa); + } } else { emit_unionmove(ctx, vi.value.V, rval_info, isboxed, vi.isVolatile, tbaa); @@ -4297,7 +4304,8 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t } else if (T->isAggregateType()) { // aggregate types are passed by pointer - arg = data_pointer(ctx, inputarg, T->getPointerTo()); + arg = maybe_bitcast(ctx, decay_derived(data_pointer(ctx, inputarg)), + T->getPointerTo()); } else { arg = emit_unbox(ctx, T, inputarg, spect); @@ -6571,7 +6579,7 @@ static void init_julia_llvm_env(Module *m) "llvm.julia.gc_preserve_end"); add_named_global(gc_preserve_end_func, (void*)NULL, /*dllimport*/false); - pointer_from_objref_func = Function::Create(FunctionType::get(T_size, + pointer_from_objref_func = Function::Create(FunctionType::get(T_pjlvalue, ArrayRef(PointerType::get(T_jlvalue, AddressSpace::Derived)), false), Function::ExternalLinkage, "julia.pointer_from_objref"); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 425941888d77b..0dc7c5319738d 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -269,6 +269,37 @@ static Constant *julia_const_to_llvm(jl_value_t *e) static jl_cgval_t ghostValue(jl_value_t *ty); +static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) +{ + Type *ty = unboxed->getType(); + assert(ty != T_void); + bool frompointer = ty->isPointerTy(); + bool topointer = to->isPointerTy(); + if (frompointer && topointer) { + unboxed = emit_bitcast(ctx, unboxed, to); + } + else if (frompointer) { + Type *INTT_to = INTT(to); + unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to); + if (INTT_to != to) + unboxed = ctx.builder.CreateBitCast(unboxed, to); + } + else if (topointer) { + Type *INTT_to = INTT(to); + if (to != INTT_to) + unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to); + unboxed = ctx.builder.CreateIntToPtr(unboxed, to); + } + else if (ty == T_int1 && to == T_int8) { + // bools may be stored internally as int8 + unboxed = ctx.builder.CreateZExt(unboxed, T_int8); + } + else if (ty != to) { + unboxed = ctx.builder.CreateBitCast(unboxed, to); + } + return unboxed; +} + // emit code to unpack a raw value from a box into registers or a stack slot static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *dest, bool volatile_store) { @@ -287,33 +318,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va Constant *c = x.constant ? julia_const_to_llvm(x.constant) : NULL; if (!x.ispointer() || c) { // already unboxed, but sometimes need conversion - Value *unboxed = c ? 
c : x.V; - Type *ty = unboxed->getType(); - assert(ty != T_void); - bool frompointer = ty->isPointerTy(); - bool topointer = to->isPointerTy(); - if (frompointer && topointer) { - unboxed = emit_bitcast(ctx, unboxed, to); - } - else if (frompointer) { - Type *INTT_to = INTT(to); - unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to); - if (INTT_to != to) - unboxed = ctx.builder.CreateBitCast(unboxed, to); - } - else if (topointer) { - Type *INTT_to = INTT(to); - if (to != INTT_to) - unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to); - unboxed = ctx.builder.CreateIntToPtr(unboxed, to); - } - else if (ty == T_int1 && to == T_int8) { - // bools may be stored internally as int8 - unboxed = ctx.builder.CreateZExt(unboxed, T_int8); - } - else if (ty != to) { - unboxed = ctx.builder.CreateBitCast(unboxed, to); - } + Value *unboxed = emit_unboxed_coercion(ctx, to, c ? c : x.V); if (!dest) return unboxed; Type *dest_ty = unboxed->getType()->getPointerTo(); @@ -326,14 +331,12 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va // bools stored as int8, so an extra Trunc is needed to get an int1 Value *p = x.constant ? literal_pointer_val(ctx, x.constant) : x.V; Type *ptype = (to == T_int1 ? T_pint8 : to->getPointerTo()); - if (p->getType() != ptype) - p = emit_bitcast(ctx, p, ptype); Value *unboxed = NULL; if (to == T_int1) - unboxed = ctx.builder.CreateTrunc(tbaa_decorate(x.tbaa, ctx.builder.CreateLoad(p)), T_int1); + unboxed = ctx.builder.CreateTrunc(tbaa_decorate(x.tbaa, ctx.builder.CreateLoad(maybe_bitcast(ctx, p, ptype))), T_int1); else if (jt == (jl_value_t*)jl_bool_type) - unboxed = ctx.builder.CreateZExt(ctx.builder.CreateTrunc(tbaa_decorate(x.tbaa, ctx.builder.CreateLoad(p)), T_int1), to); + unboxed = ctx.builder.CreateZExt(ctx.builder.CreateTrunc(tbaa_decorate(x.tbaa, ctx.builder.CreateLoad(maybe_bitcast(ctx, p, ptype))), T_int1), to); if (unboxed) { if (!dest) return unboxed; @@ -354,6 +357,27 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va return NULL; } else { + if (p->getType() != ptype && isa(p)) { + // LLVM's mem2reg can't handle coercion if the load/store type does + // not match the type of the alloca. As such, it is better to + // perform the load using the alloca's type and then perform the + // appropriate coercion manually. + AllocaInst *AI = cast(p); + Type *AllocType = AI->getAllocatedType(); +#if JL_LLVM_VERSION >= 40000 + const DataLayout &DL = jl_data_layout; +#else + const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); +#endif + if (!AI->isArrayAllocation() && + (AllocType->isFloatingPointTy() || AllocType->isIntegerTy() || AllocType->isPointerTy()) && + (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) && + DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) { + Instruction *load = ctx.builder.CreateAlignedLoad(p, alignment); + return emit_unboxed_coercion(ctx, to, tbaa_decorate(x.tbaa, load)); + } + } + p = maybe_bitcast(ctx, p, ptype); Instruction *load = ctx.builder.CreateAlignedLoad(p, alignment); return tbaa_decorate(x.tbaa, load); } @@ -439,7 +463,8 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv) if (isboxed) vxt = llvmt; vx = tbaa_decorate(v.tbaa, ctx.builder.CreateLoad( - data_pointer(ctx, v, vxt == T_int1 ? T_pint8 : vxt->getPointerTo()))); + emit_bitcast(ctx, data_pointer(ctx, v), + vxt == T_int1 ? 
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 80491639ac6b8..0f04fe418c4e6 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -12,6 +12,8 @@
     ADD_I(udiv_int, 2) \
     ADD_I(srem_int, 2) \
     ADD_I(urem_int, 2) \
+    ADD_I(add_ptr, 2) \
+    ADD_I(sub_ptr, 2) \
     ADD_I(neg_float, 1) \
     ADD_I(add_float, 2) \
     ADD_I(sub_float, 2) \
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 73a270141417d..7218e53cb180c 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -210,6 +210,9 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level)
         PM->add(createSimpleLoopUnrollPass());     // Unroll small loops
         //PM->add(createLoopStrengthReducePass()); // (jwb added)
+        // Re-run SROA after loop-unrolling (useful for small loops that operate
+        // over the structure of an aggregate)
+        PM->add(createSROAPass());                 // Break up aggregate allocas
         PM->add(createInstructionCombiningPass()); // Clean up after the unroller
         PM->add(createGVNPass());                  // Remove redundancies
         PM->add(createMemCpyOptPass());            // Remove memcpy / form memset
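The extra SROA run targets short, fully unrollable loops that walk the fields of a single aggregate. A minimal Julia sketch of that pattern (illustrative only; the point is the pass ordering above, not this particular function):

```julia
# After the 4-iteration loop is unrolled, SROA can keep the tuple in registers
# instead of leaving it in a stack slot.
function tuplesum(t::NTuple{4,Int})
    s = 0
    for i in 1:4
        s += t[i]
    end
    return s
end

tuplesum((1, 2, 3, 4))  # 10
```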
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 4a587616da6cf..9940934e9ef0c 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -761,6 +761,9 @@ JL_DLLEXPORT jl_value_t *jl_udiv_int(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_srem_int(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_urem_int(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT jl_value_t *jl_add_ptr(jl_value_t *a, jl_value_t *b);
+JL_DLLEXPORT jl_value_t *jl_sub_ptr(jl_value_t *a, jl_value_t *b);
+
 JL_DLLEXPORT jl_value_t *jl_neg_float(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_add_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_sub_float(jl_value_t *a, jl_value_t *b);
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index 4a67c39f841aa..216a37eb64bdd 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -592,7 +592,6 @@ void AllocOpt::replaceUsesWith(Instruction *orig_inst, Instruction *new_inst,
     }
     else if (auto call = dyn_cast<CallInst>(user)) {
         if (ptr_from_objref && ptr_from_objref == call->getCalledFunction()) {
-            new_i = new PtrToIntInst(new_i, T_size, "", call);
             call->replaceAllUsesWith(new_i);
             call->eraseFromParent();
             return;
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 8acf06496db0f..ccb660d966a43 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1207,9 +1207,8 @@ bool LateLowerGCFrame::CleanupIR(Function &F) {
        } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) {
             auto *obj = CI->getOperand(0);
             auto *ASCI = new AddrSpaceCastInst(obj, T_pjlvalue, "", CI);
-            auto *ptr = new PtrToIntInst(ASCI, CI->getType(), "", CI);
-            ptr->takeName(CI);
-            CI->replaceAllUsesWith(ptr);
+            ASCI->takeName(CI);
+            CI->replaceAllUsesWith(ASCI);
        } else if (alloc_obj_func && callee == alloc_obj_func) {
             assert(CI->getNumArgOperands() == 3);
             auto sz = (size_t)cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index a768b9dec3501..fecff170ba07a 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -703,8 +703,10 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c)
 un_iintrinsic_fast(LLVMNeg, neg, neg_int, u)
 #define add(a,b) a + b
 bi_iintrinsic_fast(LLVMAdd, add, add_int, u)
+bi_iintrinsic_fast(LLVMAdd, add, add_ptr, u)
 #define sub(a,b) a - b
 bi_iintrinsic_fast(LLVMSub, sub, sub_int, u)
+bi_iintrinsic_fast(LLVMSub, sub, sub_ptr, u)
 #define mul(a,b) a * b
 bi_iintrinsic_fast(LLVMMul, mul, mul_int, u)
 #define div(a,b) a / b
diff --git a/test/arrayops.jl b/test/arrayops.jl
index 79dd9f788a62b..be0a3ecbb141e 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -68,19 +68,15 @@ using Main.TestHelpers.OAs
    @test a[1,2,1,1,2] == 20
    @test a[1,1,2,2,1] == 30
-   @test_throws ArgumentError reinterpret(Int8, a)
-
    b = reshape(a, (32,))
    @test b[1]  == 10
    @test b[19] == 20
    @test b[13] == 30
    @test_throws DimensionMismatch reshape(b,(5,7))
    @test_throws DimensionMismatch reshape(b,(35,))
-   @test_throws DimensionMismatch reinterpret(Int, b, (35,))
-   @test_throws ArgumentError reinterpret(Any, b, (32,))
-   @test_throws DimensionMismatch reinterpret(Complex128, b, (32,))
+   @test_throws ArgumentError reinterpret(Any, b)
    c = ["hello", "world"]
-   @test_throws ArgumentError reinterpret(Float32, c, (2,))
+   @test_throws ArgumentError reinterpret(Float32, c)

    a = Vector(ones(5))
    @test_throws ArgumentError resize!(a, -2)
@@ -209,7 +205,7 @@ end
    @test b[5] == -4
    @test b[6] == -3
    @test b[7] == -2
-   b = reinterpret(Int, a, (3,4))
+   b = reinterpret(Int, a)
    b[1] = -1
    @test vec(b) == vec(a)
diff --git a/test/choosetests.jl b/test/choosetests.jl
index c769fe9d44dca..aa42294befb4b 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -41,7 +41,8 @@ function choosetests(choices = [])
         "enums", "cmdlineargs", "i18n", "workspace", "libdl", "int",
         "checked", "intset", "floatfuncs", "compile", "distributed", "inline",
         "boundscheck", "error", "ambiguous", "cartesian", "asmvariant", "osutils",
-        "channels", "iostream", "specificity", "codegen", "codevalidation"
+        "channels", "iostream", "specificity", "codegen", "codevalidation",
+        "reinterpretarray"
     ]
     profile_skipped = false
     if startswith(string(Sys.ARCH), "arm")
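The test updates above all follow the same migration: the shape argument moves out of `reinterpret` and into an explicit `reshape`. A small before/after sketch (input values chosen purely for illustration):

```julia
A = UInt8[0x01, 0x00, 0x00, 0x00,
          0x02, 0x00, 0x00, 0x00]

# Previously written as reinterpret(UInt32, A, (2, 1)); the shape now goes through reshape.
B = reshape(reinterpret(UInt32, A), (2, 1))

size(B) == (2, 1)        # true
B[1, 1] == 0x00000001    # true on a little-endian machine
B[2, 1] == 0x00000002    # true on a little-endian machine
```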
diff --git a/test/core.jl b/test/core.jl
index d5701879c43ae..bb2c2a9ddcff7 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -3953,9 +3953,6 @@ f = unsafe_wrap(Array, pointer(d), length(d))
 @test !check_nul(f)
 f = unsafe_wrap(Array, ccall(:malloc, Ptr{UInt8}, (Csize_t,), 10), 10, true)
 @test !check_nul(f)
-g = reinterpret(UInt8, UInt16[0x1, 0x2])
-@test !check_nul(g)
-@test check_nul(copy(g))
 end

 # Copy of `#undef`
@@ -5007,23 +5004,6 @@ end
 g21719(f, goal; tol = 1e-6) = T21719(f, tol, goal)
 @test isa(g21719(identity, 1.0; tol=0.1), T21719)

-# reinterpret alignment requirement
-let arr8 = zeros(UInt8, 16),
-    arr64 = zeros(UInt64, 2),
-    arr64_8 = reinterpret(UInt8, arr64),
-    arr64_i
-
-    # Not allowed to reinterpret arrays allocated as UInt8 array to a Int32 array
-    res = @test_throws ArgumentError reinterpret(Int32, arr8)
-    @test res.value.msg == "reinterpret from alignment 1 bytes to alignment 4 bytes not allowed"
-    # OK to reinterpret arrays allocated as UInt64 array to a Int64 array even though
-    # it is passed as a UInt8 array
-    arr64_i = reinterpret(Int64, arr64_8)
-    @test arr8 == arr64_8
-    arr64_i[2] = 1234
-    @test arr64[2] == 1234
-end
-
 # Alignment of perm boxes
 for i in 1:10
     # Int64 box should be 16bytes aligned even on 32bits
diff --git a/test/inference.jl b/test/inference.jl
index 81910152cf37e..1642878136df1 100644
--- a/test/inference.jl
+++ b/test/inference.jl
@@ -830,7 +830,7 @@ f2_17003(::Any) = f2_17003(NArray_17003(gl_17003))

 # issue #20847
 function segfaultfunction_20847(A::Vector{NTuple{N, T}}) where {N, T}
-    B = reinterpret(T, A, (N, length(A)))
+    B = reshape(reinterpret(T, A), (N, length(A)))
     return nothing
 end

diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl
new file mode 100644
index 0000000000000..b334f341e83d7
--- /dev/null
+++ b/test/reinterpretarray.jl
@@ -0,0 +1,31 @@
+using Test
+
+A = Int64[1, 2, 3, 4]
+B = Complex{Int64}[5+6im, 7+8im, 9+10im]
+# getindex
+@test reinterpret(Complex{Int64}, A) == [1 + 2im, 3 + 4im]
+@test reinterpret(Float64, A) == reinterpret.(Float64, A)
+
+@test reinterpret(NTuple{3, Int64}, B) == [(5,6,7),(8,9,10)]
+
+# setindex
+let Ac = copy(A), Bc = copy(B)
+    reinterpret(Complex{Int64}, Ac)[2] = -1 - 2im
+    @test Ac == [1, 2, -1, -2]
+    reinterpret(NTuple{3, Int64}, Bc)[2] = (4,5,6)
+    @test Bc == Complex{Int64}[5+6im, 7+4im, 5+6im]
+    reinterpret(NTuple{3, Int64}, Bc)[1] = (1,2,3)
+    @test Bc == Complex{Int64}[1+2im, 3+4im, 5+6im]
+
+    A1 = reinterpret(Float64, A)
+    A2 = reinterpret(Complex{Float64}, A)
+    A1[1] = 1.0
+    @test real(A2[1]) == 1.0
+end
+
+# same-size reinterpret where one of the types is non-primitive
+let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)]
+    @test reinterpret(Float32, a)[1] == reinterpret(Float32, 0x04030201)
+    reinterpret(Float32, a)[1] = 2.0
+    @test reinterpret(Float32, a)[1] == 2.0
+end
diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl
index dd22123c9187e..3b81e52e1aac3 100644
--- a/test/sparse/sparse.jl
+++ b/test/sparse/sparse.jl
@@ -489,12 +489,6 @@ end
     @test Array(spdiagm(ones(2), -1, 3, 3)) == diagm(ones(2), -1)
 end

-@testset "issue #4986, reinterpret" begin
-    sfe22 = speye(Float64, 2)
-    mfe22 = eye(Float64, 2)
-    @test reinterpret(Int64, sfe22) == reinterpret(Int64, mfe22)
-end
-
 @testset "issue #5190" begin
     @test_throws ArgumentError sparsevec([3,5,7],[0.1,0.0,3.2],4)
 end
@@ -964,10 +958,6 @@ end
     ACPY = copy(A)
     B = reshape(A,25,1)
     @test A == ACPY
-    C = reinterpret(Int64, A, (25, 1))
-    @test A == ACPY
-    D = reinterpret(Int64, copy(B))
-    @test C == D
 end

 @testset "issue #8225" begin
@@ -1316,11 +1306,8 @@ end
     @test spdiagm(([1,2],[3.5],[4+5im]), (0,1,-1), 2,2) == [1 3.5; 4+5im 2]
 end

-@testset "error conditions for reinterpret, reshape, and squeeze" begin
+@testset "error conditions for reshape and squeeze" begin
     local A = sprand(Bool, 5, 5, 0.2)
-    @test_throws ArgumentError reinterpret(Complex128, A)
-    @test_throws ArgumentError reinterpret(Complex128, A,(5, 5))
-    @test_throws DimensionMismatch reinterpret(Int8, A,(20,))
     @test_throws DimensionMismatch reshape(A,(20, 2))
     @test_throws ArgumentError squeeze(A,(1, 1))
 end
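With the sparse-specific `reinterpret` testsets removed above, an equivalent result can still be assembled by reinterpreting only the stored values and rebuilding the matrix around them. A hedged sketch; the `SparseMatrixCSC` field names and constructor used here are assumptions about the sparse API, not something this patch introduces:

```julia
S = sparse([1, 2], [1, 2], Float64[1.0, 2.0])          # 2×2 sparse matrix
nzbits = collect(reinterpret(UInt64, S.nzval))          # bit patterns of the stored values
S_bits = SparseMatrixCSC(S.m, S.n, S.colptr, S.rowval, nzbits)
S_bits[1, 1] == reinterpret(UInt64, 1.0)                # true
```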
diff --git a/test/sparse/sparsevector.jl b/test/sparse/sparsevector.jl
index fcc0baac1447b..63f9551e7819d 100644
--- a/test/sparse/sparsevector.jl
+++ b/test/sparse/sparsevector.jl
@@ -281,11 +281,6 @@ let a = SparseVector(8, [2, 5, 6], Int32[12, 35, 72])
     # vec
     @test vec(a) == a

-    # reinterpret
-    au = reinterpret(UInt32, a)
-    @test isa(au, SparseVector{UInt32,Int})
-    @test exact_equal(au, SparseVector(8, [2, 5, 6], UInt32[12, 35, 72]))
-
     # float
     af = float(a)
     @test float(af) == af
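The removed block relied on `reinterpret` returning a new `SparseVector`. The same vector can be rebuilt from its structural fields; a hedged sketch (the field names `n`, `nzind`, and `nzval` are assumptions about the `SparseVector` layout, mirroring the constructor arguments used in the test above):

```julia
a  = SparseVector(8, [2, 5, 6], Int32[12, 35, 72])
au = SparseVector(a.n, a.nzind, collect(reinterpret(UInt32, a.nzval)))
au == SparseVector(8, [2, 5, 6], UInt32[12, 35, 72])    # true
```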