Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OffsetArray support for cat/vcat/hcat #37629

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 19 additions & 21 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1442,13 +1442,13 @@ function _typed_vcat(::Type{T}, V::AbstractVecOrTuple{AbstractVector}) where T
for Vk in V
n += Int(length(Vk))::Int
end
a = similar(V[1], T, n)
pos = 1
for k=1:Int(length(V))::Int
a = similar(first(V), T, n)
pos = first(axes(a, 1))
for k = eachindex(V)
Vk = V[k]
p1 = pos + Int(length(Vk))::Int - 1
a[pos:p1] = Vk
pos = p1+1
n = length(Vk)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
n = length(Vk)
n = Int(length(Vk))::Int

because this often gets called with poor inference. Try @code_warntype Base._typed_vcat(Float64, AbstractVector[[1,2,3], [4.0]]).

Not putting in these annotation re-opens us to all sorts of code invalidation, see https://julialang.org/blog/2020/08/invalidations/.

This isn't a general pattern you need to start adopting in all your Julia code, but this is a very low-level function called by lots of things (all of which would be invalidated if this is invalidated) and from its signature alone you might guess it's a bit of an inference nightmare.

I'm not adding comments like these elsewhere in this function, but you'll probably need to add other annotations to the initialization of pos above. Ideally, it would be great to check with SnoopCompile and loading various packages like Makie to see if you've greatly increased the number of invalidations.

copyto!(a, pos, Vk, first(axes(Vk, 1)), n)
pos += n
end
a
end
Expand All @@ -1459,11 +1459,10 @@ hcat(A::AbstractVecOrMat...) = typed_hcat(promote_eltype(A...), A...)
hcat(A::AbstractVecOrMat{T}...) where {T} = typed_hcat(T, A...)

function _typed_hcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T
nargs = length(A)
nrows = size(A[1], 1)
nrows = size(first(A), 1)
ncols = 0
dense = true
for j = 1:nargs
for j = eachindex(A)
Aj = A[j]
if size(Aj, 1) != nrows
throw(ArgumentError("number of rows of each array must match (got $(map(x->size(x,1), A)))"))
Expand All @@ -1472,17 +1471,17 @@ function _typed_hcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T
nd = ndims(Aj)
ncols += (nd==2 ? size(Aj,2) : 1)
end
B = similar(A[1], T, nrows, ncols)
pos = 1
B = similar(first(A), T, nrows, ncols)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How difficult would it be to adapt this to have B = similar(first(A), T, axes(first(A),1), axes(A,1)), so that all the offsets get propagated?

I guess that's only the case of a vector of vectors; when some elements are matrices you'd have to give up on the 2nd axis.

Copy link
Member Author

@johnnychen94 johnnychen94 Sep 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand your comment correctly.

As explained in JuliaArrays/OffsetArrays.jl#63 (comment), a meaningful offset propagation is only possible for very restricted conditions. This restriction doesn't make cats of OffsetArrays indeed useful.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I didn't see the issue, let me look there first.

pos = first(axes(B, 1))
if dense
for k=1:nargs
for k=eachindex(A)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spaces around =

Ak = A[k]
n = length(Ak)
copyto!(B, pos, Ak, 1, n)
pos += n
end
else
for k=1:nargs
for k=eachindex(A)
Ak = A[k]
p1 = pos+(isa(Ak,AbstractMatrix) ? size(Ak, 2) : 1)-1
B[:, pos:p1] = Ak
Expand All @@ -1496,17 +1495,16 @@ vcat(A::AbstractVecOrMat...) = typed_vcat(promote_eltype(A...), A...)
vcat(A::AbstractVecOrMat{T}...) where {T} = typed_vcat(T, A...)

function _typed_vcat(::Type{T}, A::AbstractVecOrTuple{AbstractVecOrMat}) where T
nargs = length(A)
nrows = sum(a->size(a, 1), A)::Int
ncols = size(A[1], 2)
for j = 2:nargs
ncols = size(first(A), 2)
for j = first(axes(A))[2:end]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for j = first(axes(A))[2:end]
for j = firstindex(A)+1:lastindex(A)

if size(A[j], 2) != ncols
throw(ArgumentError("number of columns of each array must match (got $(map(x->size(x,2), A)))"))
end
end
B = similar(A[1], T, nrows, ncols)
pos = 1
for k=1:nargs
B = similar(first(A), T, nrows, ncols)
pos = first(axes(B, 1))
for k=eachindex(A)
Ak = A[k]
p1 = pos+size(Ak,1)::Int-1
B[pos:p1, :] = Ak
Expand Down Expand Up @@ -1589,7 +1587,7 @@ _cat(dims, X...) = cat_t(promote_eltypeof(X...), X...; dims=dims)
@inline function _cat_t(dims, ::Type{T}, X...) where {T}
catdims = dims2cat(dims)
shape = cat_shape(catdims, map(cat_size, X)::Tuple{Vararg{Union{Int,Dims}}})::Dims
A = cat_similar(X[1], T, shape)
A = cat_similar(first(X), T, shape)
if count(!iszero, catdims)::Int > 1
fill!(A, zero(T))
end
Expand All @@ -1604,7 +1602,7 @@ function __cat(A, shape::NTuple{M,Int}, catdims, X...) where M
for x in X
for i = 1:N
if concat[i]
inds[i] = offsets[i] .+ cat_indices(x, i)
inds[i] = offsets[i] .+ parent(cat_indices(x, i))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this about?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This strips the offsets and uses linear indexing when computing indices. parent(r::IdOffsetRange) = r.parent.

This could be fragile and a little bit tricky, though. Phehaps there's a better way to make things work.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would generally try to stay away from assumptions about how x might be structured if at all possible...

offsets[i] += cat_size(x, i)
else
inds[i] = 1:shape[i]
Expand Down
2 changes: 1 addition & 1 deletion stdlib/Distributed/test/splitrange.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ using Distributed: splitrange
@test splitrange(-1, 1, 4) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1])

const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for this change? Could this cause problems in tests running in other workers? What if it defines methods that are supposed to fail in those tests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm unsure about this, though.

Without this Distributed test is broken:

      From worker 10:	ERROR: LoadError: LoadError: TaskFailedException
      From worker 10:	
      From worker 10:	    nested task error: On worker 25:
      From worker 10:	    UndefVarError: OffsetArrays not defined
      From worker 10:	    Stacktrace:
      From worker 10:	      [1] deserialize_module
      From worker 10:	        @ /buildworker/worker/package_linux32/build/usr/share/julia/stdlib/v1.6/Serialization/src/Serialization.jl:957
      From worker 10:	      [2] handle_deserialize
      From worker 10:	        @ /buildworker/worker/package_linux32/build/usr/share/julia/stdlib/v1.6/Serialization/src/Serialization.jl:856
      From worker 10:	      [3] deserialize
      From worker 10:	        @ /buildworker/worker/package_linux32/build/usr/share/julia/stdlib/v1.6/Serialization/src/Serialization.jl:774

Adding this helps remove this error.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe

using .Main.OffsetArrays

Copy link
Member Author

@johnnychen94 johnnychen94 Sep 19, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's already using .Main.OffsetArrays one line down below on this.

I think it's because eachindex now requires OffsetArrays.IdOffsetRange and triggers the error, while for the old version of OffsetArrays it uses IdentityUnitRange and doesn't require anything in OffsetArrays.

It would be better to get #37643 first and I'll drop the OffsetArrays upgrade commit here.

using .Main.OffsetArrays

oa = OffsetArray([123, -345], (-2,))
Expand Down
46 changes: 46 additions & 0 deletions test/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,52 @@ function test_cat(::Type{TestAbstractArray})
@test cat([1], [1], dims=[1, 2]) == I(2)
end

module TestOffsetArraysCats
using Test
isdefined(Main, :OffsetArrays) || @eval Main include(joinpath(@__DIR__, "testhelpers", "OffsetArrays.jl"))
using .Main.OffsetArrays

# `cat`s on OffsetArrays ignore their offsets and treat them as normal list

# 1d
v1 = collect(1:4)
v2 = collect(5:8)
ov1 = OffsetArray(v1, -1)
ov2 = OffsetArray(v2, 1)
@test hcat(ov1, v1, ov2, v2) == hcat(v1, v1, v2, v2)
@test vcat(ov1, v1, ov2, v2) == vcat(v1, v1, v2, v2)
@test hvcat((2, 2), ov1, v2, v1, ov2) == hvcat((2, 2), v1, v2, v1, v2)
# 37628
@test reduce(hcat, (v1, v2)) == hcat(v1, v2)
@test reduce(vcat, (v1, v2)) == vcat(v1, v2)
@test reduce(hcat, OffsetVector([1:2, 1:2],10)) == [1 1;2 2]

# 2d
a1 = reshape(collect(1:6), 2, 3)
a2 = reshape(collect(7:12), 2, 3)
oa1 = OffsetArray(a1, -1, -1)
oa2 = OffsetArray(a2, 1, 1)
@test hcat(oa1, a1, oa2, a2) == hcat(a1, a1, a2, a2)
@test vcat(oa1, a1, oa2, a2) == vcat(a1, a1, a2, a2)
@test hvcat((2, 2), oa1, a2, a1, oa2) == hvcat((2, 2), a1, a2, a1, a2)

# 3d
a1 = reshape(collect(1:12), 2, 3, 2)
a2 = reshape(collect(13:24), 2, 3, 2)
oa1 = OffsetArray(a1, -1, -1, -1)
oa2 = OffsetArray(a2, 1, 1, 1)
@test hcat(oa1, a1, oa2, a2) == hcat(a1, a1, a2, a2)
@test vcat(oa1, a1, oa2, a2) == vcat(a1, a1, a2, a2)
@test hvcat((2, 2), oa1, a2, a1, oa2) == hvcat((2, 2), a1, a2, a1, a2)
# https://github.com/JuliaArrays/OffsetArrays.jl/issues/63
form=OffsetArray(reshape(zeros(Int8,0),0,0,2),0:-1,0:-1,0:1)
exp=OffsetArray(reshape(zeros(Int8,0),0,16,2),0:-1,0:15,0:1)
@test size(hcat(form,exp)) == (0, 16, 2)
# 37493
@test hcat(zeros(2, 1:1, 2), zeros(2, 2:3, 2)) == zeros(2, 3, 2)
@test vcat(zeros(1:1, 2, 2), zeros(2:3, 2, 2)) == zeros(3, 2, 2)
end

function test_ind2sub(::Type{TestAbstractArray})
n = rand(2:5)
dims = tuple(rand(1:5, n)...)
Expand Down
Loading