From 9cb1ea6e15ae79003bb588099d5a66f11d36438a Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 1 Oct 2024 10:13:29 +0530 Subject: [PATCH] Limit `@inbounds` to indexing in the dual-iterator branch in `copyto_unaliased!` (#55919) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This simplifies the `copyto_unalised!` implementation where the source and destination have different `IndexStyle`s, and limits the `@inbounds` to only the indexing operation. In particular, the iteration over `eachindex(dest)` is not marked as `@inbounds` anymore. This seems to help with performance when the destination uses Cartesian indexing. Reduced implementation of the branch: ```julia function copyto_proposed!(dest, src) axes(dest) == axes(src) || throw(ArgumentError("incompatible sizes")) iterdest, itersrc = eachindex(dest), eachindex(src) for (destind, srcind) in zip(iterdest, itersrc) @inbounds dest[destind] = src[srcind] end dest end function copyto_current!(dest, src) axes(dest) == axes(src) || throw(ArgumentError("incompatible sizes")) iterdest, itersrc = eachindex(dest), eachindex(src) ret = iterate(iterdest) @inbounds for a in src idx, state = ret::NTuple{2,Any} dest[idx] = a ret = iterate(iterdest, state) end dest end function copyto_current_limitinbounds!(dest, src) axes(dest) == axes(src) || throw(ArgumentError("incompatible sizes")) iterdest, itersrc = eachindex(dest), eachindex(src) ret = iterate(iterdest) for isrc in itersrc idx, state = ret::NTuple{2,Any} @inbounds dest[idx] = src[isrc] ret = iterate(iterdest, state) end dest end ``` ```julia julia> a = zeros(40000,4000); b = rand(size(a)...); julia> av = view(a, UnitRange.(axes(a))...); julia> @btime copyto_current!($av, $b); 617.704 ms (0 allocations: 0 bytes) julia> @btime copyto_current_limitinbounds!($av, $b); 304.146 ms (0 allocations: 0 bytes) julia> @btime copyto_proposed!($av, $b); 240.217 ms (0 allocations: 0 bytes) julia> versioninfo() Julia Version 1.12.0-DEV.1260 Commit 4a4ca9c8152 (2024-09-28 01:49 UTC) Build Info: Official https://julialang.org release Platform Info: OS: Linux (x86_64-linux-gnu) CPU: 8 × Intel(R) Core(TM) i5-10310U CPU @ 1.70GHz WORD_SIZE: 64 LLVM: libLLVM-18.1.7 (ORCJIT, skylake) Threads: 1 default, 0 interactive, 1 GC (on 8 virtual cores) Environment: JULIA_EDITOR = subl ``` I'm not quite certain why the proposed implementation here (`copyto_proposed!`) is even faster than `copyto_current_limitinbounds!`. In any case, `copyto_proposed!` is easier to read, so I'm not complaining. This fixes https://github.com/JuliaLang/julia/issues/53158 (cherry picked from commit 06e7b9d292ed4ced5b523fe94daef30332eabbd3) --- base/abstractarray.jl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index ad70b973ab10b..bb4aff0f6a411 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1096,11 +1096,8 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle: end else # Dual-iterator implementation - ret = iterate(iterdest) - @inbounds for a in src - idx, state = ret::NTuple{2,Any} - dest[idx] = a - ret = iterate(iterdest, state) + for (Idest, Isrc) in zip(iterdest, itersrc) + @inbounds dest[Idest] = src[Isrc] end end end