Skip to content

Commit

Permalink
LAPACK: annotate size check in lacpy! with @noinline for reduced …
Browse files Browse the repository at this point in the history
…latency (#55029)

The `@noinline` annotation on the size check appears to reduce compilation
latency on a subsequent call with a different element type:
```julia
julia> using LinearAlgebra

julia> A = rand(2,2); B = similar(A);

julia> @time LAPACK.lacpy!(B, A, 'U');
  0.032585 seconds (29.80 k allocations: 1.469 MiB, 99.84% compilation time)

julia> A = rand(Float32,2,2); B = similar(A);

julia> @time LAPACK.lacpy!(B, A, 'U');
  0.026698 seconds (22.80 k allocations: 1.113 MiB, 99.84% compilation time) # v"1.12.0-DEV.810"
  0.024715 seconds (19.88 k allocations: 987.000 KiB, 99.80% compilation time) # Without noinline
  0.017084 seconds (18.52 k allocations: 903.828 KiB, 99.72% compilation time) # This PR (with noinline)
```
  • Loading branch information
jishnub authored Jul 5, 2024
1 parent 140248e commit 59f08df
Showing 1 changed file with 6 additions and 11 deletions.
17 changes: 6 additions & 11 deletions stdlib/LinearAlgebra/src/lapack.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7164,19 +7164,11 @@ for (fn, elty) in ((:dlacpy_, :Float64),
m, n = size(A)
m1, n1 = size(B)
if uplo == 'U'
if n < m
(m1 < n || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($n,$n)"))
else
(m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
end
lacpy_size_check((m1, n1), (n < m ? n : m, n))
elseif uplo == 'L'
if m < n
(m1 < m || n1 < m) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$m)"))
else
(m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
end
lacpy_size_check((m1, n1), (m, m < n ? m : n))
else
(m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
lacpy_size_check((m1, n1), (m, n))
end
lda = max(1, stride(A, 2))
ldb = max(1, stride(B, 2))
Expand All @@ -7189,6 +7181,9 @@ for (fn, elty) in ((:dlacpy_, :Float64),
end
end

# Shared size validation for `lacpy!`: the destination size `(m1, n1)` must be at
# least `(m, n)` in both dimensions, otherwise a `DimensionMismatch` is thrown.
# Kept out of line on purpose: the `@noinline` annotation reduces latency.
@noinline function lacpy_size_check((m1, n1), (m, n))
    too_small = m1 < m || n1 < n
    too_small && throw(DimensionMismatch(
        lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
    # Reaching this point means the check passed, so this is `false`.
    return too_small
end

"""
lacpy!(B, A, uplo) -> B
Expand Down

0 comments on commit 59f08df

Please sign in to comment.