From 59f08df99bf8d73ed1d41104b82062f328ee5926 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Fri, 5 Jul 2024 21:20:46 +0530 Subject: [PATCH] LAPACK: annotate size check in `lacpy!` with `@noinline` for reduced latency (#55029) The `@noinline` annotation on the size check appears to reduce latency in a second call with different argument types: ```julia julia> using LinearAlgebra julia> A = rand(2,2); B = similar(A); julia> @time LAPACK.lacpy!(B, A, 'U'); 0.032585 seconds (29.80 k allocations: 1.469 MiB, 99.84% compilation time) julia> A = rand(Float32,2,2); B = similar(A); julia> @time LAPACK.lacpy!(B, A, 'U'); 0.026698 seconds (22.80 k allocations: 1.113 MiB, 99.84% compilation time) # v"1.12.0-DEV.810" 0.024715 seconds (19.88 k allocations: 987.000 KiB, 99.80% compilation time) # Without noinline 0.017084 seconds (18.52 k allocations: 903.828 KiB, 99.72% compilation time) # This PR (with noinline) ``` --- stdlib/LinearAlgebra/src/lapack.jl | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl index c21b6ef92c4e5..e9cfacfcd0cfd 100644 --- a/stdlib/LinearAlgebra/src/lapack.jl +++ b/stdlib/LinearAlgebra/src/lapack.jl @@ -7164,19 +7164,11 @@ for (fn, elty) in ((:dlacpy_, :Float64), m, n = size(A) m1, n1 = size(B) if uplo == 'U' - if n < m - (m1 < n || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($n,$n)")) - else - (m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)")) - end + lacpy_size_check((m1, n1), (n < m ? n : m, n)) elseif uplo == 'L' - if m < n - (m1 < m || n1 < m) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$m)")) - else - (m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)")) - end + lacpy_size_check((m1, n1), (m, m < n ? 
m : n)) else - (m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)")) + lacpy_size_check((m1, n1), (m, n)) end lda = max(1, stride(A, 2)) ldb = max(1, stride(B, 2)) @@ -7189,6 +7181,9 @@ for (fn, elty) in ((:dlacpy_, :Float64), end end +# The `@noinline` annotation on this size check reduces the compilation latency of `lacpy!` +@noinline lacpy_size_check((m1, n1), (m, n)) = (m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)")) + """ lacpy!(B, A, uplo) -> B