diff --git a/src/solvers/spqr.jl b/src/solvers/spqr.jl index a8f92887..1ae5a2e9 100644 --- a/src/solvers/spqr.jl +++ b/src/solvers/spqr.jl @@ -108,6 +108,17 @@ function _qr!(ordering::Integer, tol::Real, econ::Integer, getCTX::Integer, return rnk, _E, _HPinv end +struct QRSparseQ{Tv<:CHOLMOD.VTypes,Ti<:Integer} <: AbstractQ{Tv} + factors::SparseMatrixCSC{Tv,Ti} + τ::Vector{Tv} + n::Int # Number of columns in original matrix +end + +Base.size(Q::QRSparseQ) = (size(Q.factors, 1), size(Q.factors, 1)) +Base.axes(Q::QRSparseQ) = map(Base.OneTo, size(Q)) + +Matrix{T}(Q::QRSparseQ) where {T} = lmul!(Q, Matrix{T}(I, size(Q, 1), min(size(Q, 1), Q.n))) + # Struct for storing sparse QR from SPQR such that # A[invperm(rpivinv), cpiv] = (I - factors[:,1]*τ[1]*factors[:,1]')*...*(I - factors[:,k]*τ[k]*factors[:,k]')*R # with k = size(factors, 2). @@ -115,8 +126,12 @@ struct QRSparse{Tv,Ti} <: LinearAlgebra.Factorization{Tv} factors::SparseMatrixCSC{Tv,Ti} τ::Vector{Tv} R::SparseMatrixCSC{Tv,Ti} + Q::QRSparseQ{Tv,Ti} cpiv::Vector{Ti} rpivinv::Vector{Ti} + + _lock::ReentrantLock + _ldiv_workspace::Vector{Tv} # backing storage for work buffer (resizable) end Base.size(F::QRSparse) = (size(F.factors, 1), size(F.R, 2)) @@ -133,17 +148,6 @@ function Base.size(F::QRSparse, i::Integer) end Base.axes(F::QRSparse) = map(Base.OneTo, size(F)) -struct QRSparseQ{Tv<:CHOLMOD.VTypes,Ti<:Integer} <: AbstractQ{Tv} - factors::SparseMatrixCSC{Tv,Ti} - τ::Vector{Tv} - n::Int # Number of columns in original matrix -end - -Base.size(Q::QRSparseQ) = (size(Q.factors, 1), size(Q.factors, 1)) -Base.axes(Q::QRSparseQ) = map(Base.OneTo, size(Q)) - -Matrix{T}(Q::QRSparseQ) where {T} = lmul!(Q, Matrix{T}(I, size(Q, 1), min(size(Q, 1), Q.n))) - # From SPQR manual p. 6 _default_tol(A::AbstractSparseMatrixCSC) = 20*sum(size(A))*eps()*maximum(norm(view(A, :, i)) for i in axes(A, 2)) @@ -155,6 +159,12 @@ Compute the `QR` factorization of a sparse matrix `A`. Fill-reducing row and col are used such that `F.R = F.Q'*A[F.prow,F.pcol]`. The main application of this type is to solve least squares or underdetermined problems with [`\\`](@ref). The function calls the C library SPQR[^ACM933]. +!!! note + The returned `QRSparse` object uses an internal workspace for + [`ldiv!()`](@ref) calls that is protected by a lock for threadsafety. For + multithreaded use, create a separate copy of this object for each task with + `copy(F)`. + !!! note `qr(A::SparseMatrixCSC)` uses the SPQR library that is part of [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse). As this library only supports sparse matrices with [`Float64`](@ref) or @@ -205,14 +215,19 @@ function LinearAlgebra.qr(A::SparseMatrixCSC{Tv, Ti}; tol=_default_tol(A), order R, E, H, HPinv, HTau) R_ = SparseMatrixCSC{Tv, Ti}(Sparse(R[])) - return QRSparse(SparseMatrixCSC{Tv, Ti}(Sparse(H[])), - vec(Array{Tv}(CHOLMOD.Dense(HTau[]))), - SparseMatrixCSC{Tv, Ti}(min(size(A)...), - size(R_, 2), - getcolptr(R_), - rowvals(R_), - nonzeros(R_)), - p, hpinv) + factors = SparseMatrixCSC{Tv, Ti}(Sparse(H[])) + τ = vec(Array{Tv}(CHOLMOD.Dense(HTau[]))) + R = SparseMatrixCSC{Tv, Ti}(min(size(A)...), + size(R_, 2), + getcolptr(R_), + rowvals(R_), + nonzeros(R_)) + + return QRSparse(factors, τ, R, + QRSparseQ(factors, τ, size(R, 2)), + p, hpinv, + ReentrantLock(), + Tv[]) # _ldiv_workspace (lazily sized on first solve) end LinearAlgebra.qr(A::SparseMatrixCSC{Float16}; tol=_default_tol(A)) = qr(convert(SparseMatrixCSC{Float32}, A); tol=tol) @@ -338,9 +353,7 @@ end (*)(A::SparseMatrixCSC, Q::QRSparseQ) = A * sparse(Q) @inline function Base.getproperty(F::QRSparse, d::Symbol) - if d === :Q - return QRSparseQ(F.factors, F.τ, size(F, 2)) - elseif d === :prow + if d === :prow return invperm(F.rpivinv) elseif d === :pcol return F.cpiv @@ -354,6 +367,18 @@ function Base.propertynames(F::QRSparse, private::Bool=false) private ? ((public ∪ fieldnames(typeof(F)))...,) : public end +""" + copy(F::QRSparse) + +A shallow copy of QRSparse for use in multithreaded solve applications. +Shares the factorization data but duplicates the workspace so that +each copy can be used independently in a different thread. +""" +function Base.copy(F::QRSparse) + QRSparse(F.factors, F.τ, F.R, F.Q, F.cpiv, F.rpivinv, + ReentrantLock(), similar(F._ldiv_workspace)) +end + function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, F::QRSparse) summary(io, F); println(io) println(io, "Q factor:") @@ -406,49 +431,29 @@ function (\)(F::QRSparse{T}, B::VecOrMat{Complex{T}}) where T<:LinearAlgebra.Bla return collect(reshape(reinterpret(Complex{T}, copy(transpose(reshape(x, (length(x) >> 1), 2)))), _ret_size(F, B))) end -function _ldiv_basic(F::QRSparse, B::StridedVecOrMat) - if size(F, 1) != size(B, 1) - throw(DimensionMismatch("size(F) = $(size(F)) but size(B) = $(size(B))")) - end - - # The rank of F equal might be reduced - rnk = rank(F) - - # allocate an array for the return value large enough to hold B and X - # For overdetermined problem, B is larger than X and vice versa - X = similar(B, ntuple(i -> i == 1 ? max(size(F, 2), size(B, 1)) : size(B, 2), Val(ndims(B)))) +function _get_ldiv_workspace(F::QRSparse{Tv}, B::StridedVecOrMat) where Tv + m, n = size(F) + k = ndims(B) == 1 ? 1 : size(B, 2) + wrows = max(m, n) - # Fill will zeros. These will eventually become the zeros in the basic solution - # fill!(X, 0) - # Apply left permutation to the solution and store in X - for j in axes(B, 2) - for i in 1:length(F.rpivinv) - @inbounds X[F.rpivinv[i], j] = B[i, j] - end + # Resize backing vector if needed + wlen = wrows * k + if length(F._ldiv_workspace) != wlen + resize!(F._ldiv_workspace, wlen) end - # Make a view into X corresponding to the size of B - X0 = view(X, axes(B, 1), :) - - # Apply Q' to B - lmul!(adjoint(F.Q), X0) - - # Zero out to get basic solution - X[rnk + 1:end, :] .= 0 - - # Solve R*X = B - ldiv!(UpperTriangular(F.R[Base.OneTo(rnk), Base.OneTo(rnk)]), - view(X0, Base.OneTo(rnk), :)) + # Reshape into matrix. Note that we use ReshapedArray here instead of + # reshape() to avoid allocations later when taking a view. + W = Base.ReshapedArray(F._ldiv_workspace, (wrows, k), ()) + return W +end - # Apply right permutation and extract solution from X - # NB: cpiv == [] if SPQR was called with ORDERING_FIXED - if length(F.cpiv) == 0 - return getindex(X, ntuple(i -> i == 1 ? (1:size(F,2)) : :, Val(ndims(B)))...) - end - return getindex(X, ntuple(i -> i == 1 ? invperm(F.cpiv) : :, Val(ndims(B)))...) +function (\)(F::QRSparse{T}, B::StridedVecOrMat{T}) where {T} + X = similar(B, ntuple(i -> i == 1 ? size(F, 2) : size(B, 2), Val(ndims(B)))) + # Note that we copy F here for thread-safety + return ldiv!(X, copy(F), B) end -(\)(F::QRSparse{T}, B::StridedVecOrMat{T}) where {T} = _ldiv_basic(F, B) """ (\\)(F::QRSparse, B::StridedVecOrMat) @@ -473,4 +478,61 @@ julia> qr(A)\\fill(1.0, 4) """ (\)(F::QRSparse, B::StridedVecOrMat) = F\convert(AbstractArray{eltype(F)}, B) +function LinearAlgebra.ldiv!(X::StridedVecOrMat{T}, F::QRSparse{T}, B::StridedVecOrMat{T}) where {T} + if size(F, 1) != size(B, 1) + throw(DimensionMismatch("size(F) = $(size(F)) but size(B) = $(size(B))")) + end + if size(F, 2) != size(X, 1) + throw(DimensionMismatch("size(F) = $(size(F)) but size(X) = $(size(X))")) + end + if ndims(B) > 1 && size(X, 2) != size(B, 2) + throw(DimensionMismatch("size(X) = $(size(X)) but size(B) = $(size(B))")) + end + + rnk = rank(F) + m = size(F, 1) + n = size(F, 2) + + @lock F._lock begin + W = _get_ldiv_workspace(F, B) + + # Apply left permutation to B and store in W + for j in axes(B, 2) + for i in 1:length(F.rpivinv) + @inbounds W[F.rpivinv[i], j] = B[i, j] + end + end + + # Make a view into W corresponding to the size of B + W0 = @view W[Base.OneTo(m), :] + + # Apply Q' to permuted B + lmul!(adjoint(F.Q), W0) + + # Solve R*X = Q'*P*B + ldiv!(UpperTriangular(@view(F.R[Base.OneTo(rnk), Base.OneTo(rnk)])), + @view(W0[Base.OneTo(rnk), :])) + + # Apply right permutation: scatter solved rows into X using cpiv directly. + # Zero X first so free variables (beyond rank) are zero in the basic solution. + # NB: cpiv == [] if SPQR was called with ORDERING_FIXED + fill!(X, zero(T)) + if length(F.cpiv) == 0 + for j in axes(W, 2) + for i in 1:rnk + @inbounds X[i, j] = W[i, j] + end + end + else + for j in axes(W, 2) + for i in 1:rnk + @inbounds X[F.cpiv[i], j] = W[i, j] + end + end + end + end + + return X +end + end # module diff --git a/test/spqr.jl b/test/spqr.jl index 5d83d4e7..1738eea5 100644 --- a/test/spqr.jl +++ b/test/spqr.jl @@ -9,7 +9,7 @@ else using SparseArrays.SPQR using SparseArrays.CHOLMOD -using LinearAlgebra: I, istriu, norm, qr, rank, rmul!, lmul!, Adjoint, Transpose, ColumnNorm, RowMaximum, NoPivot +using LinearAlgebra: I, istriu, norm, qr, rank, rmul!, lmul!, ldiv!, Adjoint, Transpose, ColumnNorm, RowMaximum, NoPivot using SparseArrays: SparseArrays, sparse, sprandn, spzeros, SparseMatrixCSC using Random: seed! @@ -150,7 +150,7 @@ end A = sparse([0.0 1 0 0; 0 0 0 0]) F = qr(A) @test propertynames(F) == (:R, :Q, :prow, :pcol) - @test propertynames(F, true) == (:R, :Q, :prow, :pcol, :factors, :τ, :cpiv, :rpivinv) + @test propertynames(F, true) == (:R, :Q, :prow, :pcol, :factors, :τ, :cpiv, :rpivinv, :_lock, :_ldiv_workspace) end @testset "rank" begin @@ -180,6 +180,44 @@ end @test V' * Dq.Q ≈ V' * Matrix(Dq.Q) end +@testset "ldiv!" begin + @testset "workspace reuse" begin + A = sparse([1:n; rand(1:m, nn - n)], [1:n; rand(1:n, nn - n)], randn(nn), m, n) + F = qr(A) + b = randn(m) + x = zeros(n) + + # First call will allocate the workspace + first_allocs = @allocated ldiv!(x, F, b) + @test length(F._ldiv_workspace) > 0 + @test x ≈ Array(A) \ b + + # Second call with same-sized RHS should reuse workspace + b2 = randn(m) + second_allocs = @allocated ldiv!(x, F, b2) + @test second_allocs < first_allocs + @test x ≈ Array(A) \ b2 + end + + @testset "dimension errors" begin + A = sprandn(m, n, 0.5) + F = qr(A) + @test_throws DimensionMismatch ldiv!(zeros(n), F, zeros(m - 1)) + @test_throws DimensionMismatch ldiv!(zeros(n - 1), F, zeros(m)) + @test_throws DimensionMismatch ldiv!(zeros(n, 2), F, zeros(m, 3)) + end + + @testset "copying QRSparse" begin + A = sprandn(m, n, 0.5) + F = qr(A) + F_copy = copy(F) + + # These fields must not be shared + @test F._lock !== F_copy._lock + @test F._ldiv_workspace !== F_copy._ldiv_workspace + end +end + @testset "no strategies" begin A = I + sprandn(10, 10, 0.1) for i in [ColumnNorm, RowMaximum, NoPivot]