diff --git a/lib/cusparse/interfaces.jl b/lib/cusparse/interfaces.jl index ced5d8037b..f5727bbef5 100644 --- a/lib/cusparse/interfaces.jl +++ b/lib/cusparse/interfaces.jl @@ -40,11 +40,17 @@ Base.:(\)(transA::Transpose{T, LowerTriangular{T, S}}, B::DenseCuMatrix{T}) wher Base.:(\)(adjA::Adjoint{T, UpperTriangular{T, S}},B::DenseCuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('C',parent(adjA),B,'O') Base.:(\)(adjA::Adjoint{T, LowerTriangular{T, S}},B::DenseCuMatrix{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sm('C',parent(adjA),B,'O') -Base.:(\)(A::Union{UpperTriangular{T, S},LowerTriangular{T, S}}, B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('N',A,B,'O') +Base.:(\)(A::Union{UpperTriangular{T, S},LowerTriangular{T, S}}, B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('N',A,B,'O') Base.:(\)(transA::Transpose{T, UpperTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('T',parent(transA),B,'O') Base.:(\)(transA::Transpose{T, LowerTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('T',parent(transA),B,'O') Base.:(\)(adjA::Adjoint{T, UpperTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('C',parent(adjA),B,'O') Base.:(\)(adjA::Adjoint{T, LowerTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('C',parent(adjA),B,'O') +Base.:(\)(A::Union{UnitUpperTriangular{T, S},UnitLowerTriangular{T, S}}, B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('N',A,B,'O',unit_diag=true) +Base.:(\)(transA::Transpose{T, UnitUpperTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('T',parent(transA),B,'O',unit_diag=true) +Base.:(\)(transA::Transpose{T, UnitLowerTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('T',parent(transA),B,'O',unit_diag=true) +Base.:(\)(adjA::Adjoint{T, UnitUpperTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('C',parent(adjA),B,'O',unit_diag=true) +Base.:(\)(adjA::Adjoint{T, UnitLowerTriangular{T, S}},B::DenseCuVector{T}) where {T<:BlasFloat, S<:AbstractCuSparseMatrix{T}} = sv2('C',parent(adjA),B,'O',unit_diag=true) + Base.:(+)(A::Union{CuSparseMatrixCSR,CuSparseMatrixCSC},B::Union{CuSparseMatrixCSR,CuSparseMatrixCSC}) = geam(A,B,'O','O','O') Base.:(-)(A::Union{CuSparseMatrixCSR,CuSparseMatrixCSC},B::Union{CuSparseMatrixCSR,CuSparseMatrixCSC}) = geam(A,-one(eltype(A)),B,'O','O','O') diff --git a/lib/cusparse/level2.jl b/lib/cusparse/level2.jl index 8518ecc9ee..85b1eeb2ca 100644 --- a/lib/cusparse/level2.jl +++ b/lib/cusparse/level2.jl @@ -44,13 +44,14 @@ for (fname,elty) in ((:cusparseSbsrmv, :Float32), end """ - sv2!(transa::SparseChar, uplo::SparseChar, alpha::BlasFloat, A::CuSparseMatrixBSR, X::CuVector, index::SparseChar) + sv2!(transa::SparseChar, uplo::SparseChar, alpha::BlasFloat, A::CuSparseMatrixBSR, X::CuVector, index::SparseChar; unit_diag::Bool=false) Performs `X = alpha * op(A) \\ X `, where `op` can be nothing (`transa = N`), tranpose (`transa = T`) or conjugate transpose (`transa = C`). `X` is a dense vector, and `uplo` tells `sv2!` which triangle of the block sparse matrix `A` to reference. +If the triangle has unit diagonal, set `unit_diag` to true. """ -sv2!(transa::SparseChar, uplo::SparseChar, alpha::BlasFloat, A::CuSparseMatrixBSR, X::CuVector, index::SparseChar) +sv2!(transa::SparseChar, uplo::SparseChar, alpha::BlasFloat, A::CuSparseMatrixBSR, X::CuVector, index::SparseChar; unit_diag::Bool=false) # bsrsv2 for (bname,aname,sname,elty) in ((:cusparseSbsrsv2_bufferSize, :cusparseSbsrsv2_analysis, :cusparseSbsrsv2_solve, :Float32), (:cusparseDbsrsv2_bufferSize, :cusparseDbsrsv2_analysis, :cusparseDbsrsv2_solve, :Float64), @@ -62,9 +63,11 @@ for (bname,aname,sname,elty) in ((:cusparseSbsrsv2_bufferSize, :cusparseSbsrsv2_ alpha::Number, A::CuSparseMatrixBSR{$elty}, X::CuVector{$elty}, - index::SparseChar) - desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, uplo, CUSPARSE_DIAG_TYPE_NON_UNIT, index) - m,n = A.dims + index::SparseChar; + unit_diag::Bool=false) + DIAG_TYPE = (unit_diag ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT) + desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, uplo, DIAG_TYPE, index) + m,n = A.dims if m != n throw(DimensionMismatch("A must be square, but has dimensions ($m,$n)!")) end @@ -106,36 +109,40 @@ for elty in (:Float32, :Float64, :ComplexF32, :ComplexF64) alpha::Number, A::CuSparseMatrix{$elty}, X::CuVector{$elty}, - index::SparseChar) - sv2!(transa,uplo,alpha,A,copy(X),index) + index::SparseChar; + unit_diag::Bool=false) + sv2!(transa,uplo,alpha,A,copy(X),index,unit_diag=unit_diag) end function sv2(transa::SparseChar, uplo::SparseChar, A::CuSparseMatrix{$elty}, X::CuVector{$elty}, - index::SparseChar) - sv2!(transa,uplo,one($elty),A,copy(X),index) + index::SparseChar; + unit_diag::Bool=false) + sv2!(transa,uplo,one($elty),A,copy(X),index,unit_diag=unit_diag) end function sv2(transa::SparseChar, alpha::Number, A::AbstractTriangular, X::CuVector{$elty}, - index::SparseChar) + index::SparseChar; + unit_diag::Bool=false) uplo = 'U' - if istril(A) + if typeof(A) <: Union{LowerTriangular, UnitLowerTriangular} uplo = 'L' end - sv2!(transa,uplo,alpha,A.data,copy(X),index) + sv2!(transa,uplo,alpha,A.data,copy(X),index,unit_diag=unit_diag) end function sv2(transa::SparseChar, A::AbstractTriangular, X::CuVector{$elty}, - index::SparseChar) + index::SparseChar; + unit_diag::Bool=false) uplo = 'U' - if istril(A) + if typeof(A) <: Union{LowerTriangular, UnitLowerTriangular} uplo = 'L' end - sv2!(transa,uplo,one($elty),A.data,copy(X),index) + sv2!(transa,uplo,one($elty),A.data,copy(X),index,unit_diag=unit_diag) end end end @@ -151,8 +158,10 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ alpha::Number, A::CuSparseMatrixCSR{$elty}, X::CuVector{$elty}, - index::SparseChar) - desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, uplo, CUSPARSE_DIAG_TYPE_NON_UNIT, index) + index::SparseChar; + unit_diag::Bool=false) + DIAG_TYPE = (unit_diag ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT) + desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, uplo, DIAG_TYPE, index) m,n = A.dims if m != n throw(DimensionMismatch("A must be square, but has dimensions ($m,$n)!")) @@ -198,7 +207,8 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ alpha::Number, A::CuSparseMatrixCSC{$elty}, X::CuVector{$elty}, - index::SparseChar) + index::SparseChar; + unit_diag::Bool=false) ctransa = 'N' cuplo = 'U' if transa == 'N' @@ -207,8 +217,9 @@ for (bname,aname,sname,elty) in ((:cusparseScsrsv2_bufferSize, :cusparseScsrsv2_ if uplo == 'U' cuplo = 'L' end - desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, cuplo, CUSPARSE_DIAG_TYPE_NON_UNIT, index) - n,m = A.dims + DIAG_TYPE = (unit_diag ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT) + desc = CuMatrixDescriptor(CUSPARSE_MATRIX_TYPE_GENERAL, cuplo, DIAG_TYPE, index) + n,m = A.dims if m != n throw(DimensionMismatch("A must be square, but has dimensions ($m,$n)!")) end diff --git a/test/cusparse.jl b/test/cusparse.jl index 6e2e3cdff4..f052f63cdc 100644 --- a/test/cusparse.jl +++ b/test/cusparse.jl @@ -307,63 +307,70 @@ end end end end + @testset "bsrsv2" begin @testset for elty in [Float32,Float64,ComplexF32,ComplexF64] @testset "bsrsv2!" begin - A = rand(elty,m,m) - A = triu(A) - X = rand(elty,m) - alpha = rand(elty) - d_X = CuArray(X) - d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CuSparseMatrixBSR(d_A, blockdim) - d_X = CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O') - h_Y = collect(d_X) - Y = A\(alpha * X) - @test Y ≈ h_Y - d_X = CUDA.rand(elty,n) - @test_throws DimensionMismatch CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O') - A = sparse(rand(elty,m,n)) - d_A = CuSparseMatrixCSR(A) - d_A = CuSparseMatrixBSR(d_A, blockdim) - @test_throws DimensionMismatch CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O') + for unit_diag ∈ (false, true) + A = rand(elty,m,m) + A = (unit_diag ? triu(A, 1) + I : triu(A)) + X = rand(elty,m) + alpha = rand(elty) + d_X = CuArray(X) + d_A = CuSparseMatrixCSR(sparse(A)) + d_A = CuSparseMatrixBSR(d_A, blockdim) + d_X = CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + h_Y = collect(d_X) + Y = A\(alpha * X) + @test Y ≈ h_Y + d_X = CUDA.rand(elty,n) + @test_throws DimensionMismatch CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + A = sparse(rand(elty,m,n)) + d_A = CuSparseMatrixCSR(A) + d_A = CuSparseMatrixBSR(d_A, blockdim) + @test_throws DimensionMismatch CUSPARSE.sv2!('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + end end @testset "bsrsv2" begin - A = rand(elty,m,m) - A = triu(A) - Al = tril(A) - X = rand(elty,m) - alpha = rand(elty) - d_X = CuArray(X) - d_A = CuSparseMatrixCSR(sparse(A)) - d_A = CuSparseMatrixBSR(d_A, blockdim) - d_Y = CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') - h_Y = collect(d_Y) - Y = A\(alpha * X) - @test Y ≈ h_Y - d_Y = UpperTriangular(d_A)\d_X - h_Y = collect(d_Y) - @test h_Y ≈ A\X - #=d_Y = UpperTriangular(d_A)'\d_X - h_Y = collect(d_Y) - @test h_Y ≈ A'\X=# - d_Y = transpose(UpperTriangular(d_A))\d_X - h_Y = collect(d_Y) - @test h_Y ≈ transpose(A)\X - d_Y = LowerTriangular(d_A)\d_X - h_Y = collect(d_Y) - @test h_Y ≈ Al\X - #=d_Y = LowerTriangular(d_A)'\d_X - h_Y = collect(d_Y) - @test h_Y ≈ A'\X=# - d_Y = transpose(LowerTriangular(d_A))\d_X - h_Y = collect(d_Y) - @test h_Y ≈ transpose(Al)\X - A = sparse(rand(elty,m,n)) - d_A = CuSparseMatrixCSR(A) - d_A = CuSparseMatrixBSR(d_A, blockdim) - @test_throws DimensionMismatch CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') + for unit_diag ∈ (false, true) + A = rand(elty,m,m) + A = (unit_diag ? triu(A, 1) + I : triu(A)) + Al = (unit_diag ? tril(A, -1) + I : tril(A)) + X = rand(elty,m) + alpha = rand(elty) + d_X = CuArray(X) + d_A = CuSparseMatrixCSR(sparse(A)) + d_A = CuSparseMatrixBSR(d_A, blockdim) + d_Y = CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + h_Y = collect(d_Y) + Y = A\(alpha * X) + @test Y ≈ h_Y + UA = (unit_diag ? UnitUpperTriangular(d_A) : UpperTriangular(d_A)) + d_Y = UA\d_X + h_Y = collect(d_Y) + @test h_Y ≈ A\X + #=d_Y = UpperTriangular(d_A)'\d_X + h_Y = collect(d_Y) + @test h_Y ≈ A'\X=# + d_Y = transpose(UA)\d_X + h_Y = collect(d_Y) + @test h_Y ≈ transpose(A)\X + LA = (unit_diag ? UnitLowerTriangular(d_A) : LowerTriangular(d_A)) + d_Y = LA\d_X + h_Y = collect(d_Y) + @test h_Y ≈ Al\X + #=d_Y = LowerTriangular(d_A)'\d_X + h_Y = collect(d_Y) + @test h_Y ≈ A'\X=# + d_Y = transpose(LA)\d_X + h_Y = collect(d_Y) + @test h_Y ≈ transpose(Al)\X + A = sparse(rand(elty,m,n)) + d_A = CuSparseMatrixCSR(A) + d_A = CuSparseMatrixBSR(d_A, blockdim) + @test_throws DimensionMismatch CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + end end end end @@ -475,86 +482,94 @@ end @testset "cssv" begin @testset for elty in [Float32,Float64,ComplexF32,ComplexF64] @testset "csrsv2" begin - A = rand(elty,m,m) - A = triu(A) - Al = tril(A) - X = rand(elty,m) - alpha = rand(elty) - d_X = CuArray(X) - d_A = CuSparseMatrixCSR(sparse(A)) - d_Y = CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') - h_Y = collect(d_Y) - Y = A\(alpha * X) - @test Y ≈ h_Y - d_y = UpperTriangular(d_A)\d_X - h_y = collect(d_y) - y = A\X - @test y ≈ h_y - d_y = transpose(UpperTriangular(d_A))\d_X - h_y = collect(d_y) - y = transpose(A)\X - @test y ≈ h_y - #=d_y = UpperTriangular(d_A)'\d_X - h_y = collect(d_y) - y = A'\X - @test y ≈ h_y=# - d_y = LowerTriangular(d_A)\d_X - h_y = collect(d_y) - y = Al\X - @test y ≈ h_y - d_y = transpose(LowerTriangular(d_A))\d_X - h_y = collect(d_y) - y = transpose(Al)\X - @test y ≈ h_y - #=d_y = LowerTriangular(d_A)'\d_X - h_y = collect(d_y) - y = A'\X - @test y ≈ h_y=# - A = sparse(rand(elty,m,n)) - d_A = CuSparseMatrixCSR(A) - @test_throws DimensionMismatch CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') + for unit_diag ∈ (false, true) + A = rand(elty,m,m) + A = (unit_diag ? triu(A, 1) + I : triu(A)) + Al = (unit_diag ? tril(A, -1) + I : tril(A)) + X = rand(elty,m) + alpha = rand(elty) + d_X = CuArray(X) + d_A = CuSparseMatrixCSR(sparse(A)) + d_Y = CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + h_Y = collect(d_Y) + Y = A\(alpha * X) + @test Y ≈ h_Y + UA = (unit_diag ? UnitUpperTriangular(d_A) : UpperTriangular(d_A)) + d_y = UA\d_X + h_y = collect(d_y) + y = A\X + @test y ≈ h_y + d_y = transpose(UA)\d_X + h_y = collect(d_y) + y = transpose(A)\X + @test y ≈ h_y + #=d_y = UpperTriangular(d_A)'\d_X + h_y = collect(d_y) + y = A'\X + @test y ≈ h_y=# + LA = (unit_diag ? UnitLowerTriangular(d_A) : LowerTriangular(d_A)) + d_y = LA\d_X + h_y = collect(d_y) + y = Al\X + @test y ≈ h_y + d_y = transpose(LA)\d_X + h_y = collect(d_y) + y = transpose(Al)\X + @test y ≈ h_y + #=d_y = LowerTriangular(d_A)'\d_X + h_y = collect(d_y) + y = A'\X + @test y ≈ h_y=# + A = sparse(rand(elty,m,n)) + d_A = CuSparseMatrixCSR(A) + @test_throws DimensionMismatch CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + end end @testset "cscsv2" begin - A = rand(elty,m,m) - A = triu(A) - Al = tril(A) - X = rand(elty,m) - alpha = rand(elty) - d_X = CuArray(X) - d_A = CuSparseMatrixCSC(sparse(A)) - d_Y = CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') - h_Y = collect(d_Y) - Y = A\(alpha * X) - @test Y ≈ h_Y - d_Y = CUSPARSE.sv2('T','U',alpha,d_A,d_X,'O') - h_Y = collect(d_Y) - Y = transpose(A)\(alpha * X) - @test Y ≈ h_Y - d_y = UpperTriangular(d_A)\d_X - h_y = collect(d_y) - y = A\X - @test y ≈ h_y - d_y = transpose(UpperTriangular(d_A))\d_X - h_y = collect(d_y) - y = transpose(A)\X - @test y ≈ h_y - d_y = LowerTriangular(d_A)\d_X - h_y = collect(d_y) - y = Al\X - @test y ≈ h_y - d_y = transpose(LowerTriangular(d_A))\d_X - h_y = collect(d_y) - y = transpose(Al)\X - @test y ≈ h_y - #=d_y = UpperTriangular(d_A)'\d_X - h_y = collect(d_y) - y = A'\X - @test y ≈ h_y=# - # shouldn't work for now bc sv2 has no way to do conj... - A = sparse(rand(elty,m,n)) - d_A = CuSparseMatrixCSC(A) - @test_throws DimensionMismatch CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O') + for unit_diag ∈ (false, true) + A = rand(elty,m,m) + A = (unit_diag ? triu(A, 1) + I : triu(A)) + Al = (unit_diag ? tril(A, -1) + I : tril(A)) + X = rand(elty,m) + alpha = rand(elty) + d_X = CuArray(X) + d_A = CuSparseMatrixCSC(sparse(A)) + d_Y = CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + h_Y = collect(d_Y) + Y = A\(alpha * X) + @test Y ≈ h_Y + d_Y = CUSPARSE.sv2('T','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + h_Y = collect(d_Y) + Y = transpose(A)\(alpha * X) + @test Y ≈ h_Y + UA = (unit_diag ? UnitUpperTriangular(d_A) : UpperTriangular(d_A)) + d_y = UA\d_X + h_y = collect(d_y) + y = A\X + @test y ≈ h_y + d_y = transpose(UA)\d_X + h_y = collect(d_y) + y = transpose(A)\X + @test y ≈ h_y + LA = (unit_diag ? UnitLowerTriangular(d_A) : LowerTriangular(d_A)) + d_y = LA\d_X + h_y = collect(d_y) + y = Al\X + @test y ≈ h_y + d_y = transpose(LA)\d_X + h_y = collect(d_y) + y = transpose(Al)\X + @test y ≈ h_y + #=d_y = UpperTriangular(d_A)'\d_X + h_y = collect(d_y) + y = A'\X + @test y ≈ h_y=# + # shouldn't work for now bc sv2 has no way to do conj... + A = sparse(rand(elty,m,n)) + d_A = CuSparseMatrixCSC(A) + @test_throws DimensionMismatch CUSPARSE.sv2('N','U',alpha,d_A,d_X,'O',unit_diag=unit_diag) + end end end end