Skip to content

Commit

Permalink
Merge branch 'JuliaGPU:master' into QR_views
Browse files Browse the repository at this point in the history
  • Loading branch information
evelyne-ringoot authored Jul 1, 2024
2 parents 0cf05d1 + 40fa8c0 commit 9fca399
Show file tree
Hide file tree
Showing 8 changed files with 300 additions and 44 deletions.
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "GPUArrays"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "10.1.0"
version = "10.2.2"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand All @@ -16,7 +16,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[compat]
Adapt = "4.0"
GPUArraysCore = "= 0.1.6"
LLVM = "3.9, 4, 5, 6"
LLVM = "3.9, 4, 5, 6, 7, 8"
LinearAlgebra = "1"
Printf = "1"
Random = "1"
Expand Down
2 changes: 1 addition & 1 deletion lib/JLArrays/Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "JLArrays"
uuid = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
authors = ["Tim Besard <[email protected]>"]
version = "0.1.4"
version = "0.1.5"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
8 changes: 8 additions & 0 deletions lib/JLArrays/src/JLArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,14 @@ end
Base.copyto!(dest::DenseJLArray{T}, source::DenseJLArray{T}) where {T} =
copyto!(dest, 1, source, 1, length(source))

# Resize the vector `a` to `nl` elements.
#
# A fresh `JLVector{T}` of the requested length is allocated, the leading
# `min(length(a), nl)` elements of `a` are copied into it, and `a` is then
# re-pointed at the new storage (with its offset reset to zero and its dims
# replaced by the new size). Elements past the old length are left as
# allocated by `undef`. Returns `a`.
function Base.resize!(a::DenseJLVector{T}, nl::Integer) where {T}
    replacement = JLVector{T}(undef, nl)

    # only the overlap between the old and new lengths carries over
    ncopy = min(length(a), nl)
    copyto!(replacement, 1, a, 1, ncopy)

    # adopt the new storage in place so existing references to `a` see the resize
    a.data, a.offset, a.dims = replacement.data, 0, size(replacement)
    return a
end

## random number generation

Expand Down
16 changes: 16 additions & 0 deletions src/host/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,19 @@ Base.deepcopy(x::AbstractGPUArray) = copy(x)

# revert of JuliaLang/julia#31929
Base.filter(f, As::AbstractGPUArray) = As[map(f, As)::AbstractGPUArray{Bool}]

# appending

# Append every element of the vector `items` to the end of the GPU vector `a`.
#
# `a` is first grown by `length(items)` through `resize!`, after which the new
# tail is filled with a single `copyto!`. Returns `a`.
function Base.append!(a::AbstractGPUVector, items::AbstractVector)
    nitems = length(items)
    oldlen = length(a)

    resize!(a, oldlen + nitems)

    # the freshly added slots start right after the previous last element
    copyto!(a, oldlen + 1, items, firstindex(items), nitems)
    return a
end

# Tuple sources get their own method because copyto! of most GPU arrays
# doesn't currently support them: the tuple is collected into a vector and
# handed to the `append!` method for vectors. Returns `a`.
function Base.append!(a::AbstractGPUVector, items::Tuple)
    staged = collect(items)
    append!(a, staged)
    return a
end
221 changes: 204 additions & 17 deletions src/host/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,10 @@ end


## matrix multiplication

function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::AbstractArray{S}, a::Number, b::Number) where {T,S,R}
# legacy method: takes the two scaling factors as separate numbers and
# forwards them, packed into a `MulAddMul`, to the `MulAddMul`-based method
function generic_matmatmul!(C::AbstractArray, A::AbstractArray, B::AbstractArray, a::Number, b::Number)
    return generic_matmatmul!(C, A, B, MulAddMul(a, b))
end
function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::AbstractArray{S}, add::MulAddMul) where {T,S,R}
if size(A,2) != size(B,1)
throw(DimensionMismatch("matrix A has dimensions $(size(A)), matrix B has dimensions $(size(B))"))
end
Expand All @@ -350,20 +352,18 @@ function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::Abstrac
return fill!(C, zero(R))
end

add = MulAddMul(a, b)

gpu_call(C, A, B; name="matmatmul!") do ctx, C, A, B
idx = @linearidx C
assume.(size(C) .> 0)
i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1

@inbounds if i <= size(A,1) && j <= size(B,2)
z2 = zero(A[i, 1]*B[1, j] + A[i, 1]*B[1, j])
Ctmp = convert(promote_type(R, typeof(z2)), z2)
Cij = convert(promote_type(R, typeof(z2)), z2)
for k in 1:size(A,2)
Ctmp += A[i, k]*B[k, j]
Cij += A[i, k]*B[k, j]
end
C[i,j] = add(Ctmp, C[i,j])
C[i,j] = add(Cij, C[i,j])
end

return
Expand All @@ -372,42 +372,229 @@ function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::Abstrac
C
end

@static if VERSION < v"1.12.0-"
function LinearAlgebra.generic_matvecmul!(C::AbstractGPUVector, tA::AbstractChar, A::AbstractGPUMatrix, B::AbstractGPUVector, _add::MulAddMul = MulAddMul())
generic_matmatmul!(C, wrap(A, tA), B, _add.alpha, _add.beta)
generic_matmatmul!(C, wrap(A, tA), B, _add)
end

function LinearAlgebra.generic_matmatmul!(C::AbstractGPUVecOrMat, tA, tB, A::AbstractGPUVecOrMat, B::AbstractGPUVecOrMat, _add::MulAddMul=MulAddMul())
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add.alpha, _add.beta)
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
end
else
function LinearAlgebra.generic_matvecmul!(C::AbstractGPUVector, tA::AbstractChar, A::AbstractGPUMatrix, B::AbstractGPUVector, a::Number, b::Number)
LinearAlgebra.@stable_muladdmul generic_matmatmul!(C, wrap(A, tA), B, MulAddMul(a, b))
end

function LinearAlgebra.generic_matmatmul!(C::AbstractGPUVecOrMat, tA, tB, A::AbstractGPUVecOrMat, B::AbstractGPUVecOrMat, a::Number, b::Number)
LinearAlgebra.@stable_muladdmul generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul(a, b))
end
end

# Triangular-times-dense product: overwrite `C` with `tfun(tri(A)) * B`, where
# only one triangle of `A` (plus, optionally, its diagonal) is read.
#
# Arguments
# - `C`:       destination, must be `size(A,1)` x `size(B,2)`; its previous
#              contents are discarded.
# - `uploc`:   `'U'` when the referenced triangle of the stored `A` is the
#              upper one; any other value selects the lower one.
# - `isunitc`: `'U'` to use ones on the diagonal instead of reading `A[i,i]`.
# - `tfun`:    `identity`, `transpose` or `adjoint`; anything else raises
#              `error("Not supported")`.
# - `A`, `B`:  the triangular factor (stored as a plain matrix) and the dense
#              right-hand operand.
#
# Throws `DimensionMismatch` when the inner dimensions of `A` and `B` disagree
# or when `C` does not have the shape of the product. Returns `C`.
#
# NOTE(review): each work item reads several entries of `B` while others write
# `C`; this looks like it assumes `C` does not alias `B` - confirm for in-place
# callers.
function generic_trimatmul!(C::AbstractGPUVecOrMat{R}, uploc, isunitc, tfun::Function, A::AbstractGPUMatrix{T}, B::AbstractGPUVecOrMat{S}) where {T,S,R}
    if size(A,2) != size(B,1)
        throw(DimensionMismatch(lazy"matrix A has dimensions $(size(A)), matrix B has dimensions $(size(B))"))
    end
    if size(C,1) != size(A,1) || size(C,2) != size(B,2)
        throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs $((size(A,1),size(B,2)))"))
    end
    if isempty(A) || isempty(B)
        return fill!(C, zero(R))
    end

    # `upper` describes the triangle of the *effective* operand tfun(A):
    # transposing/adjointing flips which triangle is populated.
    upper = tfun === identity ? uploc == 'U' : uploc != 'U'
    unit  = isunitc == 'U'

    # kernel for tfun === identity: effective element (i,k) is A[i,k]
    function trimatmul(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        # the trailing `1` supplies the column index when C is a vector
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            # zero of the accumulation type, derived from an actual product
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            # diagonal contribution
            Cij += (unit ? one(Cij) : A[i,i]) * B[i,j]
            # strictly off-diagonal part of row i within the active triangle
            for k in (upper ? (i + 1) : 1):(upper ? m : (i - 1))
                Cij += A[i,k] * B[k,j]
            end
            # overwrite (not accumulate): C may hold arbitrary data on entry
            C[i,j] = Cij
        end

        return
    end

    # kernel for tfun === transpose: effective element (i,k) is transpose(A[k,i])
    function trimatmul_t(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += (unit ? one(Cij) : transpose(A[i,i])) * B[i,j]
            for k in (upper ? (i + 1) : 1):(upper ? m : (i - 1))
                Cij += transpose(A[k,i]) * B[k,j]
            end
            C[i,j] = Cij
        end

        return
    end

    # kernel for tfun === adjoint: effective element (i,k) is adjoint(A[k,i])
    function trimatmul_a(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += (unit ? one(Cij) : adjoint(A[i,i])) * B[i,j]
            for k in (upper ? (i + 1) : 1):(upper ? m : (i - 1))
                Cij += adjoint(A[k,i]) * B[k,j]
            end
            C[i,j] = Cij
        end

        return
    end

    if tfun === identity
        gpu_call(trimatmul, C, A, B; name="trimatmul")
    elseif tfun === transpose
        gpu_call(trimatmul_t, C, A, B; name="trimatmul_t")
    elseif tfun === adjoint
        gpu_call(trimatmul_a, C, A, B; name="trimatmul_a")
    else
        error("Not supported")
    end

    C
end

# Dense-times-triangular product: overwrite `C` with `A * tfun(tri(B))`, where
# only one triangle of `B` (plus, optionally, its diagonal) is read.
#
# Arguments
# - `C`:       destination, must be `size(A,1)` x `size(B,2)`; its previous
#              contents are discarded.
# - `uploc`:   `'U'` when the referenced triangle of the stored `B` is the
#              upper one; any other value selects the lower one.
# - `isunitc`: `'U'` to use ones on the diagonal instead of reading `B[j,j]`.
# - `tfun`:    `identity`, `transpose` or `adjoint`; anything else raises
#              `error("Not supported")`.
# - `A`, `B`:  the dense left-hand operand and the triangular factor (stored
#              as a plain array).
#
# Throws `DimensionMismatch` when the inner dimensions of `A` and `B` disagree
# or when `C` does not have the shape of the product. Returns `C`.
#
# NOTE(review): each work item reads several entries of `A` while others write
# `C`; this looks like it assumes `C` does not alias `A` - confirm for in-place
# callers.
function generic_mattrimul!(C::AbstractGPUVecOrMat{R}, uploc, isunitc, tfun::Function, A::AbstractGPUMatrix{T}, B::AbstractGPUVecOrMat{S}) where {T,S,R}
    if size(A,2) != size(B,1)
        throw(DimensionMismatch(lazy"matrix A has dimensions $(size(A)), matrix B has dimensions $(size(B))"))
    end
    if size(C,1) != size(A,1) || size(C,2) != size(B,2)
        throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs $((size(A,1),size(B,2)))"))
    end
    if isempty(A) || isempty(B)
        return fill!(C, zero(R))
    end

    # `upper` describes the triangle of the *effective* operand tfun(B):
    # transposing/adjointing flips which triangle is populated.
    upper = tfun === identity ? uploc == 'U' : uploc != 'U'
    unit  = isunitc == 'U'

    # kernel for tfun === identity: effective element (k,j) is B[k,j]
    function mattrimul(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        # the trailing `1` supplies the column index when C is a vector
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            # zero of the accumulation type, derived from an actual product
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            # diagonal contribution
            Cij += A[i,j] * (unit ? one(Cij) : B[j,j])
            # strictly off-diagonal part of column j within the active triangle
            for k in (upper ? 1 : (j + 1)):(upper ? (j - 1) : m)
                Cij += A[i,k] * B[k,j]
            end
            # overwrite (not accumulate): C may hold arbitrary data on entry
            C[i,j] = Cij
        end

        return
    end

    # kernel for tfun === transpose: effective element (k,j) is transpose(B[j,k])
    function mattrimul_t(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += A[i,j] * (unit ? one(Cij) : transpose(B[j,j]))
            for k in (upper ? 1 : (j + 1)):(upper ? (j - 1) : m)
                Cij += A[i,k] * transpose(B[j,k])
            end
            C[i,j] = Cij
        end

        return
    end

    # kernel for tfun === adjoint: effective element (k,j) is adjoint(B[j,k])
    function mattrimul_a(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += A[i,j] * (unit ? one(Cij) : adjoint(B[j,j]))
            for k in (upper ? 1 : (j + 1)):(upper ? (j - 1) : m)
                Cij += A[i,k] * adjoint(B[j,k])
            end
            C[i,j] = Cij
        end

        return
    end

    if tfun === identity
        gpu_call(mattrimul, C, A, B; name="mattrimul")
    elseif tfun === transpose
        gpu_call(mattrimul_t, C, A, B; name="mattrimul_t")
    elseif tfun === adjoint
        gpu_call(mattrimul_a, C, A, B; name="mattrimul_a")
    else
        error("Not supported")
    end

    C
end

# On Julia 1.10 and newer, add GPU-array methods to LinearAlgebra's
# `generic_trimatmul!` / `generic_mattrimul!` that simply forward every
# argument to the unqualified functions of the same name.
if VERSION >= v"1.10-"
    LinearAlgebra.generic_trimatmul!(C::AbstractGPUVecOrMat, uploc, isunitc, tfun::Function, A::AbstractGPUMatrix, B::AbstractGPUVecOrMat) =
        generic_trimatmul!(C, uploc, isunitc, tfun, A, B)

    LinearAlgebra.generic_mattrimul!(C::AbstractGPUMatrix, uploc, isunitc, tfun::Function, A::AbstractGPUMatrix, B::AbstractGPUMatrix) =
        generic_mattrimul!(C, uploc, isunitc, tfun, A, B)
end

if VERSION < v"1.10.0-DEV.1365"
# catch other functions that are called by LinearAlgebra's mul!
function LinearAlgebra.gemv!(C::AbstractGPUVector, tA::AbstractChar, A::AbstractGPUMatrix, B::AbstractGPUVector, a::Number, b::Number)
generic_matmatmul!(C, wrap(A, tA), B, a, b)
generic_matmatmul!(C, wrap(A, tA), B, MulAddMul(a, b))
end
# disambiguation
function LinearAlgebra.gemv!(C::AbstractGPUVector{T}, tA::AbstractChar, A::AbstractGPUMatrix{T}, B::AbstractGPUVector{T}, a::Number, b::Number) where {T<:LinearAlgebra.BlasFloat}
generic_matmatmul!(C, wrap(A, tA), B, a, b)
generic_matmatmul!(C, wrap(A, tA), B, MulAddMul(a, b))
end

LinearAlgebra.gemm_wrapper!(C::AbstractGPUVecOrMat, tA::AbstractChar, tB::AbstractChar, A::AbstractGPUVecOrMat, B::AbstractGPUVecOrMat, _add::MulAddMul) =
LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add)
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
# disambiguation
LinearAlgebra.gemm_wrapper!(C::AbstractGPUVecOrMat{T}, tA::AbstractChar, tB::AbstractChar, A::AbstractGPUVecOrMat{T}, B::AbstractGPUVecOrMat{T}, _add::MulAddMul) where {T<:LinearAlgebra.BlasFloat} =
LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add)
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)

function LinearAlgebra.syrk_wrapper!(C::AbstractGPUMatrix, tA::AbstractChar, A::AbstractGPUVecOrMat, _add::MulAddMul = MulAddMul())
if tA == 'T'
LinearAlgebra.generic_matmatmul!(C, 'T', 'N', A, A, _add)
generic_matmatmul!(C, wrap(A, 'T'), A, _add)
else # tA == 'N'
LinearAlgebra.generic_matmatmul!(C, 'N', 'T', A, A, _add)
generic_matmatmul!(C, A, wrap(A, 'T'), _add)
end
end
function LinearAlgebra.herk_wrapper!(C::AbstractGPUMatrix, tA::AbstractChar, A::AbstractGPUVecOrMat, _add::MulAddMul = MulAddMul())
if tA == 'C'
LinearAlgebra.generic_matmatmul!(C, 'C', 'N', A, A, _add)
generic_matmatmul!(C, wrap(A, 'C'), A, _add)
else # tA == 'N'
LinearAlgebra.generic_matmatmul!(C, 'N', 'C', A, A, _add)
generic_matmatmul!(C, A, wrap(A, 'C'), _add)
end
end
end # VERSION
Expand Down
2 changes: 1 addition & 1 deletion test/testsuite.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ include("testsuite/construction.jl")
include("testsuite/gpuinterface.jl")
include("testsuite/indexing.jl")
include("testsuite/base.jl")
#include("testsuite/vector.jl")
include("testsuite/vector.jl")
include("testsuite/reductions.jl")
include("testsuite/broadcasting.jl")
include("testsuite/linalg.jl")
Expand Down
33 changes: 33 additions & 0 deletions test/testsuite/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,39 @@
@test istriu(A) == istriu(B)
end
end

# Triangular `mul!` is only exercised on Julia 1.10 and newer.
# Each case runs the same product on the array type under test (`AT`) and on
# plain CPU arrays, then compares the results approximately.
if VERSION >= v"1.10-"
    @testset "mul! + Triangular" begin
        @testset "trimatmul! ($TR x $T, $f)" for T in (Float32, ComplexF32), TR in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular), f in (identity, transpose, adjoint)
            n = 128
            A = AT(rand(T, n,n))

            # triangular matrix * vector
            b = AT(rand(T, n))
            Ct = AT(zeros(T, n))
            C = zeros(T, n)
            mul!(Ct, f(TR(A)), b)
            mul!(C, f(TR(collect(A))), collect(b))
            @test collect(Ct) ≈ C

            # triangular matrix * matrix
            B = AT(rand(T, n, n))
            Ct = AT(zeros(T, n, n))
            C = zeros(T, n, n)
            mul!(Ct, f(TR(A)), B)
            mul!(C, f(TR(collect(A))), collect(B))
            @test collect(Ct) ≈ C
        end

        @testset "mattrimul ($TR x $T, $f)" for T in (Float32, ComplexF32), TR in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular), f in (identity, transpose, adjoint)
            n = 128
            A = AT(rand(T, n,n))

            # matrix * triangular matrix
            B = AT(rand(T, n, n))
            Ct = AT(zeros(T, n, n))
            C = zeros(T, n, n)
            mul!(Ct, A, f(TR(B)))
            mul!(C, collect(A), f(TR(collect(B))))
            @test collect(Ct) ≈ C
        end
    end
end
end

@testset "diagonal" begin
Expand Down
Loading

0 comments on commit 9fca399

Please sign in to comment.