src/apiutils.jl (+16 −0)
@@ -53,6 +53,22 @@ function seed!(duals::AbstractArray{Dual{T,V,N}}, x,
    return duals
end

# Triangular matrices
function _nonzero_indices(x::UpperTriangular)
    n = size(x, 1)
    return (CartesianIndex(i, j) for j in 1:n for i in 1:j)
end
function _nonzero_indices(x::LowerTriangular)
    n = size(x, 1)
    return (CartesianIndex(i, j) for j in 1:n for i in j:n)
end
function seed!(duals::Union{LowerTriangular{Dual{T,V,N}},UpperTriangular{Dual{T,V,N}}}, x,
               seeds::NTuple{N,Partials{N,V}}) where {T,V,N}
    for (idx, seed) in zip(_nonzero_indices(duals), seeds)
        duals[idx] = Dual{T,V,N}(x[idx], seed)
    end
    return duals
end

function seed!(duals::AbstractArray{Dual{T,V,N}}, x, index,
               seed::Partials{N,V} = zero(Partials{N,V})) where {T,V,N}
    offset = index - 1
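
For orientation, a quick REPL check (on this PR's branch) of what the new internal helpers iterate over:

julia> using ForwardDiff, LinearAlgebra

julia> collect(ForwardDiff._nonzero_indices(UpperTriangular(zeros(3, 3))))
6-element Vector{CartesianIndex{2}}:
 CartesianIndex(1, 1)
 CartesianIndex(1, 2)
 CartesianIndex(2, 2)
 CartesianIndex(1, 3)
 CartesianIndex(2, 3)
 CartesianIndex(3, 3)

The indices come out in column-major order over the triangle, which is the order in which seeds are assigned.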
src/gradient.jl (+8 −0)
@@ -65,6 +65,14 @@ end
extract_gradient!(::Type{T}, result::AbstractArray, y::Real) where {T} = fill!(result, zero(y))
extract_gradient!(::Type{T}, result::AbstractArray, dual::Dual) where {T} = copyto!(result, partials(T, dual))

# Triangular matrices
function extract_gradient!(::Type{T}, result::Union{UpperTriangular,LowerTriangular}, dual::Dual) where {T}
    for (idx, p) in zip(_nonzero_indices(result), partials(T, dual))
        result[idx] = p
    end
    return result
end

function extract_gradient_chunk!(::Type{T}, result, dual, index, chunksize) where {T}
    offset = index - 1
    for i in 1:chunksize
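
To see the new extract_gradient! method in isolation, a minimal sketch on this PR's branch (the bare Nothing tag stands in for a real ForwardDiff.Tag, purely for illustration):

julia> using ForwardDiff, LinearAlgebra

julia> d = ForwardDiff.Dual{Nothing}(0.0, 1.0, 2.0, 3.0);  # one partial per structural non-zero

julia> ForwardDiff.extract_gradient!(Nothing, LowerTriangular(zeros(2, 2)), d)
2×2 LowerTriangular{Float64, Matrix{Float64}}:
 1.0   ⋅
 2.0  3.0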
test/GradientTest.jl (+9 −0)
@@ -226,4 +226,13 @@ end
@test dx ≈ sum(a * b)
end

# issue #738
@testset "LowerTriangular and UpperTriangular" begin
M = rand(3, 3)
for T in (LowerTriangular, UpperTriangular)
@test ForwardDiff.gradient(sum, T(randn(3, 3))) == T(ones(3, 3))

mcabbott (Member):
Unfortunately this fails if I increase the size of the matrix above the chunk size:

julia> @testset "LowerTriangular and UpperTriangular" begin
           M = rand(3, 3)
           for T in (LowerTriangular, UpperTriangular)
               @test ForwardDiff.gradient(sum, T(randn(3, 3))) == T(ones(3, 3))
               @test ForwardDiff.gradient(x -> dot(M, x), T(randn(3, 3))) == T(M)
           end
       end
Test Summary:                       | Pass  Total  Time
LowerTriangular and UpperTriangular |    4      4  1.0s
Test.DefaultTestSet("LowerTriangular and UpperTriangular", Any[], 4, false, false, true, 1.74360103628159e9, 1.743601037319704e9, false, "REPL[8]")

julia> @testset "LowerTriangular and UpperTriangular" begin
           M = rand(10, 10)
           for T in (LowerTriangular, UpperTriangular)
               @test ForwardDiff.gradient(sum, T(randn(10, 10))) == T(ones(10, 10))
               @test ForwardDiff.gradient(x -> dot(M, x), T(randn(10, 10))) == T(M)
           end
       end
LowerTriangular and UpperTriangular: Error During Test at REPL[9]:4
  Test threw exception
  Expression: ForwardDiff.gradient(sum, T(randn(10, 10))) == T(ones(10, 10))
  ArgumentError: cannot set index in the upper triangular part (1, 2) of an LowerTriangular matrix to a nonzero value (Dual{ForwardDiff.Tag{typeof(sum), Float64}}(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0))
  Stacktrace:
    [1] throw_nonzeroerror(T::Type, x::Any, i::Int64, j::Int64)
      @ LinearAlgebra ~/.julia/juliaup/julia-1.11.4+0.aarch64.apple.darwin14/share/julia/stdlib/v1.11/LinearAlgebra/src/triangular.jl:295
    [2] setindex!
      @ ~/.julia/juliaup/julia-1.11.4+0.aarch64.apple.darwin14/share/julia/stdlib/v1.11/LinearAlgebra/src/triangular.jl:326 [inlined]
    [3] _unsafe_setindex!
      @ ./reshapedarray.jl:297 [inlined]
    [4] setindex!
      @ ./reshapedarray.jl:286 [inlined]
    [5] setindex!
      @ ./subarray.jl:372 [inlined]
    [6] macro expansion
      @ ./broadcast.jl:973 [inlined]
    [7] macro expansion
      @ ./simdloop.jl:77 [inlined]
    [8] copyto!
      @ ./broadcast.jl:972 [inlined]
    [9] copyto!
      @ ./broadcast.jl:925 [inlined]
   [10] materialize!
      @ ./broadcast.jl:883 [inlined]
   [11] materialize!
      @ ./broadcast.jl:880 [inlined]
   [12] seed!(duals::LowerTriangular{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}}}, x::LowerTriangular{Float64, Matrix{Float64}}, index::Int64, seeds::NTuple{12, ForwardDiff.Partials{12, Float64}}, chunksize::Int64)
      @ ForwardDiff ~/.julia/packages/ForwardDiff/L0kjR/src/apiutils.jl:69
   [13] seed!
      @ ~/.julia/packages/ForwardDiff/L0kjR/src/apiutils.jl:66 [inlined]
   [14] chunk_mode_gradient(f::typeof(sum), x::LowerTriangular{Float64, Matrix{Float64}}, cfg::ForwardDiff.GradientConfig{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12, LowerTriangular{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}}}})

devmotion (Member Author):
Ah good catch 👍

devmotion (Member Author):
Another catch: this PR doesn't (yet) fix the fact that seeds are still generated for every element of the matrices, even though (IMO) we should limit them to the structurally non-zero entries.
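
A hypothetical sketch of that idea (not part of this PR; _input_length is an invented name): derive the chunk size from the number of structural non-zeros rather than from length(x):

using LinearAlgebra: LowerTriangular, UpperTriangular

# Hypothetical helper: count only the entries that seed! actually visits.
_input_length(x::AbstractArray) = length(x)
function _input_length(x::Union{LowerTriangular,UpperTriangular})
    n = size(x, 1)
    return n * (n + 1) ÷ 2  # structural non-zeros of an n×n triangle
end

# Chunk selection could then call _input_length(x) instead of length(x), so a
# 10×10 triangular input would need cld(55, 12) == 5 chunks rather than
# cld(100, 12) == 9 at the default chunk threshold of 12.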

mcabbott (Member):
I have forgotten how all this code works... but 5-arg seed! implies the iteration is happening elsewhere?

The other thing worth checking would be GradientConfig and DiffResult things from https://juliadiff.org/ForwardDiff.jl/stable/user/advanced/ as I'm not sure what paths they take.

devmotion (Member Author), Apr 2, 2025:
Yeah, seed! (and extract_gradient_chunk!) with > 3 arguments are called if gradients are computed in multiple chunks:

# seed work vectors
xdual = cfg.duals
seeds = cfg.seeds
seed!(xdual, x)
# do first chunk manually to calculate output type
seed!(xdual, x, 1, seeds)
ydual = f(xdual)
$(result_definition)
extract_gradient_chunk!(T, result, ydual, 1, N)
seed!(xdual, x, 1)
# do middle chunks
for c in middlechunks
    i = ((c - 1) * N + 1)
    seed!(xdual, x, i, seeds)
    ydual = f(xdual)
    extract_gradient_chunk!(T, result, ydual, i, N)
    seed!(xdual, x, i)
end
# do final chunk
seed!(xdual, x, lastchunkindex, seeds, lastchunksize)
ydual = f(xdual)
extract_gradient_chunk!(T, result, ydual, lastchunkindex, lastchunksize)

mcabbott (Member), Apr 2, 2025:
Those look good. Some chance this is flawed copy-pasting at my end, but for chunk mode I see strange effects:

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, UpperTriangular(rand(5, 5)))
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
5×5 UpperTriangular{Float64, Matrix{Float64}}:
 1.0  1.0  1.0  1.0  1.0
  ⋅   1.0  1.0  1.0  1.0
  ⋅    ⋅   1.0  1.0  1.0
  ⋅    ⋅    ⋅   1.0  1.0
  ⋅    ⋅    ⋅    ⋅   1.0

julia> sum(ans)
15.0

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(11)));  # ok
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#21#22", Float64}, Float64, 11}

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(13)));  # weird
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#23#24", Float64}, Float64, 7}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#23#24", Float64}, Float64, 7}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#23#24", Float64}, Float64, 7}
[.... >15 more]

devmotion (Member Author):
Hmm, strange. I get

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, UpperTriangular(rand(5, 5)))
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#40#41", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#40#41", Float64}, Float64, 8}
5×5 UpperTriangular{Float64, Matrix{Float64}}:
 1.0  1.0  1.0  1.0  1.0
  ⋅   1.0  1.0  1.0  1.0
  ⋅    ⋅   1.0  1.0  1.0
  ⋅    ⋅    ⋅   1.0  1.0
  ⋅    ⋅    ⋅    ⋅   1.0

julia> sum(ans)
15.0

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(11)));
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#42#43", Float64}, Float64, 11}

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(13)));
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#44#45", Float64}, Float64, 7}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#44#45", Float64}, Float64, 7}

which seems expected given

function Chunk(input_length::Integer, threshold::Integer = DEFAULT_CHUNK_THRESHOLD)
    N = pickchunksize(input_length, threshold)
    Base.@nif 12 d->(N == d) d->(Chunk{d}()) d->(Chunk{N}())
end
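
(Assuming pickchunksize rounds up as with cld: an input of length 13 exceeds the default threshold of 12, giving cld(13, 12) == 2 chunks of size cld(13, 2) == 7, hence the N = 7 duals and the two evaluations above.)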

devmotion (Member Author):
The only test failures are the ones already present on the master branch (possibly related to some recent changes to acosh or NaNMath.acosh?).

mcabbott (Member):
My weird results above must have been my mistake; I was too lazy to check out the branch...

We could add tests of how many executions are used in chunk mode (not sure whether there are any such now) but not essential.

devmotion (Member Author):
I added a test in d2a8af7.

        @test ForwardDiff.gradient(x -> dot(M, x), T(randn(3, 3))) == T(M)
    end
end

end # module