
Conversation

@devmotion (Member)

Fixes #738.

Since I had made #695, I felt somewhat responsible for #738. Defining seed! and extract_gradient! for triangular matrices seems to fix the example.

This should probably be extended to additional matrix types.
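For illustration, the core idea is to restrict seeding and extraction to the stored triangle. A rough standalone sketch (the helper name is hypothetical, not part of this diff):

using LinearAlgebra

# Hypothetical helper: the structurally nonzero indices of a triangular
# matrix, in column-major order within the stored triangle.
structural_indices(A::LowerTriangular) =
    ((i, j) for j in axes(A, 2) for i in j:size(A, 1))
structural_indices(A::UpperTriangular) =
    ((i, j) for j in axes(A, 2) for i in 1:j)

# The seed!/extract_gradient! specializations loop over such indices and write
# through parent(A), so that setindex! never touches a structural zero.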

@testset "LowerTriangular and UpperTriangular" begin
    M = rand(3, 3)
    for T in (LowerTriangular, UpperTriangular)
        @test ForwardDiff.gradient(sum, T(randn(3, 3))) == T(ones(3, 3))

Member

Unfortunately this fails if I increase the size of the matrix above the chunk size:

julia> @testset "LowerTriangular and UpperTriangular" begin
           M = rand(3, 3)
           for T in (LowerTriangular, UpperTriangular)
               @test ForwardDiff.gradient(sum, T(randn(3, 3))) == T(ones(3, 3))
               @test ForwardDiff.gradient(x -> dot(M, x), T(randn(3, 3))) == T(M)
           end
       end
Test Summary:                       | Pass  Total  Time
LowerTriangular and UpperTriangular |    4      4  1.0s
Test.DefaultTestSet("LowerTriangular and UpperTriangular", Any[], 4, false, false, true, 1.74360103628159e9, 1.743601037319704e9, false, "REPL[8]")

julia> @testset "LowerTriangular and UpperTriangular" begin
           M = rand(10, 10)
           for T in (LowerTriangular, UpperTriangular)
               @test ForwardDiff.gradient(sum, T(randn(10, 10))) == T(ones(10, 10))
               @test ForwardDiff.gradient(x -> dot(M, x), T(randn(10, 10))) == T(M)
           end
       end
LowerTriangular and UpperTriangular: Error During Test at REPL[9]:4
  Test threw exception
  Expression: ForwardDiff.gradient(sum, T(randn(10, 10))) == T(ones(10, 10))
  ArgumentError: cannot set index in the upper triangular part (1, 2) of an LowerTriangular matrix to a nonzero value (Dual{ForwardDiff.Tag{typeof(sum), Float64}}(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0))
  Stacktrace:
    [1] throw_nonzeroerror(T::Type, x::Any, i::Int64, j::Int64)
      @ LinearAlgebra ~/.julia/juliaup/julia-1.11.4+0.aarch64.apple.darwin14/share/julia/stdlib/v1.11/LinearAlgebra/src/triangular.jl:295
    [2] setindex!
      @ ~/.julia/juliaup/julia-1.11.4+0.aarch64.apple.darwin14/share/julia/stdlib/v1.11/LinearAlgebra/src/triangular.jl:326 [inlined]
    [3] _unsafe_setindex!
      @ ./reshapedarray.jl:297 [inlined]
    [4] setindex!
      @ ./reshapedarray.jl:286 [inlined]
    [5] setindex!
      @ ./subarray.jl:372 [inlined]
    [6] macro expansion
      @ ./broadcast.jl:973 [inlined]
    [7] macro expansion
      @ ./simdloop.jl:77 [inlined]
    [8] copyto!
      @ ./broadcast.jl:972 [inlined]
    [9] copyto!
      @ ./broadcast.jl:925 [inlined]
   [10] materialize!
      @ ./broadcast.jl:883 [inlined]
   [11] materialize!
      @ ./broadcast.jl:880 [inlined]
   [12] seed!(duals::LowerTriangular{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}}}, x::LowerTriangular{Float64, Matrix{Float64}}, index::Int64, seeds::NTuple{12, ForwardDiff.Partials{12, Float64}}, chunksize::Int64)
      @ ForwardDiff ~/.julia/packages/ForwardDiff/L0kjR/src/apiutils.jl:69
   [13] seed!
      @ ~/.julia/packages/ForwardDiff/L0kjR/src/apiutils.jl:66 [inlined]
   [14] chunk_mode_gradient(f::typeof(sum), x::LowerTriangular{Float64, Matrix{Float64}}, cfg::ForwardDiff.GradientConfig{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12, LowerTriangular{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(sum), Float64}, Float64, 12}}}})
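
For reference, the root cause is reproducible without ForwardDiff: setindex! on a triangular matrix rejects nonzero writes into the structurally zero part, which the generic chunked seed! attempts via a reshaped view of the input (frames [1]-[5] above):

using LinearAlgebra

L = LowerTriangular(zeros(3, 3))
L[2, 1] = 1.0  # fine: inside the stored triangle
L[1, 2] = 1.0  # throws the ArgumentError shown in the stacktrace above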

Member Author

Ah good catch 👍

Member Author

Another catch: this PR doesn't (yet) fix the fact that seeds are still generated for every element of the matrices, even though (IMO) we should limit them to the structurally nonzero entries.
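
To put rough numbers on it (assuming the default chunk threshold of 12; the actual chunk sizes get rebalanced, so the counts are approximate):

n = 10
n^2              # 100 entries seeded when every element gets a seed
n * (n + 1) ÷ 2  # 55 structurally nonzero entries of an n×n triangular matrix
cld(100, 12), cld(55, 12)  # roughly 9 vs. 5 chunks, i.e. evaluations of f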

Member

I have forgotten how all this code works... but 5-arg seed! implies the iteration is happening elsewhere?

The other thing worth checking would be GradientConfig and DiffResult things from https://juliadiff.org/ForwardDiff.jl/stable/user/advanced/ as I'm not sure what paths they take.
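
For concreteness, those paths can be exercised with just the documented API (untested against this branch, so treat it as a sketch):

using ForwardDiff, DiffResults, LinearAlgebra

x = LowerTriangular(rand(5, 5))
cfg = ForwardDiff.GradientConfig(sum, x)  # preallocated-config path
ForwardDiff.gradient(sum, x, cfg)

res = DiffResults.GradientResult(x)       # DiffResults path
ForwardDiff.gradient!(res, sum, x, cfg)
DiffResults.value(res), DiffResults.gradient(res)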

@devmotion (Member Author) · Apr 2, 2025

Yeah, seed! (and extract_gradient_chunk!) with > 3 arguments are called if gradients are computed in multiple chunks:

# seed work vectors
xdual = cfg.duals
seeds = cfg.seeds
seed!(xdual, x)
# do first chunk manually to calculate output type
seed!(xdual, x, 1, seeds)
ydual = f(xdual)
$(result_definition)
extract_gradient_chunk!(T, result, ydual, 1, N)
seed!(xdual, x, 1)
# do middle chunks
for c in middlechunks
    i = ((c - 1) * N + 1)
    seed!(xdual, x, i, seeds)
    ydual = f(xdual)
    extract_gradient_chunk!(T, result, ydual, i, N)
    seed!(xdual, x, i)
end
# do final chunk
seed!(xdual, x, lastchunkindex, seeds, lastchunksize)
ydual = f(xdual)
extract_gradient_chunk!(T, result, ydual, lastchunkindex, lastchunksize)
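
(If I remember the internals correctly, the 3-argument seed!(xdual, x, i) calls reset the partials of the chunk that was just processed back to zero, so one chunk's perturbations don't leak into the next.)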

@mcabbott (Member) · Apr 2, 2025

Those look good. Some chance this is flawed copy-pasting at my end, but for chunk mode I see strange effects:

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, UpperTriangular(rand(5, 5)))
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#27#28", Float64}, Float64, 8}
5×5 UpperTriangular{Float64, Matrix{Float64}}:
 1.0  1.0  1.0  1.0  1.0
  ⋅   1.0  1.0  1.0  1.0
  ⋅    ⋅   1.0  1.0  1.0
  ⋅    ⋅    ⋅   1.0  1.0
  ⋅    ⋅    ⋅    ⋅   1.0

julia> sum(ans)
15.0

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(11)));  # ok
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#21#22", Float64}, Float64, 11}

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(13)));  # weird
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#23#24", Float64}, Float64, 7}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#23#24", Float64}, Float64, 7}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#23#24", Float64}, Float64, 7}
[.... >15 more]

Member Author

Hmm, strange. I get

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, UpperTriangular(rand(5, 5)))
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#40#41", Float64}, Float64, 8}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#40#41", Float64}, Float64, 8}
5×5 UpperTriangular{Float64, Matrix{Float64}}:
 1.0  1.0  1.0  1.0  1.0
  ⋅   1.0  1.0  1.0  1.0
  ⋅    ⋅   1.0  1.0  1.0
  ⋅    ⋅    ⋅   1.0  1.0
  ⋅    ⋅    ⋅    ⋅   1.0

julia> sum(ans)
15.0

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(11)));
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#42#43", Float64}, Float64, 11}

julia> ForwardDiff.gradient(x -> begin @show(eltype(x)); sum(x) end, Diagonal(rand(13)));
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#44#45", Float64}, Float64, 7}
eltype(x) = ForwardDiff.Dual{ForwardDiff.Tag{var"#44#45", Float64}, Float64, 7}

which seems expected given

function Chunk(input_length::Integer, threshold::Integer = DEFAULT_CHUNK_THRESHOLD)
    N = pickchunksize(input_length, threshold)
    Base.@nif 12 d->(N == d) d->(Chunk{d}()) d->(Chunk{N}())
end
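
Concretely, both observed chunk sizes drop out of the same arithmetic once only the structural nonzeros are seeded. A from-memory sketch of the pickchunksize logic (not the exact source):

function pick(input_length, threshold = 12)
    input_length <= threshold && return input_length
    nchunks = cld(input_length, threshold)  # fewest chunks respecting the threshold
    return cld(input_length, nchunks)       # balance the entries across those chunks
end

pick(15)  # 5×5 UpperTriangular: 15 structural nonzeros → chunk size 8, 2 chunks
pick(13)  # Diagonal(rand(13)):  13 structural nonzeros → chunk size 7, 2 chunks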

Member Author

The only test failures are the ones already present on the master branch (possibly related to some recent changes to acosh or NaNMath.acosh?).

Member

My weird results above must have been my mistake; I was too lazy to check out the branch...

We could add tests of how many function evaluations are used in chunk mode (not sure whether there are any such tests now), but it's not essential.

Member Author

I added a test in d2a8af7.
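
A sketch of how such a count can be checked (not necessarily the exact code in d2a8af7):

using ForwardDiff, LinearAlgebra, Test

calls = Ref(0)
ForwardDiff.gradient(x -> (calls[] += 1; sum(x)), UpperTriangular(rand(5, 5)))
@test calls[] == 2  # 15 structural nonzeros, chunk size 8 → 2 chunks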

@devmotion changed the title from "Fix gradient with LowerTriangular and UpperTriangular input" to "Do not seed structural zeros" on Apr 2, 2025.
@devmotion merged commit fce7f76 into master on Apr 2, 2025 (2 of 3 checks passed).
@devmotion deleted the dw/lower_upper_triangular branch on April 3, 2025.

Closes: UpperTriangular / LowerTriangular broken on 1.0.0 (#738)