FluxML · CarloLucibello · Jan 10, 2023 · Oct 24, 2022 · Oct 24, 2022 · Oct 25, 2022
diff --git a/Project.toml b/Project.toml
@@ -10,7 +10,7 @@ DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
-GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"  # not loaded, just a version bound
+GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 IRTools = "7869d1d1-7146-5819-86e3-90919afe41df"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
@@ -34,7 +34,7 @@ ChainRulesTestUtils = "1"
 DiffRules = "1.4"
 FillArrays = "0.8, 0.9, 0.10, 0.11, 0.12, 0.13"
 ForwardDiff = "0.10"
-GPUArrays = "8.4.2"  # not loaded, just a version bound
+GPUArrays = "8.4.2"
 GPUArraysCore = "0.1.1"
 IRTools = "0.4.4"
 LogExpFunctions = "0.3.1"

diff --git a/src/lib/broadcast.jl b/src/lib/broadcast.jl
@@ -120,6 +120,9 @@ end
 @adjoint broadcasted(::typeof(imag), x::Numeric) =
   imag.(x), z̄ -> (nothing, im .* real.(z̄))
 
+# abs2 is real-valued, so only the real part of the cotangent enters: x̄ = 2 * real(Δ) * x.
+@adjoint broadcasted(::typeof(abs2), x::Numeric) = abs2.(x), Δ -> (nothing, 2 .* real.(Δ) .* x)
+
 @adjoint function broadcasted(::typeof(+), a::AbstractArray{<:Number}, b::Bool)
   y = b === false ? a : a .+ b
   y, Δ -> (nothing, Δ, nothing)
@@ -190,7 +193,7 @@ _dual_safearg(x) = false
   # Avoid generic broadcasting in two easy cases:
   if T == Bool
     return (f.(args...), _ -> nothing)
-  elseif T <: Real && isconcretetype(T) && _dual_purefun(F) && all(_dual_safearg, args) && !isderiving()
+  elseif T <: Union{Real, Complex} && isconcretetype(T) && _dual_purefun(F) && all(_dual_safearg, args) && !isderiving()
     return broadcast_forward(f, args...)
   end
   len = inclen(args)
@@ -232,23 +235,44 @@ end
 import ForwardDiff
 using ForwardDiff: Dual
 
-dual(x, p) = x
-dual(x::Real, p) = Dual(x, p)
-dual(x::Bool, p) = x
+
+# One-hot seeding built from `Val(N)`: this ensures type stability, so it compiles on the GPU.
+dual(x, i, N) = x  # anything that is not a Bool/Real/Complex is passed through unchanged
+dual(x::Bool, i, ::Val{N}) where {N} = x
+dual(x::Real, i, ::Val{N}) where {N} = Dual(x, ntuple(k -> k == i, Val(N)))
+# ForwardDiff.jl doesn't play nicely with complex numbers, so a complex value becomes a
+# `Complex` of two duals with 2N partials: slot `i` tags the real part, `N + i` the imaginary.
+function dual(x::Complex, i, ::Val{N}) where {N}
+    re_part = Dual(real(x), ntuple(k -> k == i, Val(2N)))
+    im_part = Dual(imag(x), ntuple(k -> k == N + i, Val(2N)))
+    return Complex(re_part, im_part)
+end
 
 function dual_function(f::F) where F
-  function (args::Vararg{Any,N}) where N
-    ds = map(args, ntuple(identity,Val(N))) do x, i
-      dual(x, ntuple(j -> i==j, Val(N)))
+    function (args::Vararg{Any,N}) where N
+      ds = map(args, ntuple(identity,Val(N))) do x, i
+        tmp = dual(x, i, Val(N))
+        return tmp
+      end
+      return f(ds...)
     end
-    return f(ds...)
   end
-end
+
 
 @inline function broadcast_forward(f, args::Vararg{Any,N}) where N
-  valN = Val(N)
   out = dual_function(f).(args...)
-  eltype(out) <: Dual || return (out, _ -> nothing)
+  T = eltype(out)
+  T <: Union{Dual, Complex} || return (out, _ -> nothing)
-  T <: Union{Dual, Complex} || return (out, _ -> nothing)
+  T <: Union{Dual, Complex{<:Dual}} || return (out, _ -> nothing)
-  T <: Union{Dual, Complex} || return (out, _ -> nothing)
+  T <: Union{Dual, Complex{<:Dual}} || return (out, _ -> nothing)
+  if any(eltype(a) <: Complex for a in args)
+    _broadcast_forward_complex(T, out, args...)
+  else
+    _broadcast_forward(T, out, args...)
+  end
+end
+
+# Real input and real output
+function _broadcast_forward(::Type{<:Dual}, out, args::Vararg{Any, N}) where {N}
+  valN = Val(N)
   y = broadcast(x -> x.value, out)
   function bc_fwd_back(ȳ)
     dargs = ntuple(valN) do i
@@ -259,6 +283,60 @@ end
   return y, bc_fwd_back
 end
 
+# Real inputs, complex outputs: cotangent is real(Δ)*∂(real part) + imag(Δ)*∂(imag part).
+function _broadcast_forward(::Type{<:Complex}, out, args::Vararg{Any, N}) where {N}
+    nargs = Val(N)
+    y = broadcast(z -> Complex(real(z).value, imag(z).value), out)
+    function bc_fwd_back(ȳ)
+        dargs = ntuple(nargs) do i
+            unbroadcast(args[i], broadcast((Δ, o) -> real(Δ)*real(o).partials[i] + imag(Δ)*imag(o).partials[i], ȳ, out))
+        end
+        return (nothing, nothing, dargs...) # nothings for broadcasted & f
+    end
+    return y, bc_fwd_back
+end
+
+# Complex inputs, real outputs. Partial `i` pairs with partial `i + N` to form the complex
+# gradient; this is the ChainRules definition, which agrees with what Zygote did for real(x).
+function _broadcast_forward_complex(::Type{<:Dual}, out, args::Vararg{Any, N}) where {N}
+    nargs = Val(N)
+    y = broadcast(d -> d.value, out)
+    function bc_fwd_back(ȳ)
+        dargs = ntuple(nargs) do i
+            unbroadcast(args[i], broadcast((Δ, o) -> Δ * Complex(o.partials[i], o.partials[i+N]), ȳ, out))
+        end
+        return (nothing, nothing, dargs...) # nothings for broadcasted & f
+    end
+    return y, bc_fwd_back
+end
+
+# Elementwise pullback for complex input and complex output.
+# Writing f(x + iy) = u(x, y) + i v(x, y), a cotangent Δz = Δu + iΔv is pulled back to
+#     (Δu ∂u/∂x + Δv ∂v/∂x) + i (Δu ∂u/∂y + Δv ∂v/∂y)
+# following https://juliadiff.org/ChainRulesCore.jl/stable/maths/complex.html
+# `df` is a `Complex` whose real and imaginary parts both carry `.partials`; `i` is the
+# argument index: slot `i` holds ∂/∂x and slot `i + N` holds ∂/∂y for that argument.
+function _adjoint_complex(Δz, df, i)
+    Δu, Δv = reim(Δz)
+    du, dv = reim(df)
+    # `N` is not an argument of this function, so recover it from the dual itself:
+    # complex arguments are seeded with 2N partials (N slots for real parts, N for imaginary).
+    N = length(du.partials) ÷ 2
+    return Complex(Δu*du.partials[i] + Δv*dv.partials[i], Δu*du.partials[i+N] + Δv*dv.partials[i+N])
+end
+
+# Complex inputs, complex outputs: primal values are rebuilt from the dual parts and each
+# argument's cotangent is assembled elementwise by `_adjoint_complex`.
+function _broadcast_forward_complex(::Type{<:Complex}, out, args::Vararg{Any, N}) where {N}
+    nargs = Val(N)
+    y = broadcast(z -> Complex(real(z).value, imag(z).value), out)
+    function bc_fwd_back(ȳ)
+        dargs = ntuple(i -> unbroadcast(args[i], broadcast((Δ, o) -> _adjoint_complex(Δ, o, i), ȳ, out)), nargs)
+        return (nothing, nothing, dargs...) # nothings for broadcasted & f
+    end
+    return y, bc_fwd_back
+end
+
 using GPUArraysCore  # replaces @require CUDA block, weird indenting to preserve git blame
 
        # Ordinary broadcasting calls broadcast_forward anyway when certain its' safe,
@@ -287,4 +365,3 @@ using GPUArraysCore  # replaces @require CUDA block, weird indenting to preserve
   end
 
   pull_block_vert(sz, Δ::AbstractGPUArray, A::Number) = @allowscalar Δ[sz]
-
diff --git a/test/complex.jl b/test/complex.jl
@@ -120,4 +120,3 @@ end
     end
     @test Zygote.hessian(fun, collect(1:9)) ≈ [14 0 0 0 0 0 2 0 0; 0 16 0 0 0 0 0 4 0; 0 0 18 0 0 0 0 0 6; 0 0 0 14 0 0 8 0 0; 0 0 0 0 16 0 0 10 0; 0 0 0 0 0 18 0 0 12; 2 0 0 8 0 0 0 0 0; 0 4 0 0 10 0 0 0 0; 0 0 6 0 0 12 0 0 0]
 end
-
diff --git a/test/cuda.jl b/test/cuda.jl
@@ -26,7 +26,7 @@ end
   g_gpu = gradient(x -> v(x, 7), a_gpu)[1]
   @test g_gpu isa CuArray
   @test g_gpu |> collect ≈ g
-  
+
   w(x) = sum(broadcast(log, x))
   g = gradient(x -> w(x), a)[1]
   g_gpu = gradient(x -> w(x), a_gpu)[1]
@@ -38,7 +38,7 @@ end
   @test gradient(x -> sum(x .> 3), a_gpu) == (nothing,)
   g3 = gradient(x -> sum(x .^ 3) / count(x .> 3), a)[1]              # was Can't differentiate gc_preserve_end expression
   @test_skip cu(g3) ≈ gradient(x -> sum(x .^ 3) / sum(x .> 3), a_gpu)[1]  # was KernelException -- not fixed by PR #1018
-  @test cu(g3) ≈ gradient(x -> sum(x .^ 3) / count(x .> 3), a_gpu)[1] 
+  @test cu(g3) ≈ gradient(x -> sum(x .^ 3) / count(x .> 3), a_gpu)[1]
 
   # Projection: eltype preservation:
   @test gradient(x -> 2.3 * sum(x.^4), a_gpu)[1] isa CuArray{Float32}
@@ -90,40 +90,40 @@ end
 @testset "gradient algebra" begin
   w, b = rand(2) |> cu, rand(2) |> cu
   x1, x2 = rand(2) |> cu, rand(2) |> cu
- 
-  gs1 = gradient(() -> sum(w .* x1), Params([w])) 
-  gs2 = gradient(() -> sum(w .* x2), Params([w])) 
+
+  gs1 = gradient(() -> sum(w .* x1), Params([w]))
+  gs2 = gradient(() -> sum(w .* x2), Params([w]))
 
   @test .- gs1 isa Grads
-  @test gs1 .- gs2 isa Grads 
+  @test gs1 .- gs2 isa Grads
   @test .+ gs1 isa Grads
-  @test gs1 .+ gs2 isa Grads 
-  @test 2 .* gs1 isa Grads 
+  @test gs1 .+ gs2 isa Grads
+  @test 2 .* gs1 isa Grads
   @test (2 .* gs1)[w] ≈ 2 * gs1[w]
-  @test gs1 .* 2 isa Grads 
-  @test gs1 ./ 2 isa Grads  
-  @test (gs1 .+ gs2)[w] ≈ gs1[w] .+ gs2[w] 
+  @test gs1 .* 2 isa Grads
+  @test gs1 ./ 2 isa Grads
+  @test (gs1 .+ gs2)[w] ≈ gs1[w] .+ gs2[w]
 
   gs12 = gs1 .+ gs2
   gs1 .+= gs2
-  @test gs12[w] ≈ gs1[w] 
+  @test gs12[w] ≈ gs1[w]
 
   gs3 = gradient(() -> sum(w .* x1), Params([w, b])) # grad nothing with respect to b
-  gs4 = gradient(() -> sum(w .* x2 .+ b), Params([w, b])) 
+  gs4 = gradient(() -> sum(w .* x2 .+ b), Params([w, b]))
 
   @test .- gs3 isa Grads
-  @test gs3 .- gs4 isa Grads 
+  @test gs3 .- gs4 isa Grads
   @test .+ gs3 isa Grads
-  @test gs3 .+ gs4 isa Grads 
-  @test 2 .* gs3 isa Grads 
-  @test gs3 .* 2 isa Grads 
-  @test gs3 ./ 2 isa Grads  
+  @test gs3 .+ gs4 isa Grads
+  @test 2 .* gs3 isa Grads
+  @test gs3 .* 2 isa Grads
+  @test gs3 ./ 2 isa Grads
   @test (gs3 .+ gs4)[w] ≈ gs3[w] .+ gs4[w]
-  @test (gs3 .+ gs4)[b] ≈ gs4[b] 
-  
+  @test (gs3 .+ gs4)[b] ≈ gs4[b]
+
   @test gs3 .+ IdDict(w => similar(w), b => similar(b)) isa Grads
   gs3 .+= IdDict(p => randn!(similar(p)) for p in keys(gs3))
-  @test gs3 isa Grads 
+  @test gs3 isa Grads
 
   @test_throws ArgumentError gs1 .+ gs4
 end
@@ -140,3 +140,21 @@ end
   @test_skip gradient((x,y) -> sum(vcat(x,y)), 1f0, r, 2f0, r)[2] isa CUDA.CuArray{Float32}
 end
 
+
+@testset "CUDA complex broadcasting" begin
+    # Issue 961 and 1121 and 1215
+    # Use genuinely complex data: `complex(rand(Float32, 50))` has zero imaginary
+    # parts, so the imaginary component of the gradient would never be checked.
+    y = rand(ComplexF32, 50)
+    ygpu = cu(y)
+
+    # Same gradient three ways: reduction with a function, explicit broadcast, and CPU.
+    g1 = Zygote.gradient(z -> sum(abs2, z), ygpu)[1]
+    g2 = Zygote.gradient(z -> sum(abs2.(z)), ygpu)[1]
+    g3 = Zygote.gradient(z -> sum(abs2, z), y)[1]
+    @test g1 isa CUDA.CuArray{ComplexF32}
+    @test g2 isa CUDA.CuArray{ComplexF32}
+    @test collect(g1) ≈ collect(g2)
+    @test collect(g1) ≈ g3
+    @test collect(g1) ≈ 2 .* y
+end
Original file line number	Diff line number	Diff line change
Expand Up		@@ -120,4 +120,3 @@ end
		end
		@test Zygote.hessian(fun, collect(1:9)) ≈ [14 0 0 0 0 0 2 0 0; 0 16 0 0 0 0 0 4 0; 0 0 18 0 0 0 0 0 6; 0 0 0 14 0 0 8 0 0; 0 0 0 0 16 0 0 10 0; 0 0 0 0 0 18 0 0 12; 2 0 0 8 0 0 0 0 0; 0 4 0 0 10 0 0 0 0; 0 0 6 0 0 12 0 0 0]
		end