FluxML · Roger-luo · Jun 18, 2019 · Jun 18, 2019 · Oct 4, 2019 · Oct 4, 2019
diff --git a/Manifest.toml b/Manifest.toml
@@ -2,9 +2,9 @@
 
 [[AbstractFFTs]]
 deps = ["LinearAlgebra"]
-git-tree-sha1 = "380e36c66edfa099cd90116b24c1ce8cafccac40"
+git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716"
 uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
-version = "0.4.1"
+version = "0.5.0"
 
 [[Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
@@ -45,12 +45,6 @@ git-tree-sha1 = "9a11d428dcdc425072af4aea19ab1e8c3e01c032"
 uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
 version = "1.3.0"
 
-[[Crayons]]
-deps = ["Test"]
-git-tree-sha1 = "f621b8ef51fd2004c7cf157ea47f027fdeac5523"
-uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
-version = "4.0.0"
-
 [[DataStructures]]
 deps = ["InteractiveUtils", "OrderedCollections"]
 git-tree-sha1 = "1fe8fad5fc84686dcbc674aa255bc867a64f8132"
@@ -82,10 +76,10 @@ deps = ["Random", "Serialization", "Sockets"]
 uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 
 [[FFTW]]
-deps = ["AbstractFFTs", "BinaryProvider", "Conda", "Libdl", "LinearAlgebra", "Reexport", "Test"]
-git-tree-sha1 = "6c5b420da0b8c12098048561b8d58f81adea506f"
+deps = ["AbstractFFTs", "BinaryProvider", "Conda", "Libdl", "LinearAlgebra", "Reexport"]
+git-tree-sha1 = "4cfd3d43819228b9e73ab46600d0af0aa5cedceb"
 uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
-version = "1.0.1"
+version = "1.1.0"
 
 [[FillArrays]]
 deps = ["LinearAlgebra", "Random", "SparseArrays"]
@@ -94,11 +88,17 @@ pinned = true
 uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
 version = "0.7.4"
 
+[[FiniteDifferences]]
+deps = ["LinearAlgebra", "Printf"]
+git-tree-sha1 = "712a747a0106ad1cca0947e3d1e765cf17dee8b8"
+uuid = "26cc04aa-876d-5657-8c51-4c34ba976000"
+version = "0.9.0"
+
 [[ForwardDiff]]
 deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "NaNMath", "Random", "SpecialFunctions", "StaticArrays"]
-git-tree-sha1 = "adf88d6da1f0294058f38295becf8807986bb7d0"
+git-tree-sha1 = "4407e7b76999eca2646abdb68203bd4302476168"
 uuid = "f6369f11-7733-5829-9624-2563aa707210"
-version = "0.10.5"
+version = "0.10.6"
 
 [[IRTools]]
 deps = ["InteractiveUtils", "MacroTools", "Test"]
@@ -167,7 +167,7 @@ uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
 version = "0.3.8"
 
 [[Pkg]]
-deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
+deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
 uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 
 [[Printf]]
@@ -219,9 +219,9 @@ version = "0.8.0"
 
 [[StaticArrays]]
 deps = ["LinearAlgebra", "Random", "Statistics"]
-git-tree-sha1 = "1e9c5d89cba8047d518f1ffef432906ef1a3e8bd"
+git-tree-sha1 = "5a3bcb6233adabde68ebc97be66e95dcb787424c"
 uuid = "90137ffa-7385-5640-81b9-e52037218182"
-version = "0.12.0"
+version = "0.12.1"
 
 [[Statistics]]
 deps = ["LinearAlgebra", "SparseArrays"]
@@ -232,10 +232,10 @@ deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [[TimerOutputs]]
-deps = ["Crayons", "Printf", "Test", "Unicode"]
-git-tree-sha1 = "b80671c06f8f8bae08c55d67b5ce292c5ae2660c"
+deps = ["Printf"]
+git-tree-sha1 = "d9c67bd7ac89aafa75037307331d050998bb5a96"
 uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
-version = "0.5.0"
+version = "0.5.1"
 
 [[Tokenize]]
 git-tree-sha1 = "dfcdbbfb2d0370716c815cbd6f8a364efb6f42cf"

diff --git a/Project.toml b/Project.toml
@@ -6,6 +6,7 @@ version = "0.4.1"
 DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
+FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 IRTools = "7869d1d1-7146-5819-86e3-90919afe41df"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
@@ -31,7 +32,6 @@ NaNMath = "0"
 Requires = "0.5"
 SpecialFunctions = "0"
 ZygoteRules = "0.2"
-julia = "1"
 
 [extras]
 CUDAapi = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"

diff --git a/src/Zygote.jl b/src/Zygote.jl
@@ -35,6 +35,7 @@ include("lib/utils.jl")
 include("compiler/interface2.jl")
 
 include("profiler/Profile.jl")
+include("gradcheck.jl")
 
 @init @require Tracker="9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" begin
   include("flux.jl")

diff --git a/src/gradcheck.jl b/src/gradcheck.jl
@@ -0,0 +1,82 @@
+using FiniteDifferences
+export jacobian, gradcheck
+
+# Based on torch.gradcheck
+# Allocate a zero-filled Jacobian buffer for the input `x`: one row per output
+# element (`out_length`) and one column per input element (a `Number` counts as one).
+function make_jacobian(x::AbstractArray, out_length::Int)
+    return zeros(eltype(x), out_length, length(x))
+end
+
+function make_jacobian(x::Number, out_length::Int)
+    return zeros(typeof(x), out_length, 1)
+end
+
+# Zero-valued counterpart of `x`: a scalar zero for numbers and a newly allocated
+# zero-filled `Array` for arrays; lazy broadcasts are materialized first.
+zero_like(x::Number) = zero(typeof(x))
+
+function zero_like(x::AbstractArray)
+    return zeros(eltype(x), size(x)...)
+end
+
+function zero_like(x::Broadcast.Broadcasted)
+    return zero_like(Broadcast.materialize(x))
+end
+
+"""
+    jacobian(f, xs...)
+
+Compute the analytical Jacobian of `f` at the inputs `xs...` from its pullback.
+
+Returns a tuple with one matrix per input. Each matrix has one row per element of
+the output of `f` and one column per element of the corresponding input.
+"""
+function jacobian(f, xs...)
+    y, pb = pullback(f, xs...)
+    nrows = length(y)
+    buffers = map(xs) do x
+        make_jacobian(x, nrows)
+    end
+    # the seed is overwritten row by row inside `jacobian!`
+    seed = zero_like(y)
+    jacobian!(pb, buffers, seed)
+    return buffers
+end
+
+# Flatten arrays to vectors; anything else (e.g. a plain number) passes through as is.
+function _vec(x::AbstractArray)
+    return vec(x)
+end
+
+function _vec(x)
+    return x
+end
+
+# Fill the single Jacobian row of a scalar-valued function.
+#
+# `f_back` is the pullback, `jacobians` holds one preallocated matrix per input, and
+# `grad_output` is used only for its type `T`, which determines the seed `one(T)`.
+# Gradients reported as `nothing` are skipped, leaving the matching row untouched.
+function jacobian!(f_back, jacobians, grad_output::T) where T <: Number
+    grads_input = f_back(one(T))
+    # the pullback may return `nothing` as a whole when no input has a gradient
+    grads_input === nothing && return jacobians
+    for (jacobian_x, d_x) in zip(jacobians, grads_input)
+        # `nothing` cannot be broadcast into a numeric row
+        d_x === nothing && continue
+        jacobian_x[1, :] .= _vec(d_x)
+    end
+    return jacobians
+end
+
+# Fill the Jacobians of an array-valued function one output element at a time.
+#
+# For the `k`-th element of `grad_output` the seed is set to a one-hot array and the
+# resulting input gradients are written into row `k` of each matrix in `jacobians`.
+# `grad_output` is used as scratch space and is overwritten. Gradients reported as
+# `nothing` are skipped, leaving the matching row untouched.
+function jacobian!(f_back, jacobians, grad_output::AbstractArray)
+    for (k, idx) in enumerate(eachindex(grad_output))
+        fill!(grad_output, 0)
+        grad_output[idx] = 1
+        grads_input = f_back(grad_output)
+        # the pullback may return `nothing` as a whole when no input has a gradient
+        grads_input === nothing && continue
+        for (jacobian_x, d_x) in zip(jacobians, grads_input)
+            # `nothing` cannot be broadcast into a numeric row
+            d_x === nothing && continue
+            jacobian_x[k, :] .= _vec(d_x)
+        end
+    end
+    return jacobians
+end
+
+# Build a delta of ones matching `x`, used to seed the backward pass in tests.
+function test_delta(x::Number)
+    return one(x)
+end
+
+function test_delta(x::AbstractArray)
+    return fill_storage_ones(x)
+end
+# NOTE: this might belong in Adapt and may be moved there in a later PR.
+#       In general the delta does not need to have the same type as the output,
+#       but since users are allowed to define their pullback closures with type
+#       annotations, it is better to preserve the wrapper type.
+#
+# Return an array of ones with the same wrapper type as `x`. Wrapper types are
+# rebuilt around a ones-filled parent (presumably because `fill!` cannot write to
+# their structurally constrained entries); `Symmetric` and `Hermitian` keep the
+# stored triangle (`uplo`) of `x`.
+fill_storage_ones(x::AbstractArray) = fill!(similar(x), 1)
+
+function fill_storage_ones(x::LinearAlgebra.Adjoint)
+    return LinearAlgebra.Adjoint(fill_storage_ones(parent(x)))
+end
+
+function fill_storage_ones(x::LinearAlgebra.Transpose)
+    return LinearAlgebra.Transpose(fill_storage_ones(parent(x)))
+end
+
+function fill_storage_ones(x::LinearAlgebra.Diagonal)
+    return LinearAlgebra.Diagonal(fill_storage_ones(parent(x)))
+end
+
+function fill_storage_ones(x::LinearAlgebra.UpperTriangular)
+    return LinearAlgebra.UpperTriangular(fill_storage_ones(parent(x)))
+end
+
+function fill_storage_ones(x::LinearAlgebra.LowerTriangular)
+    return LinearAlgebra.LowerTriangular(fill_storage_ones(parent(x)))
+end
+
+function fill_storage_ones(x::LinearAlgebra.Symmetric)
+    return LinearAlgebra.Symmetric(fill_storage_ones(parent(x)), Symbol(x.uplo))
+end
+
+function fill_storage_ones(x::LinearAlgebra.Hermitian)
+    return LinearAlgebra.Hermitian(fill_storage_ones(parent(x)), Symbol(x.uplo))
+end
+
+"""
+    gradcheck(f, xs...; eps=sqrt(eps(T)), atol::Real=0, rtol::Real=atol > 0 ? 0 : sqrt(eps))
+
+Check the gradient of `f` at the inputs `xs...` by comparing a finite-difference
+estimate of the vector-Jacobian product with the one returned by the pullback of `f`.
+Both are evaluated for a delta of ones shaped like the output of `f`.
+
+# Keywords
+- `eps`: forwarded to `central_fdm` as its `eps` keyword. Defaults to the square root
+  of the machine epsilon of `T`, the real floating-point type corresponding to the
+  element type of the first input.
+- `atol`: absolute tolerance passed to `isapprox`.
+- `rtol`: relative tolerance passed to `isapprox`. Defaults to `0` when `atol > 0`,
+  otherwise to the square root of the `eps` keyword.
+
+Returns `true` if all compared gradients agree within the tolerances.
+"""
+function gradcheck(f, xs...;
+        # `Base.eps` is spelled out because the keyword itself is named `eps`;
+        # `real(float(...))` keeps integer and complex element types from throwing
+        eps=sqrt(Base.eps(real(float(eltype(first(xs)))))),
+        atol::Real=0, rtol::Real= atol > 0 ? 0 : sqrt(eps))
+
+    fdm = central_fdm(5, 1, eps=eps)
+    output, back = pullback(f, xs...)
+
+    Δ = test_delta(output)
+    nj′vp = j′vp(fdm, f, Δ, xs...)  # numerical estimate
+    sj′vp = back(Δ)                 # result of the pullback
+    return all(isapprox.(nj′vp, sj′vp; atol=atol, rtol=rtol))
+end
diff --git a/src/lib/array.jl b/src/lib/array.jl
@@ -477,16 +477,23 @@ end
 
 # Adjoint based on the Theano implementation, which uses the differential as described
 # in Brančík, "Matlab programs for matrix exponential function derivative evaluation"
-@adjoint exp(A::AbstractMatrix) = exp(A), function(F̄)
+# Gradient of the matrix exponential `exp(A)` with respect to `A`, given the output
+# gradient `F̄`. The computation goes through the eigendecomposition of `A`.
+# NOTE(review): `_pairdiffquotmat` is defined elsewhere; presumably it builds the
+# matrix of pairwise difference quotients of `exp` over the eigenvalues — confirm.
+# The adjoint for real matrices takes `real` of this result.
+function ∇exp(F̄, A::AbstractMatrix)
   n = size(A, 1)
   E = eigen(A)
   w = E.values
   ew = exp.(w)
   X = _pairdiffquotmat(exp, n, w, ew, ew, ew)
   V = E.vectors
   VF = factorize(V)
-  Ā = (V * ((VF \ F̄' * V) .* X) / VF)'
-  return (Ā,)
+  return (V * ((VF \ F̄' * V) .* X) / VF)'
+end
+
+# Adjoint of the matrix exponential for general matrices: the pullback returns the
+# result of `∇exp` wrapped in a one-element tuple.
+@adjoint function exp(A::AbstractMatrix)
+  return exp(A), F̄ -> (∇exp(F̄, A),)
+end
+
+# Adjoint of the matrix exponential for real matrices: same as the general case, but
+# only the real part of the result of `∇exp` is returned.
+@adjoint function exp(A::AbstractMatrix{<:Real})
+  return exp(A), F̄ -> (real(∇exp(F̄, A)),)
+end
 
 @adjoint function LinearAlgebra.eigen(A::LinearAlgebra.RealHermSymComplexHerm)