From 3ac59cddd7914a2d61bc06514ff7e38042a4a3da Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Fri, 19 Apr 2019 15:36:34 -0400 Subject: [PATCH 1/5] [TF] Fix gradients in sum(squeezingAxes:) and mean(squeezingAxes:) sum(squeezingAxes:) and mean(squeezingAxes:) were throwing an error during the backward pass because the gradients weren't unsqueezed before being broadcast. Note that this could be refactored nicely if we had a function that took a list of ints for `expandingShape`. Second note: I may be wrong, but it seems like `_vjpMean(squeezingAxes axes: [Int])` is never used and only the Tensor version is. --- stdlib/public/TensorFlow/Gradients.swift | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/stdlib/public/TensorFlow/Gradients.swift b/stdlib/public/TensorFlow/Gradients.swift index 4430e04354462..6c7bdba36dfb2 100644 --- a/stdlib/public/TensorFlow/Gradients.swift +++ b/stdlib/public/TensorFlow/Gradients.swift @@ -579,7 +579,11 @@ extension Tensor where Scalar : TensorFlowFloatingPoint { squeezingAxes axes: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = sum(squeezingAxes: axes) - return (value, { [shape = shapeTensor] in $0.broadcast(toShape: shape) }) + return (value, { [shape = shapeTensor] in + var res = $0 + for i in axes.array.scalars { res = res.expandingShape(at: Int(i)) } + return res.broadcast(toShape: shape) + }) } @inlinable @@ -596,7 +600,9 @@ extension Tensor where Scalar : TensorFlowFloatingPoint { let value = mean(squeezingAxes: axes) return (value, { [shape = shapeTensor, count = axes.map { shape[$0] }.reduce(1, *)] in - $0.broadcast(toShape: shape) / Tensor(Scalar(count)) + var res = $0 + for i in axes { res = res.expandingShape(at: Int(i)) } + return res.broadcast(toShape: shape) / Tensor(Scalar(count)) }) } @@ -607,7 +613,9 @@ extension Tensor where Scalar : TensorFlowFloatingPoint { let value = mean(squeezingAxes: axes) let count = Raw.gather(params: 
shapeTensor, indices: axes).product() return (value, { [shape = shapeTensor] in - $0.broadcast(toShape: shape) / Tensor(count) + var res = $0 + for i in axes.array.scalars { res = res.expandingShape(at: Int(i)) } + return res.broadcast(toShape: shape) / Tensor(count) }) } } From fb565e0dffb880c4aedf9d2ba6a116b458bcc88c Mon Sep 17 00:00:00 2001 From: Dan Zheng Date: Fri, 19 Apr 2019 12:56:47 -0700 Subject: [PATCH 2/5] Remove unused `_vjpMean` function. --- stdlib/public/TensorFlow/Gradients.swift | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/stdlib/public/TensorFlow/Gradients.swift b/stdlib/public/TensorFlow/Gradients.swift index 6c7bdba36dfb2..1a36ae99b5490 100644 --- a/stdlib/public/TensorFlow/Gradients.swift +++ b/stdlib/public/TensorFlow/Gradients.swift @@ -595,17 +595,6 @@ extension Tensor where Scalar : TensorFlowFloatingPoint { }) } - @inlinable - func _vjpMean(squeezingAxes axes: [Int]) -> (Tensor, (Tensor) -> Tensor) { - let value = mean(squeezingAxes: axes) - return (value, { [shape = shapeTensor, - count = axes.map { shape[$0] }.reduce(1, *)] in - var res = $0 - for i in axes { res = res.expandingShape(at: Int(i)) } - return res.broadcast(toShape: shape) / Tensor(Scalar(count)) - }) - } - @inlinable func _vjpMean( squeezingAxes axes: Tensor From b2fa44adfb9d36590f618a98011cc6d0a111774c Mon Sep 17 00:00:00 2001 From: Dan Zheng Date: Fri, 19 Apr 2019 13:07:10 -0700 Subject: [PATCH 3/5] Update Gradients.swift --- stdlib/public/TensorFlow/Gradients.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stdlib/public/TensorFlow/Gradients.swift b/stdlib/public/TensorFlow/Gradients.swift index 1a36ae99b5490..ebc5d0e4d6ef3 100644 --- a/stdlib/public/TensorFlow/Gradients.swift +++ b/stdlib/public/TensorFlow/Gradients.swift @@ -579,10 +579,10 @@ extension Tensor where Scalar : TensorFlowFloatingPoint { squeezingAxes axes: Tensor ) -> (Tensor, (Tensor) -> Tensor) { let value = sum(squeezingAxes: axes) - return (value, { 
[shape = shapeTensor] in - var res = $0 + return (value, { [shape = shapeTensor] in + var res = $0 for i in axes.array.scalars { res = res.expandingShape(at: Int(i)) } - return res.broadcast(toShape: shape) + return res.broadcast(toShape: shape) }) } @@ -603,7 +603,7 @@ extension Tensor where Scalar : TensorFlowFloatingPoint { let count = Raw.gather(params: shapeTensor, indices: axes).product() return (value, { [shape = shapeTensor] in var res = $0 - for i in axes.array.scalars { res = res.expandingShape(at: Int(i)) } + for i in axes.array.scalars { res = res.expandingShape(at: Int(i)) } return res.broadcast(toShape: shape) / Tensor(count) }) } From dfbd9908dcdb575c57807fc16ab582cdba7518d6 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Fri, 19 Apr 2019 20:38:44 +0000 Subject: [PATCH 4/5] Add test --- .../TensorFlowRuntime/tensor_autodiff_runtime.swift | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/TensorFlowRuntime/tensor_autodiff_runtime.swift b/test/TensorFlowRuntime/tensor_autodiff_runtime.swift index a17dc3d3d034c..f50c0bd3d2e0a 100644 --- a/test/TensorFlowRuntime/tensor_autodiff_runtime.swift +++ b/test/TensorFlowRuntime/tensor_autodiff_runtime.swift @@ -98,38 +98,39 @@ TensorADTests.testAllBackends("Abs") { TensorADTests.testAllBackends("sum") { let input = Tensor(repeating: 42, shape: [2, 2]) let sumPullbackScalar = pullback(at: input) { (a: Tensor) in a.sum() } + let sumPullbackSqueezingAxes = pullback(at: input) { (a: Tensor) in a.sum(squeezingAxes: [0, 1]) } let sumPullbackAlongAxes = pullback(at: input) { (a: Tensor) in a.sum(alongAxes: 0, 1) } let expected = Tensor(ones: [2, 2]) expectEqual(expected, sumPullbackScalar(Tensor(1))) - // expectEqual(expected, sumPullbackSqueezingAxes(Tensor(1))) + expectEqual(expected, sumPullbackSqueezingAxes(Tensor(1))) expectEqual(expected, sumPullbackAlongAxes(Tensor(1))) expectEqual(expected * 3, sumPullbackScalar(Tensor(3))) - // expectEqual(expected * 3, 
sumPullbackSqueezingAxes(Tensor(3))) + expectEqual(expected * 3, sumPullbackSqueezingAxes(Tensor(3))) expectEqual(expected * 3, sumPullbackAlongAxes(Tensor(3))) } TensorADTests.testAllBackends("mean") { let meanGradScalar = gradient { (a: Tensor) in a.mean() } - // let meanGradSqueezingAxes = gradient { (a: Tensor) in a.mean(squeezingAxes: 0, 1) } + let meanGradSqueezingAxes = gradient { (a: Tensor) in a.mean(squeezingAxes: 0, 1) } let meanGradAlongAxes = gradient { (a: Tensor) in a.mean(alongAxes: 0, 1) } let input = Tensor(ones: [2, 2]) let expected = Tensor(repeating: 0.25, shape: [2, 2]) expectEqual(expected, meanGradScalar(input)) - // expectEqual(expected, meanGradSqueezingAxes(input)) + expectEqual(expected, meanGradSqueezingAxes(input)) expectEqual(expected, meanGradAlongAxes(input)) } TensorADTests.testAllBackends("variance") { let varianceGradScalar = gradient { (a: Tensor) in a.variance() } - // let varianceGradSqueezingAxes = gradient { (a: Tensor) in a.variance(squeezingAxes: 0, 1) } + let varianceGradSqueezingAxes = gradient { (a: Tensor) in a.variance(squeezingAxes: 0, 1) } let varianceGradAlongAxes = gradient { (a: Tensor) in a.variance(alongAxes: 0, 1) } let input: Tensor = [[1, 2], [3, 4]] let expected: Tensor = [[-0.75, -0.25], [0.25, 0.75]] expectEqual(expected, varianceGradScalar(input)) - // expectEqual(expected, varianceGradSqueezingAxes(input)) + expectEqual(expected, varianceGradSqueezingAxes(input)) expectEqual(expected, varianceGradAlongAxes(input)) } From 65752cfa851cec02645d0936235c36f55f66afd5 Mon Sep 17 00:00:00 2001 From: Dan Zheng Date: Fri, 19 Apr 2019 13:42:24 -0700 Subject: [PATCH 5/5] Minor edit for consistency. 
--- test/TensorFlowRuntime/tensor_autodiff_runtime.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/TensorFlowRuntime/tensor_autodiff_runtime.swift b/test/TensorFlowRuntime/tensor_autodiff_runtime.swift index f50c0bd3d2e0a..69cbe26b88284 100644 --- a/test/TensorFlowRuntime/tensor_autodiff_runtime.swift +++ b/test/TensorFlowRuntime/tensor_autodiff_runtime.swift @@ -98,7 +98,7 @@ TensorADTests.testAllBackends("Abs") { TensorADTests.testAllBackends("sum") { let input = Tensor(repeating: 42, shape: [2, 2]) let sumPullbackScalar = pullback(at: input) { (a: Tensor) in a.sum() } - let sumPullbackSqueezingAxes = pullback(at: input) { (a: Tensor) in a.sum(squeezingAxes: [0, 1]) } + let sumPullbackSqueezingAxes = pullback(at: input) { (a: Tensor) in a.sum(squeezingAxes: 0, 1) } let sumPullbackAlongAxes = pullback(at: input) { (a: Tensor) in a.sum(alongAxes: 0, 1) } let expected = Tensor(ones: [2, 2])