From 45e1502e4b0cf8c38380417776c391464d89c6fb Mon Sep 17 00:00:00 2001 From: sxjscience Date: Sun, 14 Oct 2018 14:56:04 +0800 Subject: [PATCH 01/27] try to add support some ops --- .../tensor/elemwise_binary_op_basic.cc | 12 +++++++++- .../tensor/elemwise_unary_op_basic.cc | 8 ++++++- src/operator/tensor/elemwise_unary_op_trig.cc | 22 +++++++++++++++++-- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc index 339290df8bf9..710ce5510236 100644 --- a/src/operator/tensor/elemwise_binary_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_op_basic.cc @@ -224,7 +224,17 @@ The storage type of ``elemwise_mul`` output depends on storage types of inputs return std::vector{ResourceRequest::kTempSpace}; }) .add_alias("_mul").add_alias("_Mul") -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_mul"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto lhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_lhs", + {ograds[0], n->inputs[1]}, nullptr, &n); + auto rhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_rhs", + {ograds[0], n->inputs[0]}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{lhs_grad, 0, 0}); + ret.emplace_back(nnvm::NodeEntry{rhs_grad, 0, 0}); + return ret; + }); NNVM_REGISTER_OP(_backward_mul) .set_num_inputs(3) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 49ae976cfc2c..b11c1ebbcc28 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -623,7 +623,13 @@ The storage type of ``negative`` output depends upon the input storage type: - negative(csr) = csr )code") -.set_attr("FGradient", ElemwiseGradUseNone{"negative"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto in_grad = MakeNode("negative", n->attrs.name + "_backward", {ograds[0]}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); // reciprocal MXNET_OPERATOR_REGISTER_UNARY(reciprocal) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 288719f48a96..5de6de63c06d 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -44,7 +44,15 @@ The storage type of ``sin`` output depends upon the input storage type: - sin(csr) = csr )code" ADD_FILELINE) -.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_sin" }); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto x_grad = MakeNode("cos", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", + {ograds[0], nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd); @@ -61,7 +69,17 @@ The input should be in radians (:math:`2\pi` rad equals 360 degrees). 
The storage type of ``cos`` output is always dense )code" ADD_FILELINE) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_cos"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto x_grad = MakeNode("sin", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); + auto neg_x_grad = MakeNode("negative", n->attrs.name + "_mid_neg_x_grad", + {nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", + {ograds[0], nnvm::NodeEntry{neg_x_grad, 0, 0}}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd); From 492e4cdd19db27c1a930ccb5f1ed6562d044fe8b Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 3 Apr 2019 10:38:04 -0700 Subject: [PATCH 02/27] add unit test for second order grad --- .../python/unittest/test_higher_order_grad.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 tests/python/unittest/test_higher_order_grad.py diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py new file mode 100644 index 000000000000..696ac9fa8cab --- /dev/null +++ b/tests/python/unittest/test_higher_order_grad.py @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import mxnet as mx +import numpy as np +from mxnet import gluon, nd, autograd +from mxnet.test_utils import assert_almost_equal +from tests.python.unittest.common import with_seed + + +@with_seed() +def test_elemwise_mul(): + x = nd.array([1, 2, 3]) + y = nd.zeros(3) + x.attach_grad() + with autograd.record(): + y = nd.elemwise_mul(x, x) + y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] + y_grad.backward() + expect_grad = nd.array([2, 2, 2]) + assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + + +@with_seed() +def test_sin(): + x = nd.array([1, 2, 3]) + x.attach_grad() + with autograd.record(): + y = nd.sin(x) + y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] + y_grad.backward() + expect_grad = -nd.sin(x) + assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + + +@with_seed() +def test_cos(): + x = nd.array([1, 2, 3]) + x.attach_grad() + with autograd.record(): + y = nd.cos(x) + y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] + y_grad.backward() + expect_grad = -nd.cos(x) + assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + + +if __name__ == '__main__': + import nose + nose.runmodule() From 45b334ebb571880388b924eba7629f63b4cd4a9d Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 3 Apr 2019 16:55:35 -0700 Subject: [PATCH 03/27] implement grad for relu and add unit test --- src/imperative/imperative.cc | 5 ++- .../tensor/elemwise_unary_op_basic.cc | 10 ++++- .../python/unittest/test_higher_order_grad.py | 41 +++++++++++++++---- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index 3e5b3987522c..b07e761aa124 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -347,8 +347,9 @@ std::vector Imperative::Backward( x_reqs.push_back(info.grad_req); info.fresh_out_grad = true; } - CHECK_GT(xs.size(), 0) - << "There are no inputs in computation graph that require gradients."; + if (xs.empty()) { + LOG(WARNING) << "There are no inputs in computation graph that require gradients."; + } } Graph g_graph = pass::MXGradient( diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 68654e3a116e..a16d3f2d89eb 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -83,7 +83,15 @@ The storage type of ``relu`` output depends upon the input storage type: - relu(csr) = csr )code" ADD_FILELINE) -.set_attr("FGradient", ElemwiseGradUseOut{"_backward_relu"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto zero_node = MakeNode("zeros_like", n->attrs.name + "_relu_backward", {n->inputs[0]}, nullptr, &n); + auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd); diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 696ac9fa8cab..4b6bce7f6a29 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -37,25 +37,50 @@ def test_elemwise_mul(): @with_seed() def test_sin(): + def 
sin(x): + return nd.sin(x) + x = nd.array([1, 2, 3]) - x.attach_grad() - with autograd.record(): - y = nd.sin(x) - y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] - y_grad.backward() expect_grad = -nd.sin(x) - assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + check_second_order_unary(x, sin, expect_grad) @with_seed() def test_cos(): + def cos(x): + return nd.cos(x) + x = nd.array([1, 2, 3]) + expect_grad = -nd.cos(x) + check_second_order_unary(x, cos, expect_grad) + + +@with_seed() +def test_negative(): + def negative(x): + return nd.negative(x) + + x = nd.array([1, 2, 3]) + expect_grad = nd.zeros_like(x) + check_second_order_unary(x, negative, expect_grad) + + +@with_seed() +def test_relu(): + def relu(x): + return nd.relu(x) + + x = nd.array([1, 2, 3]) + expect_grad = nd.zeros_like(x) + check_second_order_unary(x, relu, expect_grad) + + +def check_second_order_unary(x, op, expect_grad): x.attach_grad() with autograd.record(): - y = nd.cos(x) + y = op(x) y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] y_grad.backward() - expect_grad = -nd.cos(x) assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) From 4dc0907a6cb636966d44018879c14ca4cfcf2a61 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 5 Apr 2019 11:26:41 -0700 Subject: [PATCH 04/27] fix lint --- src/operator/tensor/elemwise_unary_op_basic.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index a16d3f2d89eb..3f794966dc92 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -85,9 +85,12 @@ The storage type of ``relu`` output depends upon the input storage type: )code" ADD_FILELINE) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto zero_node = MakeNode("zeros_like", n->attrs.name + "_relu_backward", {n->inputs[0]}, nullptr, &n); - auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); + auto zero_node = MakeNode("zeros_like", n->attrs.name + "_backward", + {n->inputs[0]}, nullptr, &n); + auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", + {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", + {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); std::vector ret; ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); return ret; From 30ff1e9925a9bdb94514ab4a1f876c46419f0d27 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 28 May 2019 13:28:02 -0700 Subject: [PATCH 05/27] register FGradient attribute for backward relu --- .../tensor/elemwise_binary_op_basic.cc | 12 +--------- .../tensor/elemwise_unary_op_basic.cc | 24 ++++++++----------- .../python/unittest/test_higher_order_grad.py | 13 ---------- 3 files changed, 11 insertions(+), 38 deletions(-) diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc index 2e1f979c805a..c5e30c68de7e 100644 --- a/src/operator/tensor/elemwise_binary_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_op_basic.cc @@ -233,17 +233,7 @@ The storage type of ``elemwise_mul`` output depends on storage types of inputs return std::vector{ResourceRequest::kTempSpace}; }) 
.add_alias("_mul").add_alias("_Mul") -.set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto lhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_lhs", - {ograds[0], n->inputs[1]}, nullptr, &n); - auto rhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_rhs", - {ograds[0], n->inputs[0]}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{lhs_grad, 0, 0}); - ret.emplace_back(nnvm::NodeEntry{rhs_grad, 0, 0}); - return ret; - }); +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_mul"}); NNVM_REGISTER_OP(_backward_mul) .set_num_inputs(3) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index ad835c654959..a096de4df067 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -83,21 +83,17 @@ The storage type of ``relu`` output depends upon the input storage type: - relu(csr) = csr )code" ADD_FILELINE) -.set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto zero_node = MakeNode("zeros_like", n->attrs.name + "_backward", - {n->inputs[0]}, nullptr, &n); - auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", - {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", - {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); +.set_attr("FGradient", ElemwiseGradUseOut{"_backward_relu"}); -MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, - unary_bwd); +MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + std::vector ret; + auto in_grad = MakeNode("zeros_like", n->attrs.name + "_backward", + {n->inputs[0]}, nullptr, &n); + ret.emplace_back(in_grad); + return ret; + }); // sigmoid MXNET_OPERATOR_REGISTER_UNARY(sigmoid) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 4b6bce7f6a29..ec94e1e9d376 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -22,19 +22,6 @@ from tests.python.unittest.common import with_seed -@with_seed() -def test_elemwise_mul(): - x = nd.array([1, 2, 3]) - y = nd.zeros(3) - x.attach_grad() - with autograd.record(): - y = nd.elemwise_mul(x, x) - y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] - y_grad.backward() - expect_grad = nd.array([2, 2, 2]) - assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) - - @with_seed() def test_sin(): def sin(x): From d9ba3da306c7b541122abae7d85434cfc574a91e Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 28 May 2019 13:33:37 -0700 Subject: [PATCH 06/27] resolve conflict --- tests/python/unittest/test_higher_order_grad.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 3fedd7cdb374..e8dc80c3e5ab 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -15,13 +15,13 @@ # specific language governing permissions and limitations # under the License. 
-<<<<<<< HEAD -import mxnet as mx + +import math import numpy as np +import mxnet as mx from mxnet import gluon, nd, autograd -from mxnet.test_utils import assert_almost_equal -from tests.python.unittest.common import with_seed - +from mxnet.test_utils import assert_almost_equal, random_arrays +from common import with_seed @with_seed() def test_sin(): @@ -63,13 +63,6 @@ def relu(x): check_second_order_unary(x, relu, expect_grad) -def check_second_order_unary(x, op, expect_grad): -======= -import math - -from mxnet import nd, autograd -from mxnet.test_utils import assert_almost_equal, random_arrays -from common import with_seed @with_seed() From 1c93c7d5167e69d7f99b870a70185a2ac501de4c Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 28 May 2019 13:35:08 -0700 Subject: [PATCH 07/27] remove unused imports --- tests/python/unittest/test_higher_order_grad.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index e8dc80c3e5ab..f05ee21e5713 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -17,12 +17,11 @@ import math -import numpy as np -import mxnet as mx -from mxnet import gluon, nd, autograd +from mxnet import nd, autograd from mxnet.test_utils import assert_almost_equal, random_arrays from common import with_seed + @with_seed() def test_sin(): def sin(x): From de721bc877bc420cb2bbe3d119b7023610e662c7 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 29 May 2019 17:05:16 -0700 Subject: [PATCH 08/27] change gradient using set_attr --- .../tensor/elemwise_unary_op_basic.cc | 19 +++++----- .../python/unittest/test_higher_order_grad.py | 36 ++++++++++++------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 904561695ce2..ec2ea3707bb9 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -89,9 +89,14 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { std::vector ret; - auto in_grad = MakeNode("zeros_like", n->attrs.name + "_backward", - {n->inputs[0]}, nullptr, &n); - ret.emplace_back(in_grad); + // f(x) -> f = relu + // f'(x) = 1 if x > 0 else 0 + // f''(x) = 0 + auto gx = nnvm::NodeEntry{n}; // f'(x) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", + {ograds[0], gx}, nullptr, &n)); + ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in", + {gx}, nullptr, &n)); return ret; }); @@ -660,13 +665,7 @@ The storage type of ``negative`` output depends upon the input storage type: - negative(csr) = csr )code") -.set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto in_grad = MakeNode("negative", n->attrs.name + "_backward", {ograds[0]}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); +.set_attr("FGradient", ElemwiseGradUseNone{"negative"}); // reciprocal MXNET_OPERATOR_REGISTER_UNARY(reciprocal) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index f05ee21e5713..60816bf6487b 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -27,9 +27,12 @@ def test_sin(): def sin(x): 
return nd.sin(x) - x = nd.array([1, 2, 3]) - expect_grad = -nd.sin(x) - check_second_order_unary(x, sin, expect_grad) + def grad_grad_op(x): + return -nd.sin(x) + + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, sin, grad_grad_op) @with_seed() @@ -37,9 +40,12 @@ def test_cos(): def cos(x): return nd.cos(x) - x = nd.array([1, 2, 3]) - expect_grad = -nd.cos(x) - check_second_order_unary(x, cos, expect_grad) + def grad_grad_op(x): + return -nd.cos(x) + + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, cos, grad_grad_op) @with_seed() @@ -47,9 +53,12 @@ def test_negative(): def negative(x): return nd.negative(x) - x = nd.array([1, 2, 3]) - expect_grad = nd.zeros_like(x) - check_second_order_unary(x, negative, expect_grad) + def grad_grad_op(x): + return nd.zeros_like(x) + + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, negative, grad_grad_op) @with_seed() @@ -57,11 +66,12 @@ def test_relu(): def relu(x): return nd.relu(x) - x = nd.array([1, 2, 3]) - expect_grad = nd.zeros_like(x) - check_second_order_unary(x, relu, expect_grad) - + def grad_grad_op(x): + return nd.zeros_like(x) + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, relu, grad_grad_op) @with_seed() From 0ac094262b740370d667aa79ce55ee68bf1095e5 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 29 May 2019 17:13:43 -0700 Subject: [PATCH 09/27] remove higher order grad test for negative(x) --- src/imperative/imperative.cc | 5 ++--- tests/python/unittest/test_higher_order_grad.py | 13 ------------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index cb6e1446f5ba..9c7621975cf5 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -349,9 +349,8 @@ std::vector Imperative::Backward( x_reqs.push_back(info.grad_req); info.fresh_out_grad = true; } - if (xs.empty()) { - LOG(WARNING) << "There are no inputs in computation graph that require gradients."; - } + CHECK_GT(xs.size(), 0) + << "There are no inputs in computation graph that require gradients."; } Graph g_graph = pass::MXGradient( diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 60816bf6487b..3b686e276e7f 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -48,19 +48,6 @@ def grad_grad_op(x): check_second_order_unary(array, cos, grad_grad_op) -@with_seed() -def test_negative(): - def negative(x): - return nd.negative(x) - - def grad_grad_op(x): - return nd.zeros_like(x) - - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: - check_second_order_unary(array, negative, grad_grad_op) - - @with_seed() def test_relu(): def relu(x): From f8e624ef29ffc8a6c475e4754ec66bd2f8ef08fa Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 29 May 2019 22:07:21 -0700 Subject: [PATCH 10/27] fix lint --- src/operator/tensor/elemwise_unary_op_basic.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index ec2ea3707bb9..bd94f81bff26 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ 
b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -92,7 +92,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd f = relu // f'(x) = 1 if x > 0 else 0 // f''(x) = 0 - auto gx = nnvm::NodeEntry{n}; // f'(x) + auto gx = nnvm::NodeEntry{n}; // f'(x) ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", {ograds[0], gx}, nullptr, &n)); ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in", From 8538980062db57ec4dbe3b97a436b8b34f7921c8 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 09:44:31 -0700 Subject: [PATCH 11/27] reverse indent --- src/imperative/imperative.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index 9c7621975cf5..d8fba1c169ec 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -350,7 +350,7 @@ std::vector Imperative::Backward( info.fresh_out_grad = true; } CHECK_GT(xs.size(), 0) - << "There are no inputs in computation graph that require gradients."; + << "There are no inputs in computation graph that require gradients."; } Graph g_graph = pass::MXGradient( From 1ee38b5a1b6db91a7dfb5c223465aaec79791891 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 09:46:32 -0700 Subject: [PATCH 12/27] remove unused backward operator --- src/operator/tensor/elemwise_unary_op_trig.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 3308d5f13f1e..8a04d023376e 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -54,8 +54,6 @@ The storage type of ``sin`` output depends upon the input storage type: return ret; }); -MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd); - // cos MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(cos, cpu, mshadow_op::cos) MXNET_ADD_SPARSE_OP_ALIAS(cos) @@ -81,8 +79,6 @@ The storage type of ``cos`` output is always dense return ret; }); -MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd); - // tan MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(tan, cpu, mshadow_op::tan) .describe(R"code(Computes the element-wise tangent of the input array. 
From c18f317f7eb547163a987b15d5bd481cc2137550 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 10:28:04 -0700 Subject: [PATCH 13/27] refactor backward for sin(x) and cos(x) --- src/operator/tensor/elemwise_unary_op_trig.cc | 61 +++++++++++++------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 8a04d023376e..d97b34f6207f 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -44,15 +44,28 @@ The storage type of ``sin`` output depends upon the input storage type: - sin(csr) = csr )code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_sin" }); + +MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd) .set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto x_grad = MakeNode("cos", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", - {ograds[0], nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // f(x) = sin(x) + // f'(x) = cos(x) + // f''(x) = -sin(x) + auto grad_x = nnvm::NodeEntry(n); + auto grad_grad_x_mid = MakeNode("sin", n->attrs.name + "_mid_grad_grad", + {n->inputs[1]}, nullptr, &n); + auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", + {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + std::vector ret; + // for the backward of the _backward_sin node + // first input is the ograd and second input is x (because ElemwiseUseIn) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", + {ograds[0], grad_x}, nullptr, &n)); + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", + {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + return ret; + }); // cos MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(cos, cpu, mshadow_op::cos) @@ -67,17 +80,29 @@ The input should be in radians (:math:`2\pi` rad equals 360 degrees). 
The storage type of ``cos`` output is always dense )code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_cos"}); + +MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd) .set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto x_grad = MakeNode("sin", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); - auto neg_x_grad = MakeNode("negative", n->attrs.name + "_mid_neg_x_grad", - {nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", - {ograds[0], nnvm::NodeEntry{neg_x_grad, 0, 0}}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // f(x) = cos(x) + // f'(x) = -sin(x) + // f''(x) = -cos(x) + auto grad_x = nnvm::NodeEntry(n); + auto grad_grad_x_mid = MakeNode("cos", n->attrs.name + "_mid_grad_grad", + {n->inputs[1]}, nullptr, &n); + auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", + {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + std::vector ret; + // for the backward of the _backward_cos node + // first input is the ograd and second input is x (because ElemwiseUseIn) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", + {ograds[0], grad_x}, nullptr, &n)); + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", + {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + return ret; + }); + // tan MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(tan, cpu, mshadow_op::tan) From 689cfeeaf3972a4d726fde7c58810a68399d6ebe Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 13:27:19 -0700 Subject: [PATCH 14/27] change value init to list init --- src/operator/tensor/elemwise_unary_op_trig.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index d97b34f6207f..dd72e4ee58c2 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -52,7 +52,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", @@ -63,7 +63,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_backward_grad_grad", {ograds[0], grad_x}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); return ret; }); @@ -88,7 +88,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", @@ -99,7 +99,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_backward_grad_grad", {ograds[0], grad_x}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); return ret; }); From 0b6c2ef7374718168d45da5ba673c17f5993dc5b Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 31 May 2019 11:48:14 -0700 Subject: [PATCH 15/27] 
change to list initialization --- src/operator/tensor/elemwise_unary_op_trig.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index dd72e4ee58c2..40801b5847cb 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -56,7 +56,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); std::vector ret; // for the backward of the _backward_sin node // first input is the ograd and second input is x (because ElemwiseUseIn) @@ -92,7 +92,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); std::vector ret; // for the backward of the _backward_cos node // first input is the ograd and second input is x (because ElemwiseUseIn) From 31f671f9de7d1a65470751c9748e5638e62874f5 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 31 May 2019 14:46:19 -0700 Subject: [PATCH 16/27] generate random shape in test --- .../python/unittest/test_higher_order_grad.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 3b686e276e7f..77bfa68157aa 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -18,7 +18,7 @@ import math from mxnet import nd, autograd -from mxnet.test_utils import assert_almost_equal, random_arrays +from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd from common import with_seed @@ -30,8 +30,9 @@ def sin(x): def grad_grad_op(x): return -nd.sin(x) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, sin, grad_grad_op) @@ -43,8 +44,9 @@ def cos(x): def grad_grad_op(x): return -nd.cos(x) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, cos, grad_grad_op) @@ -56,8 +58,9 @@ def relu(x): def grad_grad_op(x): return nd.zeros_like(x) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, relu, grad_grad_op) @@ -69,9 +72,9 @@ def log(x): def grad_grad_op(x): return -1/(x**2) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, log, grad_grad_op) @@ -83,9 +86,9 @@ def log2(x): def grad_grad_op(x): return -1/((x**2) * math.log(2)) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array 
= random_arrays(shape) check_second_order_unary(array, log2, grad_grad_op) @@ -97,9 +100,9 @@ def log10(x): def grad_grad_op(x): return -1/((x**2) * math.log(10)) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, log10, grad_grad_op) From 62fcca3c4236195433295fdce6c83c9607ddfc6e Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 3 Jun 2019 15:38:38 -0700 Subject: [PATCH 17/27] fix a bug in second order backward --- .../tensor/elemwise_unary_op_basic.cc | 5 +- src/operator/tensor/elemwise_unary_op_trig.cc | 46 ++++++++++++------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index bd94f81bff26..e7c5cb6cd1d2 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -89,7 +89,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { std::vector ret; - // f(x) -> f = relu + // ograds[0]: d^2L/dx^2 + // inputs[0]: dL/dy + // inputs[1]: y + // f(x) -> relu(x) // f'(x) = 1 if x > 0 else 0 // f''(x) = 0 auto gx = nnvm::NodeEntry{n}; // f'(x) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 40801b5847cb..7ebba7773132 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -49,21 +49,27 @@ The storage type of ``sin`` output depends upon the input storage type: MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: d^2L/dx^2 + // inputs[0]: dL/dy + // inputs[1]: x (ElemwiseUseIn) // f(x) = sin(x) // f'(x) = cos(x) // f''(x) = -sin(x) - auto grad_x = nnvm::NodeEntry{n}; - auto grad_grad_x_mid = MakeNode("sin", n->attrs.name + "_mid_grad_grad", - {n->inputs[1]}, nullptr, &n); - auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); + auto x_grad = MakeNode("cos", n->attrs.name + "_x_grad", + {n->inputs[1]}, nullptr, &n); + auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_grad_mid", + {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + + auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", + {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + std::vector ret; - // for the backward of the _backward_sin node - // first input is the ograd and second input is x (because ElemwiseUseIn) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], grad_x}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; }); @@ -85,21 +91,29 @@ The storage type of ``cos`` output is always dense MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: d^2L/dx^2 + // inputs[0]: dL/dy + // inputs[1]: x 
(ElemwiseUseIn) // f(x) = cos(x) // f'(x) = -sin(x) // f''(x) = -cos(x) - auto grad_x = nnvm::NodeEntry{n}; - auto grad_grad_x_mid = MakeNode("cos", n->attrs.name + "_mid_grad_grad", - {n->inputs[1]}, nullptr, &n); - auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); + auto x_grad = MakeNode("negative", n->attrs.name + "_x_grad", + {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_mid", + {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + {nnvm::NodeEntry{MakeNode("cos", n->attrs.name + "_grad_grad_mid", + {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + + auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", + {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + std::vector ret; // for the backward of the _backward_cos node // first input is the ograd and second input is x (because ElemwiseUseIn) ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], grad_x}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; }); From a0a0e75eb25eaaf83d7cdab18d228d44b804b462 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 3 Jun 2019 16:49:53 -0700 Subject: [PATCH 18/27] fix lint --- src/operator/tensor/elemwise_unary_op_trig.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 7ebba7773132..e4b7b5b2638e 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -58,8 +58,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_x_grad", {n->inputs[1]}, nullptr, &n); auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", - {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_grad_mid", - {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + {nnvm::NodeEntry{ + MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) + }}, + nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); @@ -101,8 +103,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", - {nnvm::NodeEntry{MakeNode("cos", n->attrs.name + "_grad_grad_mid", - {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + {nnvm::NodeEntry{ + MakeNode("cos", n->attrs.name + "_grad_grad_mid",{n->inputs[1]}, nullptr, &n) + }}, + nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); From 451c4bd108babf4c6a7a33c86373734f77d9a084 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 3 Jun 2019 21:30:08 -0700 Subject: [PATCH 19/27] fix lint --- src/operator/tensor/elemwise_unary_op_trig.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 
e4b7b5b2638e..6157c056476b 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -60,8 +60,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_x_grad_grad", {nnvm::NodeEntry{ MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) - }}, - nullptr, &n); + }}, nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); @@ -100,13 +99,13 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_x_grad", - {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_mid", - {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + {nnvm::NodeEntry{ + MakeNode("sin", n->attrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n) + }}, nullptr, &n); auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", {nnvm::NodeEntry{ - MakeNode("cos", n->attrs.name + "_grad_grad_mid",{n->inputs[1]}, nullptr, &n) - }}, - nullptr, &n); + MakeNode("cos", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) + }}, nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); From b9b0c93cb14b6b12a3bbd955a709aa0949236811 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 5 Jun 2019 12:32:44 -0700 Subject: [PATCH 20/27] address reviewer comment and renaming --- src/operator/tensor/elemwise_unary_op_basic.cc | 11 +++++++---- src/operator/tensor/elemwise_unary_op_trig.cc | 16 ++++++++-------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index e7c5cb6cd1d2..f2b8dd6b1314 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -89,17 +89,20 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { std::vector ret; - // ograds[0]: d^2L/dx^2 + // ograds[0]: dL/dxgrad // inputs[0]: dL/dy // inputs[1]: y // f(x) -> relu(x) // f'(x) = 1 if x > 0 else 0 // f''(x) = 0 - auto gx = nnvm::NodeEntry{n}; // f'(x) + auto dydx = MakeNode("_greater", n->attrs.name + "_dydx", + {n->inputs[1], nnvm::NodeEntry{ + MakeNode("zeros_like", n->attrs.name + "tmp", {n->inputs[1]}, nullptr, &n) + }}, nullptr, &n); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], gx}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry(dydx)}, nullptr, &n)); ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in", - {gx}, nullptr, &n)); + {n->inputs[1]}, nullptr, &n)); return ret; }); diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 6157c056476b..b7cf76e4eb2d 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -55,20 +55,20 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_x_grad", + auto dydx = MakeNode("cos", n->attrs.name + "_dydx", {n->inputs[1]}, nullptr, &n); - auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2", {nnvm::NodeEntry{ MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) }}, nullptr, &n); auto 
grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", - {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n); std::vector ret; ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; @@ -98,23 +98,23 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_x_grad", + auto dydx = MakeNode("negative", n->attrs.name + "_dydx", {nnvm::NodeEntry{ MakeNode("sin", n->attrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n) }}, nullptr, &n); - auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2", {nnvm::NodeEntry{ MakeNode("cos", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) }}, nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", - {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n); std::vector ret; // for the backward of the _backward_cos node // first input is the ograd and second input is x (because ElemwiseUseIn) ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; From 94e3b5f75ed33484856899fa96f621838b5a33aa Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 18 Jun 2019 17:39:43 -0700 Subject: [PATCH 21/27] test 2nd order gradient for sigmoid --- src/common/exec_utils.h | 6 +++--- src/imperative/imperative.cc | 7 +++++++ src/operator/tensor/elemwise_unary_op_basic.cc | 17 ++++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h index 0551b429f17e..b9560e1cfe83 100644 --- a/src/common/exec_utils.h +++ b/src/common/exec_utils.h @@ -286,7 +286,7 @@ inline void LogMemoryPlan(const nnvm::Graph& g) { const auto &idx = g.indexed_graph(); const auto& vshape = g.GetAttr("shape"); const auto& vtype = g.GetAttr("dtype"); - const auto& vstorage = g.GetAttr("storage_id"); + //const auto& vstorage = g.GetAttr("storage_id"); // find node range uint32_t node_start = 0, node_end = idx.num_nodes(); if (g.attrs.count("node_range")) { @@ -304,13 +304,13 @@ inline void LogMemoryPlan(const nnvm::Graph& g) { auto eid = idx.entry_id(e); size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024; LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " (" - << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]); + << kilo_bytes << " KB) -> "; // << storage_str(vstorage[eid]); } for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { uint32_t eid = idx.entry_id(nid, index); size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024; LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " (" - << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]); + << kilo_bytes << " KB) -> "; // << storage_str(vstorage[eid]); } } } diff --git a/src/imperative/imperative.cc 
b/src/imperative/imperative.cc index d8fba1c169ec..e804ae8032b5 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -317,6 +317,7 @@ std::vector Imperative::Backward( } } + // Get gradient graph Symbol sym; sym.outputs = graph.outputs; @@ -368,6 +369,8 @@ std::vector Imperative::Backward( graph.outputs.push_back(e); } } + + const auto& idx = graph.indexed_graph(); // get number of nodes used in forward pass size_t num_forward_nodes = 0; @@ -501,6 +504,10 @@ std::vector Imperative::Backward( } } + + std::cout<<"Complete graph: " << std::endl; + common::LogMemoryPlan(graph); + // Execution bool prev_recording = set_is_recording(create_graph); diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index f2b8dd6b1314..f517d100d033 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -121,7 +121,22 @@ The storage type of ``sigmoid`` output is always dense .set_attr("FGradient", ElemwiseGradUseOut{"_backward_sigmoid"}); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_sigmoid, - unary_bwd); + unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto fx = nnvm::NodeEntry{n->inputs[1]}; + auto gx_ograd = nnvm::NodeEntry{n}; + + std::vector ret; + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", + {ograds[0], fx}, nullptr, &n)); + + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_inp", + {ograds[0], fx}, nullptr, &n)); + return ret; + }); + + DMLC_REGISTER_PARAMETER(HardSigmoidParam); MXNET_OPERATOR_REGISTER_UNARY(hard_sigmoid) From 7d95760418f7303bfda7544e7cbc7d3ba46c3809 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 19 Jun 2019 22:43:43 -0700 Subject: [PATCH 22/27] higher order grads for sigmoid --- src/common/exec_utils.h | 5 ++-- src/imperative/imperative.cc | 6 ++--- .../tensor/elemwise_unary_op_basic.cc | 24 ++++++++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h index b9560e1cfe83..d8b7a33bf22b 100644 --- a/src/common/exec_utils.h +++ b/src/common/exec_utils.h @@ -286,7 +286,6 @@ inline void LogMemoryPlan(const nnvm::Graph& g) { const auto &idx = g.indexed_graph(); const auto& vshape = g.GetAttr("shape"); const auto& vtype = g.GetAttr("dtype"); - //const auto& vstorage = g.GetAttr("storage_id"); // find node range uint32_t node_start = 0, node_end = idx.num_nodes(); if (g.attrs.count("node_range")) { @@ -304,13 +303,13 @@ inline void LogMemoryPlan(const nnvm::Graph& g) { auto eid = idx.entry_id(e); size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024; LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " (" - << kilo_bytes << " KB) -> "; // << storage_str(vstorage[eid]); + << kilo_bytes << " KB)"; } for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { uint32_t eid = idx.entry_id(nid, index); size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024; LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " (" - << kilo_bytes << " KB) -> "; // << storage_str(vstorage[eid]); + << kilo_bytes << " KB)"; } } } diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index e804ae8032b5..0f5665d85e5b 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -504,9 +504,9 @@ std::vector Imperative::Backward( } } - - std::cout<<"Complete 
graph: " << std::endl; - common::LogMemoryPlan(graph); + if (dmlc::GetEnv("MXNET_MEM_PLAN_VERBOSE_LOGGING", false)) { + common::LogMemoryPlan(graph); + } // Execution diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index f517d100d033..309129a9b97a 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -124,15 +124,23 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_sigmoid, unary_bwd) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto fx = nnvm::NodeEntry{n->inputs[1]}; - auto gx_ograd = nnvm::NodeEntry{n}; - + // n->inputs[0] : y_grad + // n->inputs[1] : f(x) = sigmoid(x) + // ograds[0] : head_grads + // f''(x) = f'(x) * (1 - 2*f(x)) + auto ones = MakeNode("ones_like", n->attrs.name + "_grad_ones", {n->inputs[1]}, nullptr, &n); + const std::unordered_map args = {{"scalar", "2.0"}}; + auto two_y = MakeNode("_mul_scalar", n->attrs.name + "_mul_two", {n->inputs[1]}, &args, &n); + auto one_minus_two_y = MakeNode("elemwise_sub", n->attrs.name + "_grad_sub", + {nnvm::NodeEntry{ones}, nnvm::NodeEntry{two_y}}, nullptr, &n); + auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_grad_mul", + {n->inputs[0], nnvm::NodeEntry{one_minus_two_y}}, nullptr, &n); + // when building gradient graph, the backward node of n->inputs[1] will be + // added to the graph again, therefore f`(x) will be multiplied std::vector ret; - ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], fx}, nullptr, &n)); - - ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_inp", - {ograds[0], fx}, nullptr, &n)); + ret.emplace_back(ograds[0]); // this output is not passed out if gradient w.r.t x only + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_in", + {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; }); From f43489d6b0e01fec9a80821e6c630b2f3510f644 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 19 Jun 2019 22:48:37 -0700 Subject: [PATCH 23/27] add unit test --- tests/python/unittest/test_higher_order_grad.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 77bfa68157aa..52bfc9d8345a 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -105,6 +105,22 @@ def grad_grad_op(x): array = random_arrays(shape) check_second_order_unary(array, log10, grad_grad_op) +@with_seed() +def test_sigmoid(): + def sigmoid(x): + return nd.sigmoid(x) + + def grad_op(x): + return sigmoid(x) * (1 - sigmoid(x)) + + def grad_grad_op(x): + return grad_op(x) * (1 - 2 * sigmoid(x)) + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + check_second_order_unary(array, sigmoid, grad_grad_op) + def check_second_order_unary(x, op, grad_grad_op): x = nd.array(x) From 55d7ebc227e7e15c42f7d95d5437a0da475aeef1 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 19 Jun 2019 22:56:26 -0700 Subject: [PATCH 24/27] remove blank lines --- src/imperative/imperative.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index 0f5665d85e5b..e2c0c9d4c9d4 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -317,7 +317,6 @@ std::vector Imperative::Backward( } } - // Get 
gradient graph Symbol sym; sym.outputs = graph.outputs; @@ -369,8 +368,6 @@ std::vector Imperative::Backward( graph.outputs.push_back(e); } } - - const auto& idx = graph.indexed_graph(); // get number of nodes used in forward pass size_t num_forward_nodes = 0; From 10dab5878169cfcf5d9288ce10e6280d295cb5ac Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 19 Jun 2019 23:08:16 -0700 Subject: [PATCH 25/27] update test --- tests/python/unittest/test_higher_order_grad.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 966ad1ebb90d..ad14c5050c1b 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -105,6 +105,7 @@ def grad_grad_op(x): array = random_arrays(shape) check_second_order_unary(array, log10, grad_grad_op) + @with_seed() def test_sigmoid(): def sigmoid(x): From d134d2f58f24c65164e82b8d0d6c979886960fb8 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 19 Jun 2019 23:34:39 -0700 Subject: [PATCH 26/27] fix lint --- src/operator/tensor/elemwise_unary_op_basic.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 934230e11ea1..dbf6c8b8f75e 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -138,7 +138,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_sigmoid, // when building gradient graph, the backward node of n->inputs[1] will be // added to the graph again, therefore f`(x) will be multiplied std::vector ret; - ret.emplace_back(ograds[0]); // this output is not passed out if gradient w.r.t x only + ret.emplace_back(ograds[0]); // this output is not passed out if gradient w.r.t x only ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_in", {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; From 6848d42ca45a2d9163576c9c6222c165d049bf62 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 1 Jul 2019 21:39:35 -0700 Subject: [PATCH 27/27] fix third order gradient for sigmoid --- src/operator/tensor/elemwise_unary_op_basic.cc | 7 ++++++- src/operator/tensor/elemwise_unary_op_trig.cc | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index dbf6c8b8f75e..26c74085dbe6 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -128,6 +128,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_sigmoid, // n->inputs[1] : f(x) = sigmoid(x) // ograds[0] : head_grads // f''(x) = f'(x) * (1 - 2*f(x)) + // NodeEntry{n} : y_grad * f'(x) auto ones = MakeNode("ones_like", n->attrs.name + "_grad_ones", {n->inputs[1]}, nullptr, &n); const std::unordered_map args = {{"scalar", "2.0"}}; auto two_y = MakeNode("_mul_scalar", n->attrs.name + "_mul_two", {n->inputs[1]}, &args, &n); @@ -135,10 +136,14 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_sigmoid, {nnvm::NodeEntry{ones}, nnvm::NodeEntry{two_y}}, nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_grad_mul", {n->inputs[0], nnvm::NodeEntry{one_minus_two_y}}, nullptr, &n); + auto dydx = MakeNode("elemwise_div", n->attrs.name + "_grad_div", + {nnvm::NodeEntry{n}, n->inputs[0]}, nullptr, &n); + // when building gradient graph, the backward 
node of n->inputs[1] will be // added to the graph again, therefore f`(x) will be multiplied std::vector ret; - ret.emplace_back(ograds[0]); // this output is not passed out if gradient w.r.t x only + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad", + {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_in", {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index b7cf76e4eb2d..13410e9422a5 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -49,7 +49,7 @@ The storage type of ``sin`` output depends upon the input storage type: MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { - // ograds[0]: d^2L/dx^2 + // ograds[0]: head_grad_grads (dL/dxgrad) // inputs[0]: dL/dy // inputs[1]: x (ElemwiseUseIn) // f(x) = sin(x) @@ -92,7 +92,7 @@ The storage type of ``cos`` output is always dense MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { - // ograds[0]: d^2L/dx^2 + // ograds[0]: head_grad_grads (dL/dx_grad) // inputs[0]: dL/dy // inputs[1]: x (ElemwiseUseIn) // f(x) = cos(x)
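
The patch series above registers explicit FGradient lambdas so that the backward nodes of sin, cos, relu and sigmoid are themselves differentiable. As a usage illustration only (it is not part of any patch), the sketch below shows how those second-order gradients can be exercised from Python. It assumes an MXNet build that already contains these changes; the helper name second_order_grad is hypothetical and mirrors check_second_order_unary from tests/python/unittest/test_higher_order_grad.py. The expected values follow the derivative comments in the patches: sin''(x) = -sin(x), cos''(x) = -cos(x), relu''(x) = 0, and sigmoid''(x) = sigmoid'(x) * (1 - 2*sigmoid(x)).

from mxnet import nd, autograd
from mxnet.test_utils import assert_almost_equal


def second_order_grad(op, x):
    """Compute d/dx (dy/dx) for y = op(x) by differentiating twice with autograd."""
    x = x.copy()
    x.attach_grad()
    with autograd.record():
        y = op(x)
        # Keep the first-order gradient in the graph so it can be
        # differentiated again (create_graph=True).
        y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
    y_grad.backward()
    return x.grad


x = nd.array([0.5, 1.0, 2.0])

# sin: f''(x) = -sin(x)
assert_almost_equal(second_order_grad(nd.sin, x).asnumpy(), (-nd.sin(x)).asnumpy())

# cos: f''(x) = -cos(x)
assert_almost_equal(second_order_grad(nd.cos, x).asnumpy(), (-nd.cos(x)).asnumpy())

# relu: f''(x) = 0 (for x != 0)
assert_almost_equal(second_order_grad(nd.relu, x).asnumpy(), nd.zeros_like(x).asnumpy())

# sigmoid: f'(x) = s*(1 - s) and f''(x) = f'(x)*(1 - 2*s), with s = sigmoid(x)
s = nd.sigmoid(x)
expected = (s * (1 - s) * (1 - 2 * s)).asnumpy()
assert_almost_equal(second_order_grad(nd.sigmoid, x).asnumpy(), expected)

The helper follows the same pattern as the unit tests added in this series: record the forward pass, take the first gradient with create_graph=True so the gradient computation itself stays on the autograd tape, then call backward() on that gradient to populate x.grad with the second derivative.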