From 406d707ffe4dc2ea5a677605c0bdd11d2f7c006e Mon Sep 17 00:00:00 2001
From: kshitij12345
Date: Sat, 13 Jul 2019 11:16:52 +0530
Subject: [PATCH 1/5] support arcsinh, arccosh for higher order grad

---
 src/operator/tensor/elemwise_unary_op_trig.cc | 62 ++++++++++++++++++-
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc
index 13410e9422a5..79bd4538e365 100644
--- a/src/operator/tensor/elemwise_unary_op_trig.cc
+++ b/src/operator/tensor/elemwise_unary_op_trig.cc
@@ -307,7 +307,36 @@ The storage type of ``arcsinh`` output depends upon the input storage type:
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_arcsinh" });
 
 MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arcsinh,
-                                                  unary_bwd<mshadow_op::arcsinh_grad>);
+                                                  unary_bwd<mshadow_op::arcsinh_grad>)
+.set_attr<nnvm::FGradient>("FGradient",
+    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+      // ograds[0]: head_grad_grads (dL/dy_grad)
+      // inputs[0]: dL/dy
+      // inputs[1]: x (ElemwiseGradUseIn)
+      // f(x) = arcsinh(x)
+      // n: f'(x) = 1/(x^2 + 1)^1/2
+      // f''(x) = f'(x) * x/(x^2 + 1) = x/(x^2 + 1)^(3/2)
+      // Note: x/(x^2 + 1) = x * f'(x)^2
+      auto dydx = n->inputs[0];
+      auto x = n->inputs[1];
+      auto dydx_mul_grad_x = nnvm::NodeEntry{n};
+      auto grad_x = MakeNode("elemwise_div", n->attrs.name + "_grad_x",
+                             {dydx_mul_grad_x, dydx}, nullptr, &n);
+      auto grad_x_square = MakeNode("square", n->attrs.name + "_grad_x_square",
+                                    {nnvm::NodeEntry{grad_x}}, nullptr, &n);
+      auto grad_x_square_mul_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_x_square_mul_x",
+                                          {nnvm::NodeEntry{grad_x_square}, x}, nullptr, &n);
+      auto grad_grad_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_grad_x",
+                                  {dydx_mul_grad_x, nnvm::NodeEntry{grad_x_square_mul_x}},
+                                  nullptr, &n);
+
+      std::vector<nnvm::NodeEntry> ret;
+      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
+                                {ograds[0], nnvm::NodeEntry{grad_x}}, nullptr, &n));
+      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
+                                {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n));
+      return ret;
+    });
 
 // arccosh
 MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(arccosh, cpu, mshadow_op::arccosh)
@@ -321,7 +350,36 @@ The storage type of ``arccosh`` output is always dense
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_arccosh" });
 
 MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arccosh,
-                                                  unary_bwd<mshadow_op::arccosh_grad>);
+                                                  unary_bwd<mshadow_op::arccosh_grad>)
+.set_attr<nnvm::FGradient>("FGradient",
+    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+      // ograds[0]: head_grad_grads (dL/dy_grad)
+      // inputs[0]: dL/dy
+      // inputs[1]: x (ElemwiseGradUseIn)
+      // f(x) = arccosh(x)
+      // n: f'(x) = 1/((x - 1)^1/2 * (x + 1)^1/2)
+      // f''(x) = f'(x) * x/((x + 1)*(x - 1)) = x/((x-1)^1/2 * (x+1)^1/2 * (x-1) * (x+1))
+      // Note: x/((x-1)*(x+1)) = x * f'(x)^2
+      auto dydx = n->inputs[0];
+      auto x = n->inputs[1];
+      auto dydx_mul_grad_x = nnvm::NodeEntry{n};
+      auto grad_x = MakeNode("elemwise_div", n->attrs.name + "_grad_x",
+                             {dydx_mul_grad_x, dydx}, nullptr, &n);
+      auto grad_x_square = MakeNode("square", n->attrs.name + "_grad_x_square",
+                                    {nnvm::NodeEntry{grad_x}}, nullptr, &n);
+      auto grad_x_square_mul_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_x_square_mul_x",
+                                          {nnvm::NodeEntry{grad_x_square}, x}, nullptr, &n);
+      auto grad_grad_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_grad_x",
+                                  {dydx_mul_grad_x, nnvm::NodeEntry{grad_x_square_mul_x}},
+                                  nullptr, &n);
+
+      std::vector<nnvm::NodeEntry> ret;
+      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
+                                {ograds[0], nnvm::NodeEntry{grad_x}}, nullptr, &n));
+      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
+                                {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n));
+      return ret;
+    });
 
 // arctanh
 MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(arctanh, cpu, mshadow_op::arctanh)

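The two FGradient lambdas above build the second-order expression out of nodes that already exist: grad_x recovers f'(x) by dividing the backward node's output (dL/dy * f'(x)) by the incoming dL/dy, and grad_grad_x then composes dL/dy * x * f'(x)^3, the closed form quoted in the comments. A rough NumPy sketch of that factorisation for arcsinh (illustrative only; none of these Python names appear in the patch, and the arccosh branch works the same way with f'(x) = 1/(sqrt(x - 1) * sqrt(x + 1))):

    import numpy as np

    x = np.linspace(-3.0, 3.0, 7)
    dldy = np.random.rand(7) + 0.5            # stand-in for the incoming head gradient dL/dy (kept nonzero)

    fprime = 1.0 / np.sqrt(x ** 2 + 1.0)      # f'(x) for f = arcsinh
    dydx_mul_grad_x = dldy * fprime           # what the _backward_arcsinh node outputs
    grad_x = dydx_mul_grad_x / dldy           # the elemwise_div node: recovers f'(x)
    grad_grad_x = dydx_mul_grad_x * grad_x ** 2 * x   # square/mul chain: dL/dy * x * f'(x)^3

    # Same closed form as quoted in the comments: dL/dy * x / (x^2 + 1)^(3/2)
    assert np.allclose(grad_grad_x, dldy * x / np.sqrt((x ** 2 + 1.0) ** 3))
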
"_backward_grad_grad", + {ograds[0], nnvm::NodeEntry{grad_x}}, nullptr, &n)); + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", + {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); + return ret; + }); // arctanh MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(arctanh, cpu, mshadow_op::arctanh) From 0fd280c7f8e9fdd209d1840c5da729665b4ddd19 Mon Sep 17 00:00:00 2001 From: kshitij12345 Date: Sat, 13 Jul 2019 11:51:43 +0530 Subject: [PATCH 2/5] add relevant tests --- .../python/unittest/test_higher_order_grad.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 0f07d014d435..aaa969952592 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -17,6 +17,7 @@ import math +import random from mxnet import nd, autograd from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd from common import with_seed @@ -50,6 +51,40 @@ def grad_grad_op(x): check_second_order_unary(array, cos, grad_grad_op) +@with_seed() +def test_arcsinh(): + def arcsinh(x): + return nd.arcsinh(x) + + def grad_grad_op(x): + return x/nd.sqrt((nd.square(x)+1)**3) + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + check_second_order_unary(array, arcsinh, grad_grad_op) + + +@with_seed() +def test_arccosh(): + def arccosh(x): + return nd.arccosh(x) + + def grad_grad_op(x): + return x/(nd.sqrt(x-1) * nd.sqrt(x+1) * (x+1) * (x-1)) + + sigma = random.randint(25, 100) + mu = random.randint(500, 1000) + + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) + array = array * sigma + mu + # Domain of arccosh 1 to infinity. 
From 55c83853f4c7b0664d340b9115b89ff4c0abd675 Mon Sep 17 00:00:00 2001
From: kshitij12345
Date: Fri, 26 Jul 2019 20:40:49 +0530
Subject: [PATCH 3/5] update comments

---
 src/operator/tensor/elemwise_unary_op_trig.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc
index 79bd4538e365..d4ad0b9582b2 100644
--- a/src/operator/tensor/elemwise_unary_op_trig.cc
+++ b/src/operator/tensor/elemwise_unary_op_trig.cc
@@ -310,7 +310,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arcsinh,
                                                   unary_bwd<mshadow_op::arcsinh_grad>)
 .set_attr<nnvm::FGradient>("FGradient",
     [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
-      // ograds[0]: head_grad_grads (dL/dy_grad)
+      // ograds[0]: head_grad_grads (dL/dxgrad)
       // inputs[0]: dL/dy
       // inputs[1]: x (ElemwiseGradUseIn)
       // f(x) = arcsinh(x)
@@ -353,7 +353,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arccosh,
                                                   unary_bwd<mshadow_op::arccosh_grad>)
 .set_attr<nnvm::FGradient>("FGradient",
     [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
-      // ograds[0]: head_grad_grads (dL/dy_grad)
+      // ograds[0]: head_grad_grads (dL/dxgrad)
       // inputs[0]: dL/dy
       // inputs[1]: x (ElemwiseGradUseIn)
       // f(x) = arccosh(x)

From 0f7ce26fc23b036f2efac476ef801a9bf8625673 Mon Sep 17 00:00:00 2001
From: kshitij12345
Date: Thu, 12 Sep 2019 21:47:44 +0530
Subject: [PATCH 4/5] use NodeOpGen

---
 src/operator/tensor/elemwise_unary_op_trig.cc | 42 +++++++------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc
index 4573466f8f52..a436ebb284a3 100644
--- a/src/operator/tensor/elemwise_unary_op_trig.cc
+++ b/src/operator/tensor/elemwise_unary_op_trig.cc
@@ -450,21 +450,16 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arcsinh,
       auto dydx = n->inputs[0];
       auto x = n->inputs[1];
       auto dydx_mul_grad_x = nnvm::NodeEntry{n};
-      auto grad_x = MakeNode("elemwise_div", n->attrs.name + "_grad_x",
-                             {dydx_mul_grad_x, dydx}, nullptr, &n);
-      auto grad_x_square = MakeNode("square", n->attrs.name + "_grad_x_square",
-                                    {nnvm::NodeEntry{grad_x}}, nullptr, &n);
-      auto grad_x_square_mul_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_x_square_mul_x",
-                                          {nnvm::NodeEntry{grad_x_square}, x}, nullptr, &n);
-      auto grad_grad_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_grad_x",
-                                  {dydx_mul_grad_x, nnvm::NodeEntry{grad_x_square_mul_x}},
-                                  nullptr, &n);
+      auto op = mxnet::util::NodeOpGen{n};
+
+      auto grad_x = op.div(dydx_mul_grad_x, dydx);
+      auto grad_x_square = op.square(grad_x);
+      auto grad_x_square_mul_x = op.mul(grad_x_square, x);
+      auto grad_grad_x = op.mul(dydx_mul_grad_x, grad_x_square_mul_x);
 
       std::vector<nnvm::NodeEntry> ret;
-      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
-                                {ograds[0], nnvm::NodeEntry{grad_x}}, nullptr, &n));
-      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
-                                {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n));
+      ret.emplace_back(op.mul(ograds[0], grad_x));
+      ret.emplace_back(op.mul(ograds[0], grad_grad_x));
       return ret;
     });
 
@@ -493,21 +488,16 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arccosh,
       auto dydx = n->inputs[0];
       auto x = n->inputs[1];
       auto dydx_mul_grad_x = nnvm::NodeEntry{n};
-      auto grad_x = MakeNode("elemwise_div", n->attrs.name + "_grad_x",
-                             {dydx_mul_grad_x, dydx}, nullptr, &n);
-      auto grad_x_square = MakeNode("square", n->attrs.name + "_grad_x_square",
-                                    {nnvm::NodeEntry{grad_x}}, nullptr, &n);
-      auto grad_x_square_mul_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_x_square_mul_x",
-                                          {nnvm::NodeEntry{grad_x_square}, x}, nullptr, &n);
-      auto grad_grad_x = MakeNode("elemwise_mul", n->attrs.name + "_grad_grad_x",
-                                  {dydx_mul_grad_x, nnvm::NodeEntry{grad_x_square_mul_x}},
-                                  nullptr, &n);
+      auto op = mxnet::util::NodeOpGen{n};
+
+      auto grad_x = op.div(dydx_mul_grad_x, dydx);
+      auto grad_x_square = op.square(grad_x);
+      auto grad_x_square_mul_x = op.mul(grad_x_square, x);
+      auto grad_grad_x = op.mul(dydx_mul_grad_x, grad_x_square_mul_x);
 
       std::vector<nnvm::NodeEntry> ret;
-      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
-                                {ograds[0], nnvm::NodeEntry{grad_x}}, nullptr, &n));
-      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
-                                {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n));
+      ret.emplace_back(op.mul(ograds[0], grad_x));
+      ret.emplace_back(op.mul(ograds[0], grad_grad_x));
       return ret;
     });
 

From 8e8e332cd28ad544a73968c96f3e739445e81adf Mon Sep 17 00:00:00 2001
From: kshitij12345
Date: Sat, 14 Sep 2019 19:17:27 +0530
Subject: [PATCH 5/5] retrigger CI
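With these patches applied, the new second-order support is exercised from Python by recording a first autograd.grad call with create_graph=True and then backpropagating through its result, which is the pattern the tests above rely on via check_second_order_unary. A minimal sketch of that flow for arcsinh, assuming the formula registered in patch 1 (the shapes and tolerance here are illustrative, not taken from the test suite):

    from mxnet import nd, autograd

    x = nd.random.normal(shape=(3, 4))
    x.attach_grad()
    with autograd.record():
        y = nd.arcsinh(x)
        # First-order gradient, kept in the graph so it can be differentiated again.
        x_grad = autograd.grad(y, [x], head_grads=nd.ones_like(y),
                               create_graph=True, retain_graph=True)[0]
    x_grad.backward()

    # Matches grad_grad_op from test_arcsinh above.
    expected = x / nd.sqrt((nd.square(x) + 1) ** 3)
    assert nd.max(nd.abs(x.grad - expected)).asscalar() < 1e-5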