Fix Flaky Test Higher Order Grad (apache#17325)
* failing seed

* failed seed arcsin

* use random_uniform_arrays to generate test arrays

* use random_uniform_arrays for arccos

* fix: handle case when len(s) == 0

* change lower and upper bounds of randomly generated array

* failure case seed

With this seed, one of the elements of head_grad for the first
gradient is zero, which leads to a failure in `arctanh`: its backward
pass recovered dy/dx by dividing by head_grad, so a zero element
produces NaN (a numpy sketch of this failure mode follows the
elemwise_unary_op_trig.cc diff below).

* change gradient implementation of _backward_arctanh

* remove seed

* fix higher order gradient implementation for multiple trig operators

* relax tolerance for tanh

* address comments

  * add dtype argument.
  * consistent function signature.
  * concise list comprehension.

* fix implementation for log1p

* update function signature to be compatible with older python (Python 2 has no keyword-only arguments, hence the **kwargs-based signature)

* retrigger CI

kshitij12345 authored Feb 4, 2020
1 parent de30aa5 commit 71a5c9e
Showing 5 changed files with 57 additions and 25 deletions.
12 changes: 12 additions & 0 deletions python/mxnet/test_utils.py
@@ -102,6 +102,18 @@ def random_arrays(*shapes):
     return arrays
 
 
+def random_uniform_arrays(*shapes, **kwargs):
+    """Generate some random numpy arrays."""
+    low = kwargs.pop('low', 0.0)
+    high = kwargs.pop('high', 1.0)
+    dtype = kwargs.pop('dtype', default_dtype())
+    if len(kwargs) > 0:
+        raise TypeError('Got unexpected argument/s : ' + str(kwargs.keys()))
+    arrays = [np.random.uniform(low, high, size=s).astype(dtype)
+              for s in shapes]
+    return arrays
+
+
 def random_sample(population, k):
     """Return a k length list of the elements chosen from the population sequence."""
     assert 0 <= k <= len(population)
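For context, a minimal usage sketch of the new helper (the shapes and bounds below are illustrative, not from the commit):

    import numpy as np
    from mxnet.test_utils import random_uniform_arrays

    # Draw two arrays whose elements lie strictly inside (-1, 1),
    # e.g. to stay within the domain of arcsin/arccos/arctanh.
    a, b = random_uniform_arrays((2, 3), (4,), low=-0.99, high=0.99)
    assert a.shape == (2, 3) and b.shape == (4,)
    assert np.all(np.abs(a) <= 0.99) and np.all(np.abs(b) <= 0.99)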
6 changes: 6 additions & 0 deletions src/nnvm/node_op_util.h
@@ -80,6 +80,12 @@ class NodeOpGen {
                                                 dependent_node->attrs.name + "_negative",
                                                 {x}, nullptr, &dependent_node)};
     }
+
+    nnvm::NodeEntry ones_like(const nnvm::NodeEntry &x) {
+      return nnvm::NodeEntry{mxnet::op::MakeNode("ones_like",
+                                                 dependent_node->attrs.name + "_oneslike",
+                                                 {x}, nullptr, &dependent_node)};
+    }
 };
 
 }  // namespace util
10 changes: 6 additions & 4 deletions src/operator/tensor/elemwise_unary_op_logexp.cc
@@ -214,11 +214,13 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_log1p,
       auto dydx_mul_dldy = nnvm::NodeEntry{n};  // f'(x) * head_grads
       auto op = mxnet::util::NodeOpGen{n};
 
-      auto dydx = op.div(dydx_mul_dldy, dldy);
-
-      auto d2ydx2_mid = op.mul(dydx_mul_dldy, dydx_mul_dldy);
+      auto ones = op.ones_like(x);
+      auto dydx = nnvm::NodeEntry{mxnet::op::MakeNode("_backward_log1p",
+                                                      n->attrs.name + "_backward_log1p",
+                                                      {ones, x}, nullptr, &n)};
+      auto d2ydx2_mid = op.mul(dydx, dydx);
       auto d2ydx2_neg_mid = op.negative(d2ydx2_mid);
-      auto d2ydx2 = op.div(d2ydx2_neg_mid, dldy);
+      auto d2ydx2 = op.mul(d2ydx2_neg_mid, dldy);
 
       std::vector<nnvm::NodeEntry> ret;
 
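As a sanity check on the math this hunk encodes: for y = log1p(x), dy/dx = 1/(1+x) and d2y/dx2 = -1/(1+x)^2 = -(dy/dx)^2, which is exactly what d2ydx2_neg_mid computes above. A quick numpy verification (illustrative only, not part of the commit):

    import numpy as np

    x = np.array([0.0, 0.5, 2.0])
    dydx = 1.0 / (1.0 + x)        # derivative of log1p
    d2ydx2 = -(dydx * dydx)       # -(dy/dx)^2
    np.testing.assert_allclose(d2ydx2, -1.0 / (1.0 + x) ** 2)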
30 changes: 24 additions & 6 deletions src/operator/tensor/elemwise_unary_op_trig.cc
@@ -203,7 +203,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arcsin,
       auto dydx_mul_grad_x = nnvm::NodeEntry{n};
       auto op = mxnet::util::NodeOpGen{n};
 
-      auto x_grad = op.div(dydx_mul_grad_x, dydx);
+      auto ones = op.ones_like(x);
+      auto x_grad = nnvm::NodeEntry{mxnet::op::MakeNode("_backward_arcsin",
+                                                        n->attrs.name + "_backward_arcsin",
+                                                        {ones, x}, nullptr, &n)};
       auto x_grad_square = op.square(x_grad);
       auto x_grad_square_mul_x = op.mul(x_grad_square, x);
       auto x_grad_grad = op.mul(dydx_mul_grad_x, x_grad_square_mul_x);
@@ -246,7 +249,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arccos,
       auto dydx_mul_grad_x = nnvm::NodeEntry{n};
       auto op = mxnet::util::NodeOpGen{n};
 
-      auto x_grad = op.div(dydx_mul_grad_x, dydx);
+      auto ones = op.ones_like(x);
+      auto x_grad = nnvm::NodeEntry{mxnet::op::MakeNode("_backward_arccos",
+                                                        n->attrs.name + "_backward_arccos",
+                                                        {ones, x}, nullptr, &n)};
       auto x_grad_square = op.square(x_grad);
       auto x_grad_square_mul_x = op.mul(x_grad_square, x);
       auto x_grad_grad = op.mul(dydx_mul_grad_x, x_grad_square_mul_x);
@@ -295,7 +301,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arctan,
       auto dldy_mul_dydx = nnvm::NodeEntry{n};
       auto op = mxnet::util::NodeOpGen{n};
 
-      auto x_grad = op.div(dldy_mul_dydx, dldy);
+      auto ones = op.ones_like(x);
+      auto x_grad = nnvm::NodeEntry{mxnet::op::MakeNode("_backward_arctan",
+                                                        n->attrs.name + "_backward_arctan",
+                                                        {ones, x}, nullptr, &n)};
       auto x_grad_square = op.square(x_grad);
       auto x_grad_square_mul_x = op.mul(x_grad_square, x);
       auto x_grad_square_mul_2_x = op.mul(-2.0, x_grad_square_mul_x);
@@ -501,7 +510,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arcsinh,
       auto dydx_mul_grad_x = nnvm::NodeEntry{n};
       auto op = mxnet::util::NodeOpGen{n};
 
-      auto grad_x = op.div(dydx_mul_grad_x, dydx);
+      auto ones = op.ones_like(x);
+      auto grad_x = nnvm::NodeEntry{mxnet::op::MakeNode("_backward_arcsinh",
+                                                        n->attrs.name + "_backward_arcsinh",
+                                                        {ones, x}, nullptr, &n)};
       auto grad_x_square = op.square(grad_x);
       auto grad_x_square_mul_x = op.mul(grad_x_square, x);
       auto grad_grad_x = op.mul(dydx_mul_grad_x, grad_x_square_mul_x);
@@ -539,7 +551,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arccosh,
       auto dydx_mul_grad_x = nnvm::NodeEntry{n};
       auto op = mxnet::util::NodeOpGen{n};
 
-      auto grad_x = op.div(dydx_mul_grad_x, dydx);
+      auto ones = op.ones_like(x);
+      auto grad_x = nnvm::NodeEntry{mxnet::op::MakeNode("_backward_arccosh",
+                                                        n->attrs.name + "_backward_arccosh",
+                                                        {ones, x}, nullptr, &n)};
       auto grad_x_square = op.square(grad_x);
       auto grad_x_square_mul_x = op.mul(grad_x_square, x);
       auto grad_grad_x = op.mul(dydx_mul_grad_x, grad_x_square_mul_x);
@@ -583,7 +598,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_arctanh,
       auto dldy_mul_dydx = nnvm::NodeEntry{n};
       auto op = mxnet::util::NodeOpGen{n};
 
-      auto x_grad = op.div(dldy_mul_dydx, dldy);
+      auto ones = op.ones_like(x);
+      auto x_grad = nnvm::NodeEntry{mxnet::op::MakeNode("_backward_arctanh",
+                                                        n->attrs.name + "_backward_arctanh",
+                                                        {ones, x}, nullptr, &n)};
       auto x_grad_square = op.square(x_grad);
       auto x_grad_square_mul_x = op.mul(x_grad_square, x);
       auto x_grad_square_mul_2_x = op.mul(2.0, x_grad_square_mul_x);
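All six hunks apply the same fix: the old code recovered f'(x) by dividing f'(x) * head_grad by head_grad, which yields NaN wherever head_grad has a zero element (the failure the seed above exposed). The new code instead calls the backward op with an all-ones head grad, so f'(x) is computed directly. A minimal numpy sketch of the failure mode (illustrative only, not MXNet code):

    import numpy as np

    head_grad = np.array([0.0, 1.0, 2.0])  # zero element, as in the failing seed
    x = np.array([0.1, 0.2, 0.3])
    fprime = 1.0 / (1.0 - x ** 2)          # f'(x) for f = arctanh

    with np.errstate(invalid='ignore'):
        recovered_old = (fprime * head_grad) / head_grad  # 0/0 -> nan
    recovered_new = fprime * np.ones_like(head_grad)      # always finite

    assert np.isnan(recovered_old[0]) and np.isfinite(recovered_new).all()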
24 changes: 9 additions & 15 deletions tests/python/unittest/test_higher_order_grad.py
@@ -27,7 +27,8 @@
 from common import with_seed
 import mxnet
 from mxnet import nd, autograd, gluon
-from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd, same
+from mxnet.test_utils import (
+    assert_almost_equal, random_arrays, random_uniform_arrays, rand_shape_nd, same)
 
 
 @with_seed()
@@ -131,7 +132,7 @@ def grad_grad_op(x):
     array = random_arrays(shape)
     check_nth_order_unary(array, tanh, grad_op, 1, rtol=1e-6, atol=1e-6)
     check_second_order_unary(
-        array, tanh, grad_grad_op, rtol=1e-6, atol=1e-6)
+        array, tanh, grad_grad_op, rtol=1e-6, atol=1e-5)
 
 
 @with_seed()
@@ -144,12 +145,8 @@ def grad_grad_op(x):
 
     for dim in range(1, 5):
         shape = rand_shape_nd(dim)
-        array = random_arrays(shape)
-        # Hack: Decrease std_dev to make
-        # sure all elements
-        # are in range -1 to 1
-        # i.e. Domain of arcsin
-        array *= 0.2
+        # Domain of arcsin is [-1, 1]
+        array = random_uniform_arrays(shape, low=-0.99, high=0.99)[0]
         check_second_order_unary(array, arcsin, grad_grad_op)
 
 
@@ -163,12 +160,8 @@ def grad_grad_op(x):
 
     for dim in range(1, 5):
         shape = rand_shape_nd(dim)
-        array = random_arrays(shape)
-        # Hack: Decrease std_dev to make
-        # sure all elements
-        # are in range -1 to 1
-        # i.e. Domain of arccos
-        array *= 0.2
+        # Domain of arccos is [-1, 1]
+        array = random_uniform_arrays(shape, low=-0.99, high=0.99)[0]
         check_second_order_unary(array, arccos, grad_grad_op)
 
 
@@ -233,7 +226,8 @@ def grad_grad_op(x):
 
     for dim in range(1, 5):
         shape = rand_shape_nd(dim)
-        array = random_arrays(shape)
+        # Domain of arctanh is (-1, 1)
+        array = random_uniform_arrays(shape, low=-0.99, high=0.99)[0]
        check_second_order_unary(array, arctanh, grad_grad_op)
 
 
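For readers unfamiliar with how these tests exercise second-order gradients, here is a minimal sketch of the pattern (a sketch assuming MXNet 1.x imperative autograd; check_second_order_unary is the test-suite helper, and this is not its exact code):

    from mxnet import autograd, nd

    x = nd.array([0.1, 0.2, 0.3])
    x.attach_grad()
    with autograd.record():
        y = nd.arctanh(x)
        # First-order gradient, kept in the graph so it can be
        # differentiated a second time.
        dydx = autograd.grad(y, [x], create_graph=True, retain_graph=True)[0]
        z = dydx.sum()
    z.backward()
    # x.grad now holds d2y/dx2 = 2x / (1 - x^2)^2 for arctanh.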
