diff --git a/src/operator/numpy/np_true_divide.cc b/src/operator/numpy/np_true_divide.cc index 35df2c467e7b..1e46cc9d13b5 100644 --- a/src/operator/numpy/np_true_divide.cc +++ b/src/operator/numpy/np_true_divide.cc @@ -126,7 +126,7 @@ NNVM_REGISTER_OP(_npi_rtrue_divide_scalar) }) #endif .set_attr("FCompute", TrueDivideScalarCompute) -.set_attr("FGradient", ElemwiseGradUseNone{"_backward_rdiv_scalar"}) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_rdiv_scalar"}) .add_argument("data", "NDArray-or-Symbol", "source input") .add_argument("scalar", "float", "scalar input"); diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py index 239f300e028e..8e46f03e79bc 100644 --- a/tests/python/unittest/test_numpy_ndarray.py +++ b/tests/python/unittest/test_numpy_ndarray.py @@ -27,7 +27,7 @@ from mxnet.gluon import HybridBlock from mxnet.test_utils import same, assert_almost_equal, rand_shape_nd, rand_ndarray, retry, use_np from common import with_seed, TemporaryDirectory -from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf, assert_exception, is_op_runnable +from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf, assert_exception, is_op_runnable, collapse_sum_like from mxnet.ndarray.ndarray import py_slice from mxnet.base import integer_types import scipy.stats as ss @@ -281,6 +281,62 @@ def test_np_ndarray_binary_element_wise_ops(): '<=': _np.less_equal }) + def _get_grad_func(op, scalar=None, reverse=False): + if op == '+': + if scalar is None: + return lambda ograd, x1, x2, out: (collapse_sum_like(ograd, x1.shape), + collapse_sum_like(ograd, x2.shape)) + elif not reverse: + return lambda ograd, x1, x2, out: ograd + else: + return lambda ograd, x1, x2, out: ograd + elif op == '-': + if scalar is None: + return lambda ograd, x1, x2, out: (collapse_sum_like(ograd, x1.shape), + -collapse_sum_like(ograd, x2.shape)) + elif not reverse: + return lambda ograd, x1, x2, out: ograd + else: + return lambda ograd, x1, x2, out: -ograd + elif op == '*': + if scalar is None: + return lambda ograd, x1, x2, out: (collapse_sum_like(ograd * x2, x1.shape), + collapse_sum_like(ograd * x1, x2.shape)) + elif not reverse: + return lambda ograd, x1, x2, out: ograd * x2 + else: + return lambda ograd, x1, x2, out: ograd * x1 + elif op == '/': + if scalar is None: + return lambda ograd, x1, x2, out: (collapse_sum_like(ograd / x2, x1.shape), + collapse_sum_like(-x1 * ograd / (x2 * x2), x2.shape)) + elif not reverse: + return lambda ograd, x1, x2, out: ograd / x2 + else: + return lambda ograd, x1, x2, out: -x1 * ograd / (x2 * x2) + elif op == 'mod': + if scalar is None: + return lambda ograd, x1, x2, out: (collapse_sum_like(ograd, x1.shape), + collapse_sum_like(-ograd * _np.floor(x1 / x2), x2.shape)) + elif not reverse: + return lambda ograd, x1, x2, out: ograd + else: + return lambda ograd, x1, x2, out: -ograd * _np.floor(x1 / x2) + elif op == 'pow': + if scalar is None: + return lambda ograd, x1, x2, out: (collapse_sum_like(ograd * x2 * _np.power(x1, x2 - 1), x1.shape), + collapse_sum_like(ograd * out * _np.log(x1), x2.shape)) + elif not reverse: + return lambda ograd, x1, x2, out: ograd * x2 * _np.power(x1, x2 - 1) + else: + return lambda ograd, x1, x2, out: ograd * out * _np.log(x1) + elif op in ('==', '!=', '<', '<=', '>', '>='): + if scalar is None: + return lambda ograd, x1, x2, out: (_np.zeros_like(x1), _np.zeros_like(x2)) + else: + return lambda ograd, x1, x2, out: _np.zeros_like(ograd) + return None + def get_np_ret(x1, x2, op): return np_op_map[op](x1, x2) @@ -364,13 +420,15 @@ def check_binary_op_result(shape1, shape2, op, dtype=None): mx_input1 = abs(_np.random.uniform()) + 1 np_input1 = mx_input1 else: - mx_input1 = rand_ndarray(shape1, dtype=dtype).abs() + 1 + mx_input1 = (rand_ndarray(shape1, dtype=dtype).abs() + 1).as_np_ndarray() + mx_input1.attach_grad() np_input1 = mx_input1.asnumpy() if shape2 is None: mx_input2 = abs(_np.random.uniform()) + 1 np_input2 = mx_input2 else: - mx_input2 = rand_ndarray(shape2, dtype=dtype).abs() + 1 + mx_input2 = (rand_ndarray(shape2, dtype=dtype).abs() + 1).as_np_ndarray() + mx_input2.attach_grad() np_input2 = mx_input2.asnumpy() scalar = None @@ -382,7 +440,9 @@ def check_binary_op_result(shape1, shape2, op, dtype=None): scalar = mx_input1 reverse = True + grad_func = _get_grad_func(op, scalar, reverse) np_out = get_np_ret(np_input1, np_input2, op) + ograd = _np.ones_like(np_out) for hybridize in [True, False]: if scalar is None: get_mx_ret_np = TestBinaryElementWiseOp(op) @@ -390,26 +450,49 @@ def check_binary_op_result(shape1, shape2, op, dtype=None): if hybridize: get_mx_ret_np.hybridize() get_mx_ret_classic.hybridize() - mx_out = get_mx_ret_np(mx_input1.as_np_ndarray(), mx_input2.as_np_ndarray()) + if grad_func is None: + mx_out = get_mx_ret_np(mx_input1, mx_input2) + else: + with mx.autograd.record(): + mx_out = get_mx_ret_np(mx_input1, mx_input2) + mx_out.backward() assert type(mx_out) == np.ndarray - assert np_out.shape == mx_out.shape if op in logic_ops: assert np_out.dtype == mx_out.dtype - assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5, use_broadcast=False) + + if grad_func is not None: + x1_grad_expected, x2_grad_expected = grad_func(ograd, np_input1, np_input2, np_out) + assert_almost_equal(mx_input1.grad.asnumpy(), x1_grad_expected, atol=1e-5, rtol=1e-3, + use_broadcast=False) + assert_almost_equal(mx_input2.grad.asnumpy(), x2_grad_expected, atol=1e-5, rtol=1e-3, + use_broadcast=False) else: get_mx_ret = TestBinaryElementWiseOp(op, scalar=scalar, reverse=reverse) if hybridize: get_mx_ret.hybridize() if reverse: - mx_out = get_mx_ret(mx_input2.as_np_ndarray()) - assert type(mx_out) == np.ndarray + mx_input = mx_input2 else: - mx_out = get_mx_ret(mx_input1.as_np_ndarray()) - assert type(mx_out) == np.ndarray - assert np_out.shape == mx_out.shape + mx_input = mx_input1 + + if grad_func is None: + mx_out = get_mx_ret(mx_input) + else: + with mx.autograd.record(): + mx_out = get_mx_ret(mx_input) + mx_out.backward() + assert type(mx_out) == np.ndarray + if op in logic_ops: assert np_out.dtype == mx_out.dtype - assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5, use_broadcast=False) + + # check grad + if grad_func is not None: + x_grad_expected = grad_func(ograd, np_input1, np_input2, np_out) + assert_almost_equal(mx_input.grad.asnumpy(), x_grad_expected, atol=1e-5, rtol=1e-3, + use_broadcast=False) dtypes = [_np.float32, _np.float64, None] ops = np_op_map.keys() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 32cd5b10717e..403ee8ddae1a 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -1572,8 +1572,8 @@ def hybrid_forward(self, F, a, b, *args, **kwargs): rtol=1e-1, atol=1e-2, equal_nan=True, use_broadcast=False) if rgrads is None: assert_almost_equal(mx_test_x2.grad.asnumpy(), - collapse_sum_like(rgrad(y.asnumpy(), np_test_x2, np_test_x1), mx_test_x2.shape), - rtol=1e-1, atol=1e-2, equal_nan=True, use_broadcast=False) + collapse_sum_like(rgrad(y.asnumpy(), np_test_x2, np_test_x1), mx_test_x2.shape), + rtol=1e-1, atol=1e-2, equal_nan=True, use_broadcast=False) else: assert_almost_equal(mx_test_x2.grad.asnumpy(), collapse_sum_like(rgrad(y.asnumpy(), np_test_x1, np_test_x2), mx_test_x2.shape), @@ -1594,7 +1594,6 @@ def hybrid_forward(self, F, a, b, *args, **kwargs): assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, order='C') assertRaises(NotImplementedError, getattr(np, func), mx_test_x1, mx_test_x2, order='mxnet') - funcs = { 'add': (-1.0, 1.0, [lambda y, x1, x2: _np.ones(y.shape)], None), 'subtract': @@ -1603,7 +1602,7 @@ def hybrid_forward(self, F, a, b, *args, **kwargs): 'multiply': (-1.0, 1.0, [lambda y, x1, x2: _np.broadcast_to(x2, y.shape)], [lambda y, x1, x2: _np.broadcast_to(x1, y.shape)]), 'divide': (0.1, 1.0, [lambda y, x1, x2: _np.ones(y.shape) / x2], - [lambda y, x1, x2: -x1 / (x2 * x2)]), + [lambda y, x1, x2: -x1 / (x2 * x2)]), 'mod': (1.0, 10.0, [lambda y, x1, x2: _np.ones(y.shape), lambda y, x1, x2: _np.zeros(y.shape)],