From 66e03c55deb3e5a6430d71780d015735a096548c Mon Sep 17 00:00:00 2001 From: ckt624 Date: Wed, 25 Sep 2019 01:28:58 -0400 Subject: [PATCH] Numpy det and slogdet operators (#15861) * Add alias. Add tests. Add slogdet tests. Add docs Change shapes Change tests. Change slogdet tests Change style. * Fix * Fix --- python/mxnet/_numpy_op_doc.py | 122 ++++++++++ src/operator/tensor/la_op.cc | 2 + src/operator/tensor/la_op.cu | 2 + src/operator/tensor/la_op.h | 7 +- tests/python/unittest/test_numpy_op.py | 305 ++++++++++++++++--------- 5 files changed, 332 insertions(+), 106 deletions(-) diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py index 6e2f5fa15919..bb9b8314f2aa 100644 --- a/python/mxnet/_numpy_op_doc.py +++ b/python/mxnet/_numpy_op_doc.py @@ -20,6 +20,128 @@ """Doc placeholder for numpy ops with prefix _np.""" +def _np__linalg_det(a): + """ + det(a) + + Compute the determinant of an array. + + Parameters + ---------- + a : (..., M, M) ndarray + Input array to compute determinants for. + + Returns + ------- + det : (...) ndarray + Determinant of `a`. + + See Also + -------- + slogdet : Another way to represent the determinant, more suitable + for large matrices where underflow/overflow may occur. + + Notes + ----- + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The determinant is computed via LU factorization using the LAPACK + routine z/dgetrf. + + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.linalg.det(a) + -2.0 + + Computing determinants for a stack of matrices: + + >>> a = np.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> np.linalg.det(a) + array([-2., -3., -8.]) + """ + pass + + +def _np__linalg_slogdet(a): + """ + slogdet(a) + + Compute the sign and (natural) logarithm of the determinant of an array. + + If an array has a very small or very large determinant, then a call to + `det` may overflow or underflow. This routine is more robust against such + issues, because it computes the logarithm of the determinant rather than + the determinant itself. + + Parameters + ---------- + a : (..., M, M) ndarray + Input array, has to be a square 2-D array. + + Returns + ------- + sign : (...) ndarray + A number representing the sign of the determinant. For a real matrix, + this is 1, 0, or -1. + logdet : (...) array_like + The natural log of the absolute value of the determinant. + + If the determinant is zero, then `sign` will be 0 and `logdet` will be + -Inf. In all cases, the determinant is equal to ``sign * np.exp(logdet)``. + + See Also + -------- + det + + Notes + ----- + + Broadcasting rules apply, see the `numpy.linalg` documentation for + details. + + The determinant is computed via LU factorization using the LAPACK + routine z/dgetrf. 
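+
+    As a rough, illustrative reference (not the MXNet kernel itself), the
+    pair ``(sign, logdet)`` can be recovered from the LU factors; the sketch
+    below assumes plain NumPy and SciPy's ``scipy.linalg.lu_factor`` are
+    available, and the helper name ``ref_slogdet`` is only for illustration::
+
+        import numpy as onp
+        from scipy.linalg import lu_factor
+
+        def ref_slogdet(a):
+            # P a = L U with unit-diagonal L, so |det(a)| comes from diag(U)
+            lu, piv = lu_factor(a)
+            diag = onp.diag(lu)
+            # one factor of -1 for every actual row interchange in the pivoting
+            perm_sign = (-1.0) ** onp.sum(piv != onp.arange(piv.size))
+            sign = perm_sign * onp.prod(onp.sign(diag))
+            logdet = onp.sum(onp.log(onp.abs(diag)))
+            return sign, logdet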
+ + + Examples + -------- + The determinant of a 2-D array ``[[a, b], [c, d]]`` is ``ad - bc``: + + >>> a = np.array([[1, 2], [3, 4]]) + >>> (sign, logdet) = np.linalg.slogdet(a) + >>> (sign, logdet) + (-1., 0.69314718055994529) + >>> sign * np.exp(logdet) + -2.0 + + Computing log-determinants for a stack of matrices: + + >>> a = np.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> sign, logdet = np.linalg.slogdet(a) + >>> (sign, logdet) + (array([-1., -1., -1.]), array([ 0.69314718, 1.09861229, 2.07944154])) + >>> sign * np.exp(logdet) + array([-2., -3., -8.]) + + This routine succeeds where ordinary `det` does not: + + >>> np.linalg.det(np.eye(500) * 0.1) + 0.0 + >>> np.linalg.slogdet(np.eye(500) * 0.1) + (1., -1151.2925464970228) + """ + pass + + def _np_ones_like(a): """ Return an array of ones with the same shape and type as a given array. diff --git a/src/operator/tensor/la_op.cc b/src/operator/tensor/la_op.cc index ce7d1d5de692..24968eca3f06 100644 --- a/src/operator/tensor/la_op.cc +++ b/src/operator/tensor/la_op.cc @@ -941,6 +941,7 @@ NNVM_REGISTER_OP(_backward_linalg_inverse) NNVM_REGISTER_OP(_linalg_det) .add_alias("linalg_det") +.add_alias("_np__linalg_det") .describe(R"code(Compute the determinant of a matrix. Input is a tensor *A* of dimension *n >= 2*. @@ -991,6 +992,7 @@ NNVM_REGISTER_OP(_backward_linalg_det) .set_attr("FCompute", LaOpDetBackward); NNVM_REGISTER_OP(_linalg_slogdet) +.add_alias("_np__linalg_slogdet") .add_alias("linalg_slogdet") .describe(R"code(Compute the sign and log of the determinant of a matrix. Input is a tensor *A* of dimension *n >= 2*. diff --git a/src/operator/tensor/la_op.cu b/src/operator/tensor/la_op.cu index 68c33180e3d5..ec622f0b22a2 100644 --- a/src/operator/tensor/la_op.cu +++ b/src/operator/tensor/la_op.cu @@ -100,12 +100,14 @@ NNVM_REGISTER_OP(_backward_linalg_inverse) .set_attr("FCompute", LaOpBackward); NNVM_REGISTER_OP(_linalg_det) +.add_alias("_np__linalg_det") .set_attr("FCompute", LaOpDetForward); NNVM_REGISTER_OP(_backward_linalg_det) .set_attr("FCompute", LaOpDetBackward); NNVM_REGISTER_OP(_linalg_slogdet) +.add_alias("_np__linalg_slogdet") .set_attr("FCompute", LaOpDetForward); NNVM_REGISTER_OP(_backward_linalg_slogdet) diff --git a/src/operator/tensor/la_op.h b/src/operator/tensor/la_op.h index e024693e3819..bb56dc52b594 100644 --- a/src/operator/tensor/la_op.h +++ b/src/operator/tensor/la_op.h @@ -26,6 +26,7 @@ #define MXNET_OPERATOR_TENSOR_LA_OP_H_ #include +#include #include #include #include "../mshadow_op.h" @@ -428,7 +429,11 @@ inline bool DetShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in[ndim-2], in[ndim-1]) << "Input A's last two dimension must be equal"; mxnet::TShape out; if (ndim == 2) { - out = mxnet::TShape(1, 1); + if (Imperative::Get()->is_np_shape()) { + out = mxnet::TShape(0, 1); + } else { + out = mxnet::TShape(1, 1); + } } else { out = mxnet::TShape(in.begin(), in.end() - 2); } diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 97a6149991b0..9d95d43fecbc 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -29,7 +29,7 @@ from common import assertRaises, with_seed import random import scipy.stats as ss -from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf, retry +from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf, collapse_sum_like from mxnet.runtime import Features from mxnet.numpy_op_signature import _get_builtin_op import 
platform @@ -143,8 +143,8 @@ def tensordot_backward(a, b, axes=2): test_tensordot = TestTensordot(axes) if hybridize: test_tensordot.hybridize() - a = rand_ndarray(shape = a_shape, dtype = dtype).as_np_ndarray() - b = rand_ndarray(shape = b_shape, dtype = dtype).as_np_ndarray() + a = rand_ndarray(shape=a_shape, dtype=dtype).as_np_ndarray() + b = rand_ndarray(shape=b_shape, dtype=dtype).as_np_ndarray() a.attach_grad() b.attach_grad() @@ -169,7 +169,7 @@ def tensordot_backward(a, b, axes=2): b_sym = mx.sym.Variable("b").as_np_ndarray() mx_sym = mx.sym.np.tensordot(a_sym, b_sym, axes).as_nd_ndarray() check_numeric_gradient(mx_sym, [a.as_nd_ndarray(), b.as_nd_ndarray()], - rtol=1e-1, atol=1e-1, dtype = dtype) + rtol=1e-1, atol=1e-1, dtype=dtype) @with_seed() @@ -221,6 +221,96 @@ def test_np_dot(): assert False +@with_seed() +@use_np +def test_np_linalg_det(): + class TestDet(HybridBlock): + def __init__(self): + super(TestDet, self).__init__() + + def hybrid_forward(self, F, a): + return F.np.linalg.det(a) + + # test non zero size input + tensor_shapes = [ + (5, 5), + (3, 3, 3), + (2, 2, 2, 2, 2), + (1, 1) + ] + + for hybridize in [True, False]: + for shape in tensor_shapes: + for dtype in [_np.float32, _np.float64]: + a_shape = (1,) + shape + test_det = TestDet() + if hybridize: + test_det.hybridize() + a = rand_ndarray(shape=a_shape, dtype=dtype).as_np_ndarray() + a.attach_grad() + + np_out = _np.linalg.det(a.asnumpy()) + with mx.autograd.record(): + mx_out = test_det(a) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-1, atol=1e-1) + mx_out.backward() + + # Test imperative once again + mx_out = np.linalg.det(a) + np_out = _np.linalg.det(a.asnumpy()) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-1, atol=1e-1, use_broadcast=False) + + # test numeric gradient + a_sym = mx.sym.Variable("a").as_np_ndarray() + mx_sym = mx.sym.np.linalg.det(a_sym).as_nd_ndarray() + check_numeric_gradient(mx_sym, [a.as_nd_ndarray()], + rtol=1e-1, atol=1e-1, dtype=dtype) + + +@with_seed() +@use_np +def test_np_linalg_slogdet(): + class TestSlogdet(HybridBlock): + def __init__(self): + super(TestSlogdet, self).__init__() + + def hybrid_forward(self, F, a): + return F.np.linalg.slogdet(a) + + # test non zero size input + tensor_shapes = [ + (5, 5), + (3, 3, 3), + (2, 2, 2, 2, 2), + (1, 1) + ] + + for hybridize in [True, False]: + for a_shape in tensor_shapes: + for dtype in [_np.float32, _np.float64]: + test_slogdet = TestSlogdet() + if hybridize: + test_slogdet.hybridize() + a = rand_ndarray(shape=a_shape, dtype=dtype).as_np_ndarray() + a.attach_grad() + + np_out = _np.linalg.slogdet(a.asnumpy()) + with mx.autograd.record(): + mx_out = test_slogdet(a) + assert mx_out[0].shape == np_out[0].shape + assert mx_out[1].shape == np_out[1].shape + assert_almost_equal(mx_out[0].asnumpy(), np_out[0], rtol=1e-1, atol=1e-1, use_broadcast=False) + assert_almost_equal(mx_out[1].asnumpy(), np_out[1], rtol=1e-1, atol=1e-1, use_broadcast=False) + mx_out[1].backward() + + # Test imperative once again + mx_out = np.linalg.slogdet(a) + np_out = _np.linalg.slogdet(a.asnumpy()) + assert_almost_equal(mx_out[0].asnumpy(), np_out[0], rtol=1e-1, atol=1e-1, use_broadcast=False) + assert_almost_equal(mx_out[1].asnumpy(), np_out[1], rtol=1e-1, atol=1e-1, use_broadcast=False) + + @with_seed() @use_np def test_np_ldexp(): @@ -234,25 +324,24 @@ def hybrid_forward(self, F, x1, x2): def _np_ldexp(x1, x2): return x1 * _np.power(2.0, x2) - def dldx(x1, x2): - grad_a = _np.power(2.0, x2) - grad_b = 
_np_ldexp(x1, x2) * _np.log(2.0) - if len(x1) == 1: - grad_a = _np.sum(grad_a) - if len(x2) == 1: - grad_b = _np.sum(grad_b) + def dldx(x1, x2): + out = _np_ldexp(x1, x2) + grad_a = _np.broadcast_to(_np.power(2.0, x2), out.shape) + grad_b = _np.broadcast_to(out * _np.log(2.0), out.shape) + grad_a = collapse_sum_like(grad_a, x1.shape) + grad_b = collapse_sum_like(grad_b, x2.shape) return [grad_a, grad_b] - shapes = [ + shapes = [ ((3, 1), (3, 1)), ((3, 1, 2), (3, 1, 2)), - ((1, ),(1, )), + ((1, ), (1, )), ((1, ), (2, )), ((3, ), (1, )), ((3, 0), (3, 0)), # zero-size shape ((0, 1), (0, 1)), # zero-size shape ((2, 0, 2), (2, 0, 2)), # zero-size shape - ] + ] for hybridize in [True, False]: for shape1, shape2 in shapes: @@ -260,7 +349,7 @@ def dldx(x1, x2): test_ldexp = TestLdexp() if hybridize: test_ldexp.hybridize() - x1 = rand_ndarray(shape=shape1, dtype=dtype).as_np_ndarray() + x1 = rand_ndarray(shape=shape1, dtype=dtype).as_np_ndarray() x1.attach_grad() x2 = rand_ndarray(shape=shape2, dtype=dtype).as_np_ndarray() x2.attach_grad() @@ -269,17 +358,17 @@ def dldx(x1, x2): with mx.autograd.record(): mx_out = test_ldexp(x1, x2) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-1, atol=1e-1) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-1, atol=1e-1, use_broadcast=False) mx_out.backward() np_backward = dldx(x1.asnumpy(), x2.asnumpy()) - assert_almost_equal(x1.grad.asnumpy(), np_backward[0], atol=1e-1, rtol=1e-1) - assert_almost_equal(x2.grad.asnumpy(), np_backward[1], atol=1e-1, rtol=1e-1) + assert_almost_equal(x1.grad.asnumpy(), np_backward[0], atol=1e-1, rtol=1e-1, use_broadcast=False) + assert_almost_equal(x2.grad.asnumpy(), np_backward[1], atol=1e-1, rtol=1e-1, use_broadcast=False) # Test imperative once again mx_out = np.ldexp(x1, x2) np_out = _np_ldexp(x1.asnumpy(), x2.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-1, atol=1e-1) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-1, atol=1e-1, use_broadcast=False) @with_seed() @@ -313,16 +402,16 @@ def vdot_backward(a, b): with mx.autograd.record(): mx_out = test_vdot(a, b) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol = 1e-3, atol = 1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out.backward() np_backward = vdot_backward(a.asnumpy(), b.asnumpy()) - assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol = 1e-2, atol=1e-2) - assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol = 1e-2, atol=1e-2) + assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol=1e-2, atol=1e-2, use_broadcast=False) + assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2, use_broadcast=False) # Test imperative once again mx_out = np.vdot(a, b) np_out = _np.vdot(a.asnumpy(), b.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) # test numeric gradient if len(shape) > 0 and _np.prod(shape) > 0: @@ -410,16 +499,16 @@ def inner_backward(a, b): with mx.autograd.record(): mx_out = test_inner(a, b) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol = 1e-3, atol = 1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out.backward() np_backward = inner_backward(a.asnumpy(), b.asnumpy()) - assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol = 1e-2, atol=1e-2) - 
assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol = 1e-2, atol=1e-2) + assert_almost_equal(a.grad.asnumpy(), np_backward[0], rtol=1e-2, atol=1e-2, use_broadcast=False) + assert_almost_equal(b.grad.asnumpy(), np_backward[1], rtol=1e-2, atol=1e-2, use_broadcast=False) # Test imperative once again mx_out = np.inner(a, b) np_out = _np.inner(a.asnumpy(), b.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) # test numeric gradient a_sym = mx.sym.Variable("a").as_np_ndarray() @@ -461,13 +550,13 @@ def hybrid_forward(self, F, a, b): with mx.autograd.record(): mx_out = test_outer(a, b) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out.backward() # Test imperative once again mx_out = np.outer(a, b) np_out = _np.outer(a.asnumpy(), b.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) # test numeric gradient a_sym = mx.sym.Variable("a").as_np_ndarray() @@ -620,7 +709,7 @@ def _test_np_exception(func, shape, dim): y = test_gluon(x) assert y.shape == expected_ret.shape assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3 if itype == 'float16' else 1e-3, - atol=1e-5 if itype == 'float16' else 1e-5) + atol=1e-5 if itype == 'float16' else 1e-5, use_broadcast=False) y.backward() # only check the gradient with hardcoded input if is_int(itype): @@ -634,7 +723,7 @@ def _test_np_exception(func, shape, dim): else: mx_out = np.min(x, axis=axis, keepdims=keepdims) np_out = _np.amin(x.asnumpy(), axis=axis, keepdims=keepdims) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) # test zero and zero dim shapes = [(), (0), (2, 0), (0, 2, 1)] @@ -693,7 +782,7 @@ def is_int(dtype): y = test_mean(x) assert y.shape == expected_ret.shape assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3 if dtype == 'float16' else 1e-3, - atol=1e-5 if dtype == 'float16' else 1e-5) + atol=1e-5 if dtype == 'float16' else 1e-5, use_broadcast=False) y.backward() N = x.size / y.size @@ -709,7 +798,7 @@ def is_int(dtype): # test imperative mx_out = np.mean(x, axis=axis, dtype=dtype, keepdims=keepdims) np_out = _np.mean(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -801,16 +890,17 @@ def test_np_linspace(): mx_ret = np.linspace(config, endpoint=endpoint, retstep=retstep, dtype=dtype) np_ret = _np.linspace(config, endpoint=endpoint, retstep=retstep, dtype=dtype) if retstep: - assert_almost_equal(mx_ret[0].asnumpy(), np_ret[0], atol=1e-3, rtol=1e-5) + assert_almost_equal(mx_ret[0].asnumpy(), np_ret[0], atol=1e-3, rtol=1e-5, use_broadcast=False) same(mx_ret[1], np_ret[1]) else: - assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-3, rtol=1e-5) + assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-3, rtol=1e-5, use_broadcast=False) # check for exception input for config in exception_configs: assertRaises(MXNetError, np.linspace, *config) # check linspace equivalent to arange for test_index in range(1000): - 
assert_almost_equal(mx.np.linspace(0, test_index, test_index + 1).asnumpy(), _np.arange(test_index + 1)) + assert_almost_equal(mx.np.linspace(0, test_index, test_index + 1).asnumpy(), _np.arange(test_index + 1), + use_broadcast=False) class TestLinspace(HybridBlock): def __init__(self, start, stop, num=50, endpoint=None, retstep=False, dtype=None, axis=0): @@ -843,7 +933,7 @@ def hybrid_forward(self, F, x): if hybridize: net.hybridize() mx_out = net(x) - assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-3, rtol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-3, rtol=1e-5, use_broadcast=False) @with_seed() @@ -1334,11 +1424,12 @@ def hybrid_forward(self, F, a, *args, **kwargs): with mx.autograd.record(): y = mx_func(mx_test_data) assert y.shape == np_out.shape - assert_almost_equal(y.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(y.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) if ref_grad: y.backward() - assert_almost_equal(mx_test_data.grad.asnumpy(), ref_grad(np_test_data), rtol=1e-1, atol=1e-2, equal_nan=True) + assert_almost_equal(mx_test_data.grad.asnumpy(), ref_grad(np_test_data), rtol=1e-1, atol=1e-2, + equal_nan=True, use_broadcast=False) funcs = { 'absolute' : (lambda x: -1. * (x < 0) + (x > 0), -1.0, 1.0), @@ -1413,14 +1504,14 @@ def hybrid_forward(self, F, a): with mx.autograd.record(): mx_out = test_relu(x) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out.backward() np_backward = np_relu_grad(x.asnumpy()) - assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out = npx.relu(x) np_out = np_relu(x.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -1450,14 +1541,14 @@ def hybrid_forward(self, F, a): with mx.autograd.record(): mx_out = test_sigmoid(x) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out.backward() np_backward = np_sigmoid_grad(np_out) - assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out = npx.sigmoid(x) np_out = np_sigmoid(x.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -1563,17 +1654,17 @@ def get_indices(axis_size): y = test_split(a) assert len(y) == len(expected_ret) for mx_out, np_out in zip(y, expected_ret): - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx.autograd.backward(y) - assert_almost_equal(a.grad.asnumpy(), _np.ones(a.shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(a.grad.asnumpy(), _np.ones(a.shape), rtol=1e-3, atol=1e-5, use_broadcast=False) # test imperative mx_outs = np.split(a, indices_or_sections=indices_or_sections, axis=axis) np_outs = _np.split(a.asnumpy(), indices_or_sections=indices_or_sections, axis=axis) for mx_out, np_out in zip(mx_outs, np_outs): - 
assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -1613,19 +1704,19 @@ def get_new_shape(shape, axis): y = test_concat(a, b, c, d) assert y.shape == expected_ret.shape - assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5) + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5, use_broadcast=False) y.backward() - assert_almost_equal(a.grad.asnumpy(), _np.ones(a.shape), rtol=1e-3, atol=1e-5) - assert_almost_equal(b.grad.asnumpy(), _np.ones(b.shape), rtol=1e-3, atol=1e-5) - assert_almost_equal(c.grad.asnumpy(), _np.ones(c.shape), rtol=1e-3, atol=1e-5) - assert_almost_equal(d.grad.asnumpy(), _np.ones(d.shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(a.grad.asnumpy(), _np.ones(a.shape), rtol=1e-3, atol=1e-5, use_broadcast=False) + assert_almost_equal(b.grad.asnumpy(), _np.ones(b.shape), rtol=1e-3, atol=1e-5, use_broadcast=False) + assert_almost_equal(c.grad.asnumpy(), _np.ones(c.shape), rtol=1e-3, atol=1e-5, use_broadcast=False) + assert_almost_equal(d.grad.asnumpy(), _np.ones(d.shape), rtol=1e-3, atol=1e-5, use_broadcast=False) # test imperative mx_out = np.concatenate([a, b, c, d], axis=axis) np_out = _np.concatenate([a.asnumpy(), b.asnumpy(), c.asnumpy(), d.asnumpy()], axis=axis) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -1668,10 +1759,10 @@ def hybrid_forward(self, F, a, *args): y.backward() - assert_almost_equal(mx_a.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) - assert_almost_equal(mx_b.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) - assert_almost_equal(mx_c.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) - assert_almost_equal(mx_d.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_a.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5, use_broadcast=False) + assert_almost_equal(mx_b.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5, use_broadcast=False) + assert_almost_equal(mx_c.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5, use_broadcast=False) + assert_almost_equal(mx_d.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5, use_broadcast=False) np_out = _np.stack([np_a, np_b, np_c, np_d], axis=axis) mx_out = np.stack([mx_a, mx_b, mx_c, mx_d], axis=axis) @@ -1701,14 +1792,14 @@ def hybrid_forward(self, F, a): with mx.autograd.record(): mx_out = test_ravel(x) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out.backward() np_backward = _np.ones(shape) - assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out = np.ravel(x) np_out = _np.ravel(x.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -2019,15 +2110,15 @@ def hybrid_forward(self, F, a): with mx.autograd.record(): mx_out = test_cumsum(x) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out.backward() np_backward = 
np_cumsum_backward(_np.ones(np_out.shape, dtype=otype), axis=axis, dtype=otype).reshape(x.shape) - assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5, use_broadcast=False) mx_out = np.cumsum(x, axis=axis, dtype=otype) np_out = _np.cumsum(x.asnumpy(), axis=axis, dtype=otype) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -2058,7 +2149,7 @@ def test_sample_with_replacement(sampler, num_classes, shape, weight=None): expected_density = (weight.asnumpy() if weight is not None else _np.array([1 / num_classes] * num_classes)) # test almost equal - assert_almost_equal(generated_density, expected_density, rtol=1e-1, atol=1e-1) + assert_almost_equal(generated_density, expected_density, rtol=1e-1, atol=1e-1, use_broadcast=False) # test shape assert (samples.shape == shape) @@ -2075,7 +2166,7 @@ def test_sample_without_replacement(sampler, num_classes, shape, num_trials, wei out = sampler(num_classes, 1, replace=False, p=weight).item() bins[out] += 1 bins /= num_trials - assert_almost_equal(bins, expected_freq, rtol=1e-1, atol=1e-1) + assert_almost_equal(bins, expected_freq, rtol=1e-1, atol=1e-1, use_broadcast=False) def test_indexing_mode(sampler, set_size, samples_size, replace, weight=None): a = np.arange(set_size) @@ -2236,7 +2327,7 @@ def hybrid_forward(self, F, x): net.hybridize() mx_ret = net(a) np_ret = _np.linalg.norm(a.asnumpy(), ord=ord, axis=axis, keepdims=keepdims) - assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-5, rtol=1e-4) + assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-5, rtol=1e-4, use_broadcast=False) @with_seed() @@ -2295,18 +2386,18 @@ def get_grad_right(a1, a2): with mx.autograd.record(): mx_out = test_copysign(a1, a2) assert mx_out.shape == expected_np.shape - assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol, use_broadcast=False) # Test gradient mx_out.backward() a1_grad, a2_grad = get_grad(a1_np, a2_np) - assert_almost_equal(a1.grad.asnumpy(), a1_grad, rtol=rtol, atol=atol) - assert_almost_equal(a2.grad.asnumpy(), a2_grad, rtol=rtol, atol=atol) + assert_almost_equal(a1.grad.asnumpy(), a1_grad, rtol=rtol, atol=atol, use_broadcast=False) + assert_almost_equal(a2.grad.asnumpy(), a2_grad, rtol=rtol, atol=atol, use_broadcast=False) # Test imperative once again mx_out = np.copysign(a1, a2) expected_np = _np.copysign(a1_np, a2_np) - assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol, use_broadcast=False) types = ['float16', 'float32', 'float64'] for x_shape in shapes: @@ -2320,12 +2411,12 @@ def get_grad_right(a1, a2): with mx.autograd.record(): mx_out = np.copysign(x, scalar) assert mx_out.shape == expected_np.shape - assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol, use_broadcast=False) # Test gradient mx_out.backward() x_grad = get_grad_left(x_np, scalar) - assert_almost_equal(x.grad.asnumpy(), x_grad, rtol=rtol, atol=atol) + assert_almost_equal(x.grad.asnumpy(), x_grad, rtol=rtol, atol=atol, use_broadcast=False) # Test right x_np = _np.array(_np.random.uniform(-2.0, 2.0, x_shape), dtype=dtype) @@ -2336,12 +2427,12 @@ def get_grad_right(a1, a2): with 
mx.autograd.record(): mx_out = np.copysign(scalar, x) assert mx_out.shape == expected_np.shape - assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol, use_broadcast=False) # Test gradient mx_out.backward() x_grad = get_grad_right(scalar, x_np) - assert_almost_equal(x.grad.asnumpy(), x_grad, rtol=rtol, atol=atol) + assert_almost_equal(x.grad.asnumpy(), x_grad, rtol=rtol, atol=atol, use_broadcast=False) @with_seed() @@ -2410,22 +2501,22 @@ def get_grad(UT, L, V): # check UT @ L @ V == A t = _np.matmul(UT * L[..., None, :], V) assert t.shape == data_np.shape - assert_almost_equal(t, data_np, rtol=rtol, atol=atol) + assert_almost_equal(t, data_np, rtol=rtol, atol=atol, use_broadcast=False) # check UT @ U == I I = _np.matmul(UT, _np.swapaxes(UT, -2, -1)) I_np = _np.ones_like(UT) * _np.eye(shape[-2]) assert I.shape == I_np.shape - assert_almost_equal(I, I_np, rtol=rtol, atol=atol) + assert_almost_equal(I, I_np, rtol=rtol, atol=atol, use_broadcast=False) # check U @ UT == I I = _np.matmul(_np.swapaxes(UT, -2, -1), UT) I_np = _np.ones_like(UT) * _np.eye(shape[-2]) assert I.shape == I_np.shape - assert_almost_equal(I, I_np, rtol=rtol, atol=atol) + assert_almost_equal(I, I_np, rtol=rtol, atol=atol, use_broadcast=False) # check V @ VT == I I = _np.matmul(V, _np.swapaxes(V, -2, -1)) I_np = _np.ones_like(UT) * _np.eye(shape[-2]) assert I.shape == I_np.shape - assert_almost_equal(I, I_np, rtol=rtol, atol=atol) + assert_almost_equal(I, I_np, rtol=rtol, atol=atol, use_broadcast=False) # check descending singular values s = [L[..., i] - L[..., i + 1] for i in range(L.shape[-1] - 1)] s = _np.array(s) @@ -2436,7 +2527,8 @@ def get_grad(UT, L, V): mx.autograd.backward(ret) if ((s > 1e-5).all() and (L.size == 0 or (L > 1e-5).all())): backward_expected = get_grad(ret[0].asnumpy(), ret[1].asnumpy(), ret[2].asnumpy()) - assert_almost_equal(data.grad.asnumpy(), backward_expected, rtol=rtol, atol=atol) + assert_almost_equal(data.grad.asnumpy(), backward_expected, rtol=rtol, atol=atol, + use_broadcast=False) @with_seed() @@ -2481,18 +2573,18 @@ def g(data): with mx.autograd.record(): mx_out = test_vstack(*v) assert mx_out.shape == expected_np.shape - assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol, use_broadcast=False) # Test gradient mx_out.backward() for i in range(3): expected_grad = g(v_np[i]) - assert_almost_equal(v[i].grad.asnumpy(), expected_grad, rtol=rtol, atol=atol) + assert_almost_equal(v[i].grad.asnumpy(), expected_grad, rtol=rtol, atol=atol, use_broadcast=False) # Test imperative once again mx_out = np.vstack(v) expected_np = _np.vstack(v_np) - assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), expected_np, rtol=rtol, atol=atol, use_broadcast=False) @with_seed() @@ -2608,15 +2700,18 @@ def g(data, axis1, axis2, offset): with mx.autograd.record(): out_mx = test_trace(data.as_np_ndarray()) assert out_mx.shape == expected_np.shape - assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol, + use_broadcast=False) out_mx.backward() backward_expected = g(data_np, axis1=axis1, axis2=axis2, offset=offset) - assert_almost_equal(data.grad.asnumpy(), backward_expected, rtol=rtol, atol=atol) + assert_almost_equal(data.grad.asnumpy(), backward_expected, rtol=rtol, atol=atol, + 
use_broadcast=False) # Test imperative once again data = mx.nd.array(data_np, dtype=dtype) out_mx = np.trace(data.as_np_ndarray(), axis1=axis1, axis2=axis2, offset=offset) - assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol) + assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol, + use_broadcast=False) # bad params params = [ @@ -2664,11 +2759,11 @@ def hybrid_forward(self, F, x, *args, **kwargs): if hybridize: mx_func.hybridize() mx_out = mx_func(x) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) # test imperative mx_out = getattr(np, func)(M=config, dtype=dtype) np_out = np_func(M=config).astype(dtype) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed() @@ -2700,15 +2795,15 @@ def hybrid_forward(self, F, x): with mx.autograd.record(): mx_out = test_flip(x) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) mx_out.backward() np_backward = _np.ones(np_out.shape) - assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=rtol, atol=atol) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=rtol, atol=atol, use_broadcast=False) # Test imperative once again mx_out = np.flip(x, axis) np_out = _np.flip(x.asnumpy(), axis) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) @with_seed() @@ -2736,11 +2831,11 @@ def hybrid_forward(self, F, x): np_out = _np.around(x.asnumpy(), d) mx_out = test_around(x) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) mx_out = np.around(x, d) np_out = _np.around(x.asnumpy(), d) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) @with_seed() @@ -2796,18 +2891,18 @@ def dimReduce(src, des): with mx.autograd.record(): mx_out = test_arctan2(x1, x2) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) mx_out.backward() np_backward_1 = x21 / (x11 * x11 + x21 * x21) np_backward_2 = -1 * x11 / (x11 * x11 + x21 * x21) np_backward_1 = dimReduce(np_backward_1, x11) np_backward_2 = dimReduce(np_backward_2, x21) - assert_almost_equal(x1.grad.asnumpy(), np_backward_1, rtol=rtol, atol=atol) - assert_almost_equal(x2.grad.asnumpy(), np_backward_2, rtol=rtol, atol=atol) + assert_almost_equal(x1.grad.asnumpy(), np_backward_1, rtol=rtol, atol=atol, use_broadcast=False) + assert_almost_equal(x2.grad.asnumpy(), np_backward_2, rtol=rtol, atol=atol, use_broadcast=False) mx_out = np.arctan2(x1, x2) np_out = _np.arctan2(x1.asnumpy(), x2.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) @with_seed() @@ -2833,13 +2928,13 @@ def hybrid_forward(self, F, x): np_out = _np.transpose(np_out) mx_out = test_nonzero(x) assert mx_out.shape == np_out.shape - 
assert_almost_equal(mx_out.asnumpy(), np_out, rtol, atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol, atol, use_broadcast=False) # Test imperative once again mx_out = npx.nonzero(x) np_out = _np.nonzero(x.asnumpy()) np_out = _np.transpose(np_out) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol, atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol, atol, use_broadcast=False) @with_seed() @@ -2894,18 +2989,18 @@ def dimReduce(src, des): with mx.autograd.record(): mx_out = test_hypot(x1, x2) assert mx_out.shape == np_out.shape - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) mx_out.backward() np_backward_1 = x11 / np_out np_backward_2 = x21 / np_out np_backward_1 = dimReduce(np_backward_1, x11) np_backward_2 = dimReduce(np_backward_2, x21) - assert_almost_equal(x1.grad.asnumpy(), np_backward_1, rtol=rtol, atol=atol) - assert_almost_equal(x2.grad.asnumpy(), np_backward_2, rtol=rtol, atol=atol) + assert_almost_equal(x1.grad.asnumpy(), np_backward_1, rtol=rtol, atol=atol, use_broadcast=False) + assert_almost_equal(x2.grad.asnumpy(), np_backward_2, rtol=rtol, atol=atol, use_broadcast=False) mx_out = np.hypot(x1, x2) np_out = _np.hypot(x1.asnumpy(), x2.asnumpy()) - assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False) @with_seed() @@ -2947,13 +3042,13 @@ def hybrid_forward(self, F, a): mx_out = test_unique(x) assert mx_out[0].shape == np_out[0].shape for i in range(4): - assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5, use_broadcast=False) # Test imperative once again mx_out = np.unique(x, *config[1:]) np_out = _np.unique(x.asnumpy(), *config[1:]) for i in range(4): - assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_out[i].asnumpy(), np_out[i], rtol=1e-3, atol=1e-5, use_broadcast=False) @with_seed()