From 146b49ead32b941f74db694f2d453cb25650d252 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Sun, 19 Jul 2020 14:12:50 -0700 Subject: [PATCH] Unittest tolerance handling improvements (#18694) * Add sm arch 80 to Makefile * Add TF32 to cuBLAS GEMMs Signed-off-by: Serge Panev * Add CUDA version guards Signed-off-by: Serge Panev * Remove useless TF32 for double and old CUDA version Signed-off-by: Serge Panev * Factorize VERSION_ADJUSTED_TF32_MATH Signed-off-by: Serge Panev * Add TF32 considerations to test_util.py:check_consistency() * Bypass test_gluon_gpu.py:test_large_models if gmem >32GB * Default tols in assert_almost_equal() now a function of dtype and ctx * Expand types listed by default_tols() * Fix pylint * All with_seed() tests to waitall in teardown * Elevate MXNET_TEST_SEED logging to WARNING * Revert test_gluon_gpu.py:test_rnn_layer to default tols * Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv} * test_numpy_interoperability.py to not fix seed for rest of CI * Further fix to test_np_linalg_tensorinv * Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system. * Fix test_operator_gpu.py::test_embedding_with_type * Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve} * Remove unneeded print() from test_numpy_interoperability.py * Unify tol handling of check_consistency() and assert_almost_equal(). Test tweeks. * Add tol handling of assert_almost_equal() with number args * Add tol handling of bool comparisons * Fix test_numpy_op.py::test_np_random_rayleigh * Fix test_operator_gpu.py::test_batchnorm_with_type * Fix test_gluon.py::test_sync_batchnorm in cpu selftest * Improve unittest failure reporting * Add to robustness of test_operator_gpu.py::test_embedding_with_type * Check_consistency() to use equal backward gradients for increased test robustness * Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps(). * test_utils.py fix for numeric gradient calc * Reinstate rtol=1e-2 for test_operator.py::test_order * Remove auto-cast of check_consistency() input data to least precise dtype (not needed) * Fix test_operator.py::test_{reciprocol,cbrt,rcbrt}_op * Expand default float64 numeric_eps for test_operator_gpu.py::test_sofmin * Fix segfault-on-error of @retry decorator. Add test isolation. * assert_almost_equal() to handle a,b scalars * Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race * Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale * Remove test_utils.py:almost_equal_ignore_nan() * Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm * Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd * Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype * Try test_elemwise_binary_ops serial to avoid pytest worker crash * Fix (log_)softmax backward on empty ndarray * Temporarily log all CI seeds to troubleshoot seed non-determinism * Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism" This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47. * Temp log all CI seeds to troubleshoot unwanted seed determinism * Revert "Add sm arch 80 to Makefile" This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9. * Same fix of sample vs. 
pop variance issue, now with test_operator_gpu.py::test_batchnorm * Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism" This reverts commit ff328efb0be3445690669d5437a6af575ff12b49. * Marking test_sparse_dot_grad with garbage_expected after teardown error * Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,} * Temp skip of test_aggregate_duplication on gpu * Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique. * Add ndarray module isolation to help debug test_bbox_augmenters worker crash * Marking test_sparse_square_sum serial after pytest worker crash * Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,} Co-authored-by: Serge Panev Co-authored-by: Bart Gawrych --- python/mxnet/test_utils.py | 377 +++++++++++------- src/operator/linalg.h | 8 + src/operator/linalg_impl.h | 34 +- src/operator/nn/log_softmax.cc | 1 + src/operator/nn/softmax.cc | 1 + src/operator/numpy/np_true_divide-inl.h | 14 +- tests/python/gpu/test_gluon_gpu.py | 16 +- tests/python/gpu/test_gluon_model_zoo_gpu.py | 2 +- tests/python/gpu/test_operator_gpu.py | 95 ++--- tests/python/gpu/test_profiler_gpu.py | 2 + tests/python/unittest/common.py | 22 +- tests/python/unittest/test_autograd.py | 1 + .../test_contrib_gluon_data_vision.py | 5 +- tests/python/unittest/test_gluon.py | 4 +- .../unittest/test_gluon_probability_v1.py | 20 +- .../unittest/test_gluon_probability_v2.py | 21 +- tests/python/unittest/test_ndarray.py | 2 +- .../test_numpy_contrib_gluon_data_vision.py | 9 +- .../unittest/test_numpy_interoperability.py | 7 +- tests/python/unittest/test_numpy_op.py | 46 +-- tests/python/unittest/test_operator.py | 126 +++--- tests/python/unittest/test_sparse_operator.py | 1 + 22 files changed, 487 insertions(+), 327 deletions(-) diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py index dd0278379f11..9ec0c6cd1219 100644 --- a/python/mxnet/test_utils.py +++ b/python/mxnet/test_utils.py @@ -70,19 +70,110 @@ def default_dtype(): # _TODO: get default dtype from environment variable return np.float32 +def default_rtols(): + """Get default relative tolerances for data comparisons involving each data type.""" + return {np.dtype(np.float16): 1e-2, + np.dtype(np.float32): 1e-4, + np.dtype(np.float64): 1e-5, + np.dtype(np.bool): 0, + np.dtype(np.int8): 0, + np.dtype(np.uint8): 0, + np.dtype(np.int32): 0, + np.dtype(np.uint32): 0, + np.dtype(np.int64): 0, + np.dtype(np.uint64): 0} + +def default_atols(): + """Get default absolute tolerances for data comparisons involving each data type.""" + return {np.dtype(np.float16): 1e-1, + np.dtype(np.float32): 1e-3, + np.dtype(np.float64): 1e-20, + np.dtype(np.bool): 0, + np.dtype(np.int8): 0, + np.dtype(np.uint8): 0, + np.dtype(np.int32): 0, + np.dtype(np.uint32): 0, + np.dtype(np.int64): 0, + np.dtype(np.uint64): 0} + +def default_numeric_eps(): + """Get default epsilon for finite difference gradient calculations with data type.""" + # prefer a power-of-two eps, since no bits are dropped when serving as an input delta + return {np.dtype(np.float16): 1.0 / 2**6, + np.dtype(np.float32): 1.0 / 2**9, + np.dtype(np.float64): 1.0 / 2**14} + + +def effective_dtype(dat): + """ Return the most appropriate dtype for determining the tolerance used in dat comparisons + Parameters + ---------- + dat : np.ndarray or mx.nd.array or mx.np.ndarray + """ + # On arch 80 gpus, a float32-io gemm or conv op will trim the mantissa of data + # inputs to be of comparable precision to a float16, so float16 
becomes the + # 'effective dtype' for tolerance tests involving such op outputs. + + # Is TF32 enabled in the ctx (the default on arch 80 GPUs) + def is_TF32_enabled(ctx): + try: + return (ctx.device_type == 'gpu' and + get_cuda_compute_capability(ctx) == 80 and + os.environ.get('NVIDIA_TF32_OVERRIDE') != '0') + except: # pylint: disable=bare-except + return False + + ctx = dat.ctx if hasattr(dat, 'ctx') else None + dtype = np.dtype(dat.dtype) + if dtype == np.dtype(np.float32) and is_TF32_enabled(ctx): + return np.dtype(np.float16) + else: + return dtype -def get_atol(atol=None): - """Get default numerical threshold for regression test.""" - # _TODO: get from env variable, different threshold might - # be needed for different device and dtype - return 1e-20 if atol is None else atol + +def get_tolerance(dat, tol, default_tol): + """ Return the tolerance to be used for dat comparisons based on the given tol, datatype and context. + Parameters + ---------- + dat : np.ndarray or mx.nd.array or mx.np.ndarray + tol : float, or a dict of dtype->float + default_tol : default dict of dtype->float for all types + """ + + if isinstance(tol, numbers.Number): + return tol + + # If the caller has supplied a tol dict, use that if it has an entry for dtype, + # else use the supplied default tol dict. + dtype = effective_dtype(dat) + tol = {} if tol is None else tol + return tol.get(dtype, default_tol[dtype]) -def get_rtol(rtol=None): +def get_tols(x, y, rtol, atol): + """For comparing two datasets 'x' and 'y', what relative and absolute tolerances should be used.""" + # Tolerance analysis needs 'dtype' of 'x' and 'y', so convert numbers to numpy scalars as needed + if isinstance(x, numbers.Number): + x = np.array(x) + if isinstance(y, numbers.Number): + y = np.array(y) + + # If tols are not specified, use the largest default tol for 'x' and 'y' based on their ctx and dtype. + rtol = max(get_tolerance(x, rtol, default_rtols()), + get_tolerance(y, rtol, default_rtols())) + atol = max(get_tolerance(x, atol, default_atols()), + get_tolerance(y, atol, default_atols())) + + return rtol, atol + + +def get_atol(atol=None, dtype=np.dtype(np.float64)): """Get default numerical threshold for regression test.""" - # _TODO: get from env variable, different threshold might - # be needed for different device and dtype - return 1e-5 if rtol is None else rtol + return default_atols()[dtype] if atol is None else atol + +def get_rtol(rtol=None, dtype=np.dtype(np.float64)): + """Get default numerical threshold for regression test.""" + return default_rtols()[dtype] if rtol is None else rtol def get_etol(etol=None): """Get default numerical threshold for regression test.""" @@ -499,10 +590,8 @@ def np_reduce(dat, axis, keepdims, numpy_reduce_func): return ret -def find_max_violation(a, b, rtol=None, atol=None): +def _find_max_violation(a, b, rtol, atol): """Finds and returns the location of maximum violation.""" - rtol = get_rtol(rtol) - atol = get_atol(atol) # 'smart' absdiff that considers inf's as equals (to match np.allclose) absdiff = np.where(np.equal(a, b), 0, np.abs(a-b)) tol = atol + rtol*np.abs(b) @@ -565,9 +654,9 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan= ---------- a : np.ndarray or mx.nd.array b : np.ndarray or mx.nd.array - rtol : None or float + rtol : None or float or dict of dtype -> float The relative threshold. Default threshold will be used if set to ``None``. - atol : None or float + atol : None or float or dict of dtype -> float The absolute threshold. 
Default threshold will be used if set to ``None``. names : tuple of names, optional The names used in error message when an exception occurs @@ -579,8 +668,12 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan= if not use_broadcast: checkShapes(a, b) - rtol = get_rtol(rtol) - atol = get_atol(atol) + rtol, atol = get_tols(a, b, rtol, atol) + + if isinstance(a, mx.numpy.ndarray): + a = a.asnumpy() + if isinstance(b, mx.numpy.ndarray): + b = b.asnumpy() use_np_allclose = isinstance(a, np.ndarray) and isinstance(b, np.ndarray) if not use_np_allclose: if not (hasattr(a, 'ctx') and hasattr(b, 'ctx') and a.ctx == b.ctx and a.dtype == b.dtype): @@ -604,32 +697,37 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan= a = a.asnumpy() b = b.asnumpy() - index, rel = find_max_violation(a, b, rtol, atol) - indexErr = index - relErr = rel - - print('\n*** Maximum errors for vector of size {}: rtol={}, atol={}\n'.format(a.size, rtol, atol)) - aTmp = a.copy() - bTmp = b.copy() - i = 1 - while i <= a.size: - if i <= mismatches[0]: - print("%3d: Error %f %s" %(i, rel, locationError(a, b, index, names))) + index, rel = _find_max_violation(a, b, rtol, atol) + if index != (): + # a, b are the numpy arrays + indexErr = index + relErr = rel + + print('\n*** Maximum errors for vector of size {}: rtol={}, atol={}\n'.format(a.size, rtol, atol)) + aTmp = a.copy() + bTmp = b.copy() + i = 1 + while i <= a.size: + if i <= mismatches[0]: + print("%3d: Error %f %s" %(i, rel, locationError(a, b, index, names))) + + aTmp[index] = bTmp[index] = 0 + if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan): + break - aTmp[index] = bTmp[index] = 0 - if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan): - break + i += 1 + if i <= mismatches[1] or mismatches[1] <= 0: + index, rel = _find_max_violation(aTmp, bTmp, rtol, atol) + else: + break - i += 1 - if i <= mismatches[1] or mismatches[1] <= 0: - index, rel = find_max_violation(aTmp, bTmp, rtol, atol) - else: - break + mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else "" + errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \ + (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \ + locationError(a, b, indexErr, names, maxError=True)) + else: + errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e.\n" % (rel, rtol, atol) - mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else "" - errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \ - (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \ - locationError(a, b, indexErr, names, maxError=True)) np.set_printoptions(threshold=4, suppress=True) msg = npt.build_err_msg([a, b], err_msg=errMsg) @@ -648,16 +746,25 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, ---------- a : np.ndarray b : np.ndarray + rtol : None or float or dict of dtype -> float + The relative threshold. Default threshold will be used if set to ``None``. + atol : None or float or dict of dtype -> float + The absolute threshold. Default threshold will be used if set to ``None``. threshold : None or float The checking threshold. Default threshold will be used if set to ``None``. etol : None or float The error rate threshold. If etol is float, return true if error_rate < etol even if any error is found. 
+ names : tuple of names, optional + The names used in error message when an exception occurs + equal_nan : boolean, optional + The flag determining how to treat NAN values in comparison + mismatches : tuple of mismatches + Maximum number of mismatches to be printed (mismatches[0]) and determine (mismatches[1]) """ etol = get_etol(etol) if etol > 0: - rtol = get_rtol(rtol) - atol = get_atol(atol) + rtol, atol = get_tols(a, b, rtol, atol) if isinstance(a, mx.nd.NDArray): a = a.asnumpy() if isinstance(b, mx.nd.NDArray): @@ -665,7 +772,7 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, equals = np.isclose(a, b, rtol=rtol, atol=atol) err = 1 - np.count_nonzero(equals) / equals.size if err > etol: - index, rel = find_max_violation(a, b, rtol, atol) + index, rel = _find_max_violation(a, b, rtol, atol) indexErr = index relErr = rel @@ -683,7 +790,7 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, i += 1 if i <= mismatches[1] or mismatches[1] <= 0: - index, rel = find_max_violation(aTmp, bTmp, rtol, atol) + index, rel = _find_max_violation(aTmp, bTmp, rtol, atol) else: break @@ -698,31 +805,6 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, assert_almost_equal(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan) -def almost_equal_ignore_nan(a, b, rtol=None, atol=None): - """Test that two NumPy arrays are almost equal (ignoring NaN in either array). - Combines a relative and absolute measure of approximate eqality. - If either the relative or absolute check passes, the arrays are considered equal. - Including an absolute check resolves issues with the relative check where all - array values are close to zero. - - Parameters - ---------- - a : np.ndarray - b : np.ndarray - rtol : None or float - The relative threshold. Default threshold will be used if set to ``None``. - atol : None or float - The absolute threshold. Default threshold will be used if set to ``None``. - """ - a = np.copy(a) - b = np.copy(b) - nan_mask = np.logical_or(np.isnan(a), np.isnan(b)) - a[nan_mask] = 0 - b[nan_mask] = 0 - - return almost_equal(a, b, rtol, atol) - - def assert_almost_equal_ignore_nan(a, b, rtol=None, atol=None, names=('a', 'b')): """Test that two NumPy arrays are almost equal (ignoring NaN in either array). Combines a relative and absolute measure of approximate eqality. @@ -954,7 +1036,7 @@ def as_stype(var, stype, dtype): return approx_grads -def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rtol=1e-2, +def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=None, rtol=None, atol=None, grad_nodes=None, use_forward_train=True, ctx=None, grad_stype_dict=None, dtype=default_dtype()): """Verify an operation by checking backward pass via finite difference method. @@ -979,8 +1061,10 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto The auxiliary states required when generating the executor for the symbol. numeric_eps : float, optional Delta for the finite difference method that approximates the gradient. - check_eps : float, optional - relative error eps used when comparing numeric grad to symbolic grad. + rtol : None or float + The relative threshold. Default threshold will be used if set to ``None``. + atol : None or float + The absolute threshold. Default threshold will be used if set to ``None``. 
grad_nodes : None or list or tuple or dict, optional Names of the nodes to check gradient on use_forward_train : bool @@ -997,9 +1081,6 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto [1] https://github.com/Theano/Theano/blob/master/theano/gradient.py """ assert dtype in (np.float16, np.float32, np.float64) - # cannot use finite differences with small eps without high precision - if dtype in (np.float32, np.float16): - assert numeric_eps >= 1e-5 if ctx is None: ctx = default_context() @@ -1074,18 +1155,18 @@ def random_projection(shape): executor.forward(is_train=True) assert len(executor.outputs) == 1 + + eps = get_tolerance(executor.outputs[0], numeric_eps, default_numeric_eps()) + # cannot use finite differences with small eps without high precision + if dtype in (np.float32, np.float16): + assert eps >= 1e-5 + executor.backward() - symbolic_grads = {} - for k in grad_nodes: - grad_k = executor.grad_dict[k] - if grad_k is not None: - symbolic_grads[k] = grad_k.asnumpy() - else: - symbolic_grads[k] = None + symbolic_grads = executor.grad_dict numeric_gradients = numeric_grad( executor, location_npy, aux_states_npy, - eps=numeric_eps, use_forward_train=use_forward_train, dtype=dtype) + eps=eps, use_forward_train=use_forward_train, dtype=dtype) for name in grad_nodes: fd_grad = numeric_gradients[name] @@ -1095,6 +1176,8 @@ def random_projection(shape): assert_almost_equal(fd_grad, sym_grad, rtol, atol, ("NUMERICAL_%s"%name, "BACKWARD_%s"%name)) elif grad_req[name] == 'add': + if isinstance(sym_grad, mx.nd.NDArray): + sym_grad = sym_grad.asnumpy() assert_almost_equal(fd_grad, sym_grad - orig_grad, rtol, atol, ("NUMERICAL_%s"%name, "BACKWARD_%s"%name)) elif grad_req[name] == 'null': @@ -1103,7 +1186,7 @@ def random_projection(shape): raise ValueError("Invalid grad_req %s for argument %s"%(grad_req[name], name)) -def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None, +def check_symbolic_forward(sym, location, expected, rtol=None, atol=None, aux_states=None, ctx=None, equal_nan=False, dtype=default_dtype()): """Compares a symbol's forward results with the expected ones. @@ -1127,8 +1210,10 @@ def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None, Contains arrays corresponding to exe.outputs. - if type is dict of str to np.ndarray Contains mapping between sym.list_output() and exe.outputs. - check_eps : float, optional - Relative error to check to. + rtol : None or float + The relative threshold. Default threshold will be used if set to ``None``. + atol : None or float + The absolute threshold. Default threshold will be used if set to ``None``. 
aux_states : list of np.ndarray of dict, optional - if type is list of np.ndarray Contains all the NumPy arrays corresponding to sym.list_auxiliary_states @@ -1177,14 +1262,14 @@ def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None, executor.forward(is_train=False) - outputs = [x.asnumpy() for x in executor.outputs] + outputs = executor.outputs for output_name, expect, output in zip(sym.list_outputs(), expected, outputs): assert_almost_equal(expect, output, rtol, atol, ("EXPECTED_%s"%output_name, "FORWARD_%s"%output_name), equal_nan=equal_nan) return executor.outputs -def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol=None, +def check_symbolic_backward(sym, location, out_grads, expected, rtol=None, atol=None, aux_states=None, grad_req='write', ctx=None, grad_stypes=None, equal_nan=False, dtype=default_dtype()): """Compares a symbol's backward results with the expected ones. @@ -1215,8 +1300,10 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol= Contains arrays corresponding to exe.grad_arrays - if type is dict of str to np.ndarray Contains mapping between ``sym.list_arguments()`` and exe.outputs. - check_eps: float, optional - Relative error to check to. + rtol : None or float + The relative threshold. Default threshold will be used if set to ``None``. + atol : None or float + The absolute threshold. Default threshold will be used if set to ``None``. aux_states : list of np.ndarray or dict of str to np.ndarray grad_req : str or list of str or dict of str to str, optional Gradient requirements. 'write', 'add' or 'null'. @@ -1302,7 +1389,7 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol= assert out_grads is None executor.backward(out_grads) - grads = {k: v.asnumpy() for k, v in args_grad_data.items()} + grads = args_grad_data for name in expected: if grad_req[name] == 'write': @@ -1310,7 +1397,8 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol= ("EXPECTED_%s"%name, "BACKWARD_%s"%name), equal_nan=equal_nan) elif grad_req[name] == 'add': - assert_almost_equal(expected[name], grads[name] - args_grad_npy[name], + grad = grads[name].asnumpy() if isinstance(grads[name], mx.nd.NDArray) else grads[name] + assert_almost_equal(expected[name], grad - args_grad_npy[name], rtol, atol, ("EXPECTED_%s"%name, "BACKWARD_%s"%name), equal_nan=equal_nan) elif grad_req[name] == 'null': @@ -1395,16 +1483,8 @@ def check_speed(sym, location=None, ctx=None, N=20, grad_req=None, typ="whole", raise ValueError('typ can only be "whole" or "forward".') -def get_tolerance(rtol, ctx): - if 'atol' in ctx: - return ctx['atol'] - if 'atol_mult' in ctx: - return ctx['atol_mult'] * rtol - return rtol - - def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', - arg_params=None, aux_params=None, tol=None, + arg_params=None, aux_params=None, rtol=None, atol=None, raise_on_err=True, ground_truth=None, equal_nan=False, use_uniform=False, rand_type=np.float64): """Check symbol gives the same output for different running context @@ -1419,6 +1499,20 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', Standard deviation of the inner normal distribution. Used in initialization. grad_req : str or list of str or dict of str to str Gradient requirement. 
+ arg_params : dict of input name -> input data + data to use for non-aux inputs + aux_params : dict of input name -> input data + data to use for aux inputs + rtol : float or dictionary dtype->float, optional + The relative error tolerance. + atol : float or dictionary dtype->float, optional + The absolute error tolerance. + raise_on_err : bool, optional, defaults to True + Should an error raise an exception (or just output exception message) + ground_truth : dict of output name -> data, optional + Provided ideal result to be compared against + equal_nan : bool, optional, defaults to False + Should nans be treated as equal in the comparison use_unifrom: bool Optional, When flag set to true, random input data generated follows uniform distribution, @@ -1454,20 +1548,6 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', 'type_dict': {'concat_arg0': np.float32, 'concat_arg1': np.float32}}] >>> check_consistency(sym, ctx_list) """ - if tol is None: - tol = {np.dtype(np.float16): 1e-1, - np.dtype(np.float32): 1e-3, - np.dtype(np.float64): 1e-5, - np.dtype(np.uint8): 0, - np.dtype(np.int32): 0, - np.dtype(np.int64): 0} - elif isinstance(tol, numbers.Number): - tol = {np.dtype(np.float16): tol, - np.dtype(np.float32): tol, - np.dtype(np.float64): tol, - np.dtype(np.uint8): tol, - np.dtype(np.int32): tol, - np.dtype(np.int64): tol} assert len(ctx_list) > 1 if isinstance(sym, Symbol): @@ -1485,10 +1565,16 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', arg_params = {} if arg_params is None else arg_params aux_params = {} if aux_params is None else aux_params - for n, arr in exe_list[0].arg_dict.items(): + + # returns the least precise of two dtypes + def smaller_dtype(dt1, dt2): + return dt1 if dt2 is None or np.dtype(dt1).itemsize < np.dtype(dt2).itemsize else dt2 + + # It's important to assign random inputs in a deterministic order, for reproducibility. + for n, arr in _sorted_items(exe_list[0].arg_dict): if n not in arg_params: if use_uniform: - arg_params[n] = np.random.uniform(low=-0.92, high=0.92, + arg_params[n] = np.random.uniform(low=-0.92 * scale, high=0.92 * scale, size=arr.shape).astype(rand_type) else: arg_params[n] = np.random.normal(size=arr.shape, @@ -1511,25 +1597,22 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', exe.forward(is_train=False) dtypes = [np.dtype(exe.outputs[0].dtype) for exe in exe_list] - max_idx = np.argmax(dtypes) + # Select the ground truth as the first model having the highest precision output[0] + gt_idx = np.argmax(dtypes) gt = ground_truth if gt is None: - gt = exe_list[max_idx].output_dict.copy() + gt = exe_list[gt_idx].output_dict.copy() for i, exe in enumerate(exe_list): - if i == max_idx: + if i == gt_idx: continue - rtol = tol[dtypes[i]] - atol = get_tolerance(rtol, ctx_list[i]) for name, arr in zip(output_names, exe.outputs): - # Previously, the cast was to dtypes[i], but symbol may be mixed-precision, - # so casting the ground truth to the actual output type seems more correct. 
- gtarr = gt[name].astype(arr.dtype) + gtarr = gt[name] try: assert_almost_equal(arr, gtarr, rtol=rtol, atol=atol, equal_nan=equal_nan) except AssertionError as e: - print('Predict Err: ctx %d vs ctx %d at %s'%(i, max_idx, name)) + print('Predict Err: ctx %d vs ctx %d at %s'%(i, gt_idx, name)) traceback.print_exc() if raise_on_err: raise e @@ -1538,33 +1621,55 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', # train if grad_req != 'null': + # Perform forward() for exe in exe_list: exe.forward(is_train=True) - exe.backward(exe.outputs) + # Use the first executor's output data, cast to the least precise dtype, + # as the gradient data to pass to all executor's backward() call. + least_precise_dtype = [out.dtype for out in exe_list[0].outputs] + for exe in exe_list: + least_precise_dtype = [smaller_dtype(out1.dtype, dt) \ + for (out1, dt) in zip(exe.outputs, least_precise_dtype)] + golden_data_np = [out.astype(dt).asnumpy() \ + for (out, dt) in zip(exe_list[0].outputs, least_precise_dtype)] + # Perform backward() + for exe in exe_list: + out_grads = [mx.nd.array(golden_np, ctx=exe._ctx, + dtype=out.dtype).tostype(out.stype) + for (golden_np, out) in zip(golden_data_np, exe.outputs)] + exe.backward(out_grads) + gt = ground_truth if gt is None: - gt = exe_list[max_idx].output_dict.copy() + gt = exe_list[gt_idx].output_dict.copy() if grad_req != 'null': - gt.update(exe_list[max_idx].grad_dict) + gt.update(exe_list[gt_idx].grad_dict) for i, exe in enumerate(exe_list): - if i == max_idx: + if i == gt_idx: continue - rtol = tol[dtypes[i]] - atol = get_tolerance(rtol, ctx_list[i]) curr = zip(output_names + arg_names, exe.outputs + exe.grad_arrays) for name, arr in curr: if gt[name] is None: assert arr is None, name continue - # Previous cast was to dtypes[i], but symbol may be mixed-precision, - # so casting the ground truth to the actual output type seems more correct. - gtarr = gt[name].astype(arr.dtype) + gtarr = gt[name] try: - assert_almost_equal(arr, gtarr, rtol=rtol, atol=atol, equal_nan=equal_nan) + rt, at = rtol, atol + # If the primary data i/o type is float16, then the tolerance used when + # comparing a float32 input gradient (e.g. batchnorm gamma) should be float16. 
+ smaller_arr_dtype = smaller_dtype(arr.dtype, dtypes[i]) + smaller_gt_dtype = smaller_dtype(gtarr.dtype, dtypes[gt_idx]) + if smaller_arr_dtype != arr.dtype or \ + smaller_gt_dtype != gtarr.dtype: + rt, at = get_tols(arr.astype(smaller_arr_dtype), + gtarr.astype(smaller_gt_dtype), rtol, atol) + assert_almost_equal(arr, gtarr, rtol=rt, atol=at, equal_nan=equal_nan) except AssertionError as e: - print('Train Err: ctx %d vs ctx %d at %s'%(i, max_idx, name)) + print('Train Err: {} {} ctx {} vs {} {} ctx {} at {}'.format( + np.dtype(arr.dtype).name, arr.ctx, i, + np.dtype(gtarr.dtype).name, gtarr.ctx, gt_idx, name)) traceback.print_exc() if raise_on_err: raise e diff --git a/src/operator/linalg.h b/src/operator/linalg.h index 291e251f5cbc..3e82c6a2fad1 100644 --- a/src/operator/linalg.h +++ b/src/operator/linalg.h @@ -280,6 +280,14 @@ void linalg_batch_det_backward_helper(const Tensor& LU, const DType zero_det, const mxnet::OpContext& ctx); +#ifdef __CUDACC__ +#if CUDA_VERSION < 11000 +#define VERSION_ADJUSTED_TF32_MATH CUBLAS_DEFAULT_MATH +#else +#define VERSION_ADJUSTED_TF32_MATH CUBLAS_TF32_TENSOR_OP_MATH +#endif +#endif // __CUDACC__ + #include "linalg_impl.h" #endif // MXNET_OPERATOR_LINALG_H_ diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h index 104acd585bdb..6d94f33bc700 100644 --- a/src/operator/linalg_impl.h +++ b/src/operator/linalg_impl.h @@ -212,12 +212,15 @@ inline void linalg_gemm(const Tensor& A, #else cublasDataType_t full_datatype = CUBLAS_DATA_FULL; #endif + auto handle = Stream::GetBlasHandle(s); + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); CUBLAS_CALL(cublasSgemmEx( - Stream::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N), + handle, (tB ? CUBLAS_OP_T : CUBLAS_OP_N), (tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0), (tB ? B.size(1) : B.size(0)), &alpha, B.dptr_, full_datatype, B.stride_, A.dptr_, full_datatype, A.stride_, &beta, C.dptr_, full_datatype, - C.stride_)) + C.stride_)); + CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); } #else @@ -235,13 +238,16 @@ void linalg_gemm_axis(const Tensor& A, const Tensor::GetBlasHandle(s), \ + auto handle = Stream::GetBlasHandle(s); \ + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \ + CUBLAS_CALL(cublas##fname(handle, \ (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), \ C.size(2), C.size(0), (tB ? B.size(2) : B.size(0)), &alpha, \ B.dptr_, B.size(1)*B.stride_, B.stride_, \ A.dptr_, A.size(1)*A.stride_, A.stride_, &beta, \ C.dptr_, C.size(1)*C.stride_, C.stride_, A.size(1))) \ + CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); \ } LINALG_GPU_GEMM_AXIS(SgemmStridedBatched, float) LINALG_GPU_GEMM_AXIS(DgemmStridedBatched, double) @@ -349,13 +355,22 @@ void linalg_gemm(const Tensor::GetBlasHandle(s), \ + auto handle = Stream::GetBlasHandle(s); \ + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \ + CUBLAS_CALL(cublas##fname(handle, \ (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), \ C.size(2), C.size(1), (tB ? 
B.size(2) : B.size(1)), \ - &alpha, B.dptr_, B.stride_, B.size(1) * B.stride_, \ - A.dptr_, A.stride_, A.size(1) * A.stride_, \ - &beta, C.dptr_, C.stride_, C.size(1) * C.stride_, A.size(0))) \ + &alpha, \ + B.dptr_, B.stride_, \ + static_cast(B.size(1) * B.stride_), \ + A.dptr_, A.stride_, \ + static_cast(A.size(1) * A.stride_), \ + &beta, \ + C.dptr_, C.stride_, \ + static_cast(C.size(1) * C.stride_), \ + A.size(0))) \ + CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); \ } LINALG_GPU_BATCH_GEMM(DgemmStridedBatched, double) @@ -380,7 +395,7 @@ void linalg_gemm(const Tensor(const Tensor::GetBlasHandle(s); \ + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \ for (index_t i = 0; i < A.size(2); ++i) { \ CUBLAS_CALL(cublas##fname(Stream::GetBlasHandle(s), \ (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \ @@ -430,6 +447,7 @@ void linalg_gemm(const Tensor& inputs, const std::vector& req, const std::vector& outputs) { + if (inputs[0].shape().Size() == 0U) return; const SoftmaxParam& param = nnvm::get(attrs.parsed); if (SupportMKLDNNLogSoftmax(param, inputs[1], outputs[0])) { MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs); diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc index b95e159f9862..9b28b71560bd 100644 --- a/src/operator/nn/softmax.cc +++ b/src/operator/nn/softmax.cc @@ -59,6 +59,7 @@ static void SoftmaxGradComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { + if (inputs[0].shape().Size() == 0U) return; const SoftmaxParam& param = nnvm::get(attrs.parsed); if (SupportMKLDNNSoftmax(param, inputs[1], outputs[0])) { MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs); diff --git a/src/operator/numpy/np_true_divide-inl.h b/src/operator/numpy/np_true_divide-inl.h index 6e975111c8c2..ea0057b1606c 100644 --- a/src/operator/numpy/np_true_divide-inl.h +++ b/src/operator/numpy/np_true_divide-inl.h @@ -59,15 +59,17 @@ void TrueDivideScalarCompute(const nnvm::NodeAttrs &attrs, }); }); } else { - CHECK_EQ(outputs[0].type_flag_, mxnet::common::GetDefaultDtype()) + CHECK(out.type_flag_ == mshadow::kFloat32 || out.type_flag_ == mshadow::kFloat64) << "true_divide only supports float32 and float64" " output when input's dtype is " << type_string(inputs[0].type_flag_); - MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - Kernel, xpu>::Launch( - s, data.Size(), out.dptr(), data.dptr(), - static_cast(alpha)); + MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, ODType, { + MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + Kernel, xpu>::Launch( + s, data.Size(), out.dptr(), data.dptr(), + static_cast(alpha)); + }); }); }); } diff --git a/tests/python/gpu/test_gluon_gpu.py b/tests/python/gpu/test_gluon_gpu.py index ba7deaeab503..e259b74b9fad 100644 --- a/tests/python/gpu/test_gluon_gpu.py +++ b/tests/python/gpu/test_gluon_gpu.py @@ -50,10 +50,9 @@ def check_rnn_layer(layer): states = layer.begin_state(16) co, cs = layer(x, states) - # atol of 1e-6 required, as exposed by seed 2124685726 - assert_almost_equal(go, co, rtol=1e-2, atol=1e-6) + assert_almost_equal(go, co) for g, c in zip(gs, cs): - assert_almost_equal(g, c, rtol=1e-2, atol=1e-6) + assert_almost_equal(g, c) @with_seed() @@ -70,9 +69,9 @@ def check_rnn_layer_w_rand_inputs(layer): states = layer.begin_state(16) co, cs = layer(x, states) - assert_almost_equal(go, co, rtol=1e-2, atol=1e-6) + assert_almost_equal(go, co) for g, c in 
zip(gs, cs): - assert_almost_equal(g, c, rtol=1e-2, atol=1e-6) + assert_almost_equal(g, c) @with_seed() @@ -485,6 +484,13 @@ def tensor_size(big_tensor_bytes): # This in the past has given cudnnFind() trouble when it needed to allocate similar I/O's # from the area carved out by the MXNET_GPU_MEM_POOL_RESERVE setting (by default 5%). (free_mem_bytes, total_mem_bytes) = mx.context.gpu_memory_info(ctx.device_id) + # This test needs to be 'qualified' for use with each new larger memory size + largest_supported_total_mem_GB = 32 + if (total_mem_bytes > largest_supported_total_mem_GB * 1024 * 1024 * 1024): + sys.stderr.write( + ' bypassing test due to too-large global memory of size {} ... '.format(total_mem_bytes)) + return + start_size = tensor_size(0.20 * total_mem_bytes) num_trials = 10 sys.stderr.write( diff --git a/tests/python/gpu/test_gluon_model_zoo_gpu.py b/tests/python/gpu/test_gluon_model_zoo_gpu.py index 427297173bae..1d0d3f4b2313 100644 --- a/tests/python/gpu/test_gluon_model_zoo_gpu.py +++ b/tests/python/gpu/test_gluon_model_zoo_gpu.py @@ -89,7 +89,7 @@ def test_inference(model_name): max_val = np.max(np.abs(cpu_out.asnumpy())) gpu_max_val = np.max(np.abs(gpu_out.asnumpy())) eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val)) - assert_almost_equal(cpu_out / max_val, gpu_out / gpu_max_val, rtol=1e-3, atol=1e-3) + assert_almost_equal(cpu_out / max_val, gpu_out / gpu_max_val) def get_nn_model(name): if "densenet" in name: diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 9b969607666d..d088b44f85b7 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -23,6 +23,7 @@ import mxnet as mx import numpy as np import pytest +import itertools from mxnet.test_utils import check_consistency, set_default_context, assert_almost_equal, assert_allclose from mxnet.test_utils import check_symbolic_forward, check_symbolic_backward, discard_stderr from mxnet.test_utils import default_context, rand_shape_2d, rand_ndarray, same @@ -404,30 +405,20 @@ def test_batchnorm_with_type(): ] # V2, 2D - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) + bools = [False, True] + for fix_gamma, cudnn_off in itertools.product(bools, bools): + sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off) + check_consistency(sym, ctx_list_v2_2D) # V2, 1D - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - # - # # V2, 3D - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_3D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_3D) + for fix_gamma, cudnn_off in itertools.product(bools, bools): + sym = 
mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off) + check_consistency(sym, ctx_list_v2_1D) + + # V2, 3D + for fix_gamma, cudnn_off in itertools.product(bools, [True,]): + sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off) + check_consistency(sym, ctx_list_v2_3D) @with_seed() @@ -529,9 +520,9 @@ def test_convolution_with_type(): np.dtype(np.float64): 1e-5, np.dtype(np.uint8): 0, np.dtype(np.int32): 0} - check_consistency(sym, ctx_list, tol=tol) + check_consistency(sym, ctx_list, rtol=tol, atol=tol) # test ability to turn off training on bias - check_consistency(sym, ctx_list, grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'}, tol=tol) + check_consistency(sym, ctx_list, grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'}, rtol=tol, atol=tol) # Apply N symbols against each of M contexts, checking that all NxM combinations match. @@ -616,7 +607,6 @@ def test_conv_deconv_guards(): # Test cases for convolution and deconvolution via strided fft. Ensure that the framework # guards against problematic CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING in cuDNN [7.3.1,7.5) # see https://docs.nvidia.com/deeplearning/sdk/cudnn-release-notes/rel_750.html#rel_750 - tol = 1e-1 for (op, opname) in [(mx.sym.Convolution, 'conv'), (mx.sym.Deconvolution, 'deconv')]: dataname = opname + '_data' ctx = {'ctx': mx.gpu(0), dataname: (32, 32, 64, 64), 'type_dict': {dataname: np.float32}} @@ -631,7 +621,7 @@ def test_conv_deconv_guards(): try: sym = op(**test_case_args) sym_no_cudnn = op(cudnn_off=True, **test_case_args) - check_consistency([sym, sym_no_cudnn], [ctx, ctx], tol=tol) + check_consistency([sym, sym_no_cudnn], [ctx, ctx], scale=0.1) except: print('Test failure of mx.sym.{} with args: {}'.format(op.__name__, test_case_args)) raise @@ -655,7 +645,7 @@ def _conv_with_num_streams(seed): cudnn_off=True, name='conv') try: # tol can be pretty high- we're looking for a large diff due to garbaged workspace - check_consistency([sym, sym_no_cudnn], [ctx, ctx], tol=1e-2) + check_consistency([sym, sym_no_cudnn], [ctx, ctx], rtol=1e-2, atol=1e-2) except: print('Failing conv size = {}'.format(size)) raise @@ -678,20 +668,19 @@ def test_convolution_multiple_streams(): @pytest.mark.serial def test_convolution_large_c(): problematic_c = 64 * 1024 - # The convolution accumulates many values, so set large tolerances. - tol = {np.dtype(np.float32): 1, - np.dtype(np.float64): 1} + # The convolution accumulates many values, so scale the input magnitude. 
+ scale = 0.1 def test_1D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, width), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, width), 'type_dict': {'conv_data': np.float64}}] sym = mx.sym.Convolution(layout='NCW', num_filter=8, kernel=(2,), name='conv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) def test_2D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, 2, width), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, 2, width), 'type_dict': {'conv_data': np.float64}}] sym = mx.sym.Convolution(layout='NCHW', num_filter=4, kernel=(2,2), name='conv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) # Run with different data tensor shapes to run cudnnFind() multiple times. # First, populate algo and op caches with models that always use cudnnFind() (req == 'write'). @@ -709,20 +698,19 @@ def test_2D_with_width(width, grad_req): @pytest.mark.serial def test_deconvolution_large_c(): problematic_c = 64 * 1024 - # The deconvolution accumulates many values, so set large tolerances. - tol = {np.dtype(np.float32): 1, - np.dtype(np.float64): 1} + # The deconvolution accumulates many values, so scale the input magnitude. + scale = 0.1 def test_1D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (1, 8, width), 'type_dict': {'deconv_data': np.float32}}, {'ctx': mx.gpu(0), 'deconv_data': (1, 8, width), 'type_dict': {'deconv_data': np.float64}}] sym = mx.sym.Deconvolution(layout='NCW', num_filter=problematic_c, kernel=(2,), name='deconv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) def test_2D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (1, 8, 2, width), 'type_dict': {'deconv_data': np.float32}}, {'ctx': mx.gpu(0), 'deconv_data': (1, 8, 2, width), 'type_dict': {'deconv_data': np.float64}}] sym = mx.sym.Deconvolution(layout='NCHW', num_filter=problematic_c, kernel=(2,2), name='deconv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) # Run with different data tensor shapes to run cudnnFind() multiple times. # First, populate algo and op caches with models that always use cudnnFind() (req == 'write'). 
@@ -831,8 +819,8 @@ def test_deconvolution_with_type(): np.dtype(np.float64): 1e-5, np.dtype(np.uint8): 0, np.dtype(np.int32): 0} - check_consistency(sym, ctx_list, tol=tol) - check_consistency(sym, ctx_list, tol=tol, grad_req="add") + check_consistency(sym, ctx_list, rtol=tol, atol=tol) + check_consistency(sym, ctx_list, rtol=tol, atol=tol, grad_req="add") # 2D deconvolution sym = mx.sym.Deconvolution(num_filter=2, kernel=(3,3), name='deconv') @@ -847,8 +835,8 @@ def test_deconvolution_with_type(): np.dtype(np.float64): 1e-5, np.dtype(np.uint8): 0, np.dtype(np.int32): 0} - check_consistency(sym, ctx_list, tol=tol) - check_consistency(sym, ctx_list, tol=tol, grad_req="add") + check_consistency(sym, ctx_list, rtol=tol, atol=tol) + check_consistency(sym, ctx_list, rtol=tol, atol=tol, grad_req="add") @with_seed() @@ -931,10 +919,11 @@ def test_bilinear_sampler_with_type(): def test_grid_generator_with_type(): data = mx.sym.Variable('data') sym = mx.sym.GridGenerator(data=data, transform_type='affine', target_shape=(20, 20)) + scale = 1 ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}}, {'ctx': mx.cpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}}] - check_consistency(sym, ctx_list) - check_consistency(sym, ctx_list, grad_req="add") + check_consistency(sym, ctx_list, scale=scale) + check_consistency(sym, ctx_list, scale=scale, grad_req="add") sym = mx.sym.GridGenerator(data=data, transform_type='warp', target_shape=(20, 20)) ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}}, {'ctx': mx.cpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}}] @@ -1080,7 +1069,7 @@ def test_pooling_versions_helper(pool_op_list, data, kernel, pool_type, pad, str pool_op)) sym_list.append(sym) - check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), tol=tol) + check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), rtol=tol, atol=tol) def test_pooling_dim(dim, pool_type, dtype, pool_op_list, p_value=2, count_include_pad=True, tol=None): @@ -1239,7 +1228,7 @@ def test_flatten_slice_after_conv(): ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.cpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}}] - check_consistency(slice_sym, ctx_list) + check_consistency(slice_sym, ctx_list, scale=0.5) @with_seed() @@ -1545,7 +1534,7 @@ def test_embedding_helper(data_types, weight_types, low_pad, high_pad): 'type_dict': {'embedding_data': data_type, 'embedding_weight': weight_type}}) arg_params = {'embedding_data': np.random.randint(low=-low_pad, high=V+high_pad, size=(N,))} check_consistency(sym, ctx_list, grad_req={'embedding_data': 'null','embedding_weight': 'write'}, - arg_params=arg_params) + arg_params=arg_params, scale=0.1) data_types = [np.float16, np.float32, np.float64, np.int32] weight_types = [np.float16, np.float32, np.float64] @@ -1678,7 +1667,7 @@ def test_deformable_psroipooling_with_type(): 'deformable_psroipool_trans': np.float16}}, ] - check_consistency(sym, ctx_list, scale=0.1, tol=tol, + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol, grad_req={'deformable_psroipool_data': 'write', 'deformable_psroipool_rois': 'null', 'deformable_psroipool_trans': 'write'}, arg_params=arg_params) @@ -1710,9 +1699,9 @@ def test_deformable_convolution_with_type(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] - check_consistency(sym, ctx_list, scale=0.1, 
tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # test ability to turn off training on bias - check_consistency(sym, ctx_list, scale=0.1, tol=tol, + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol, grad_req={'deformable_conv_data': 'write', 'deformable_conv_offset': 'write', 'deformable_conv_weight': 'write', @@ -1745,7 +1734,7 @@ def test_deformable_convolution_options(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), pad=(1,1), name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # Stride > 1 ctx_list = [{'ctx': mx.gpu(0), @@ -1766,7 +1755,7 @@ def test_deformable_convolution_options(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), stride=(2,2), name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # Dilate > 1 ctx_list = [{'ctx': mx.gpu(0), @@ -1787,7 +1776,7 @@ def test_deformable_convolution_options(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), dilate=(2,2), name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # Deformable group > 1 ctx_list = [{'ctx': mx.gpu(0), @@ -1808,7 +1797,7 @@ def test_deformable_convolution_options(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=4, kernel=(3,3), num_deformable_group=2, name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) def check_rnn_layer(layer): diff --git a/tests/python/gpu/test_profiler_gpu.py b/tests/python/gpu/test_profiler_gpu.py index 11a0b7d12c0e..89eb425c744f 100644 --- a/tests/python/gpu/test_profiler_gpu.py +++ b/tests/python/gpu/test_profiler_gpu.py @@ -27,6 +27,8 @@ # They will be detected by test framework, as long as the current file has a different filename from test_profiler import * +# Test seen to crash pytest worker during development of https://github.com/apache/incubator-mxnet/pull/18694 +del test_aggregate_duplication def test_gpu_memory_profiler_symbolic(): iter_num = 5 diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py index 021961646f3c..331f32d789b7 100644 --- a/tests/python/unittest/common.py +++ b/tests/python/unittest/common.py @@ -222,11 +222,13 @@ def test_new(*args, **kwargs): try: orig_test(*args, **kwargs) except: - # With exceptions, repeat test_msg at INFO level to be sure it's seen. - if log_level < logging.INFO: - logger.info(test_msg) + # With exceptions, repeat test_msg at WARNING level to be sure it's seen. 
+ if log_level < logging.WARNING: + logger.warning(test_msg) raise finally: + # Provide test-isolation for any test having this decorator + mx.nd.waitall() np.random.set_state(post_test_state) return test_new return test_helper @@ -285,7 +287,7 @@ def setup_module(): seed = np.random.randint(0, np.iinfo(np.int32).max) else: seed = int(module_seed_str) - logger.warn('*** module-level seed is set: all tests running deterministically ***') + logger.warning('*** module-level seed is set: all tests running deterministically ***') logger.info('Setting module np/mx/python random seeds, use MXNET_MODULE_SEED=%s to reproduce.', seed) np.random.seed(seed) mx.random.seed(seed) @@ -293,7 +295,7 @@ def setup_module(): # The MXNET_TEST_SEED environment variable will override MXNET_MODULE_SEED for tests with # the 'with_seed()' decoration. Inform the user of this once here at the module level. if os.getenv('MXNET_TEST_SEED') is not None: - logger.warn('*** test-level seed set: all "@with_seed()" tests run deterministically ***') + logger.warning('*** test-level seed set: all "@with_seed()" tests run deterministically ***') def teardown_module(): @@ -359,13 +361,13 @@ def test_helper(orig_test): @functools.wraps(orig_test) def test_new(*args, **kwargs): """Wrapper for tests function.""" - for _ in range(n): + for i in range(n): try: orig_test(*args, **kwargs) + return except AssertionError as e: - err = e - continue - return - raise err + if i == n-1: + raise e + mx.nd.waitall() return test_new return test_helper diff --git a/tests/python/unittest/test_autograd.py b/tests/python/unittest/test_autograd.py index cc1e87ae94e4..ad0601cdbb0f 100644 --- a/tests/python/unittest/test_autograd.py +++ b/tests/python/unittest/test_autograd.py @@ -440,6 +440,7 @@ def check_grad_with_stype(array_stype, grad_stype, expected_stype): check_grad_with_stype(stype, grad_stype, grad_stype) @with_seed() +@pytest.mark.garbage_expected def test_sparse_dot_grad(): def check_sparse_dot_grad(rhs): lhs = rand_ndarray((2, 8), 'csr') diff --git a/tests/python/unittest/test_contrib_gluon_data_vision.py b/tests/python/unittest/test_contrib_gluon_data_vision.py index d2e38d66cb20..166b07f843d7 100644 --- a/tests/python/unittest/test_contrib_gluon_data_vision.py +++ b/tests/python/unittest/test_contrib_gluon_data_vision.py @@ -19,7 +19,7 @@ import numpy as np import scipy.ndimage from mxnet.test_utils import * -from common import assertRaises, with_seed +from common import assertRaises, with_seed, setup_module, teardown_module import shutil import tempfile import unittest @@ -63,6 +63,7 @@ def tearDown(self): print("cleanup {}".format(self.IMAGES_DIR)) shutil.rmtree(self.IMAGES_DIR) + @with_seed() def test_imageiter(self): im_list = [[np.random.randint(0, 5), x] for x in self.IMAGES] os.makedirs('./data', exist_ok=True) @@ -95,6 +96,7 @@ def test_imageiter(self): for batch in it: pass + @with_seed() def test_image_bbox_iter(self): im_list = [_generate_objects() + [x] for x in self.IMAGES] det_iter = mx.gluon.contrib.data.vision.ImageBboxDataLoader(2, (3, 300, 300), imglist=im_list, path_root='') @@ -131,6 +133,7 @@ def test_image_bbox_iter(self): ] + @with_seed() def test_bbox_augmenters(self): # only test if all augmenters will work # TODO(Joshua Zhang): verify the augmenter outputs diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index 52bab2b04ca4..588f5c131525 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -22,7 +22,7 @@ from mxnet import gluon from 
mxnet.gluon import nn from mxnet.base import py_str, MXNetError -from mxnet.test_utils import assert_almost_equal +from mxnet.test_utils import assert_almost_equal, default_context from mxnet.util import is_np_array from mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID from mxnet.test_utils import use_np @@ -810,7 +810,7 @@ def _syncParameters(bn1, bn2, ctx): input2grad.asnumpy(), atol=atol, rtol=rtol) cfgs = [(1, False)] - num_gpus = mx.context.num_gpus() + num_gpus = 0 if default_context().device_type != 'gpu' else mx.context.num_gpus() batch_size = 24 for i in range(1, num_gpus + 1): if batch_size % i == 0: diff --git a/tests/python/unittest/test_gluon_probability_v1.py b/tests/python/unittest/test_gluon_probability_v1.py index 92721f610495..b5b164458225 100644 --- a/tests/python/unittest/test_gluon_probability_v1.py +++ b/tests/python/unittest/test_gluon_probability_v1.py @@ -417,7 +417,7 @@ def hybrid_forward(self, F, scale, *args): # Test icdf for shape, hybridize in itertools.product(shapes, [True, False]): scale = np.random.uniform(0.5, 1.5, shape) - samples = np.random.uniform(size=shape) + samples = np.random.uniform(size=shape, high=1.0-1e-4) net = TestHalfCauchy("icdf") if hybridize: net.hybridize() @@ -1727,15 +1727,15 @@ def _stable_inv(cov): net.hybridize() mx_out = net(loc, cov_param, samples) assert mx_out.shape == samples.shape[:-1] - # Select the first element in the batch, because scipy does not support batching. - loc_t = loc.reshape(-1, event_shape)[0].asnumpy() - sigma_t = sigma.reshape(-1, event_shape, - event_shape)[0].asnumpy() if mx_out.shape == (): mx_out_t = mx_out.asnumpy() else: mx_out_t = mx_out.flatten()[0].asnumpy() samples_t = samples.reshape(-1, event_shape).asnumpy()[0] + # Select the first element in the batch, because scipy does not support batching. + loc_t = loc.reshape(-1, event_shape)[0].asnumpy() + sigma_t = sigma.reshape(-1, event_shape, + event_shape)[0].asnumpy() scipy_mvn = ss.multivariate_normal(loc_t, sigma_t) ss_out = scipy_mvn.logpdf(samples_t) assert_almost_equal(mx_out_t, ss_out, atol=1e-4, @@ -1758,14 +1758,14 @@ def _stable_inv(cov): net.hybridize() mx_out = net(loc, cov_param) assert mx_out.shape == sigma.shape[:-2] - # Select the first element in the batch, because scipy does not support batching. - loc_t = loc.reshape(-1, event_shape)[0].asnumpy() - sigma_t = sigma.reshape(-1, event_shape, - event_shape)[0].asnumpy() if mx_out.shape == (): mx_out_t = mx_out.asnumpy() else: mx_out_t = mx_out.flatten()[0].asnumpy() + # Select the first element in the batch, because scipy does not support batching. 
+ loc_t = loc.reshape(-1, event_shape)[0].asnumpy() + sigma_t = sigma.reshape(-1, event_shape, + event_shape)[0].asnumpy() scipy_mvn = ss.multivariate_normal(loc_t, sigma_t) ss_out = scipy_mvn.entropy() assert_almost_equal(mx_out_t, ss_out, atol=1e-4, @@ -2084,7 +2084,7 @@ def rate(): return np.random.uniform(0.5, 1.5, shape) # exponential, geometric for dist in [mgp.Exponential, mgp.Geometric]: for shape in shapes: - def s(): return np.random.uniform(size=shape) + def s(): return np.random.uniform(size=shape, low=1e-3) _test_zero_kl(_dist_factory(dist, s), shape) if monte_carlo_test: _test_monte_carlo(_dist_factory(dist, s), diff --git a/tests/python/unittest/test_gluon_probability_v2.py b/tests/python/unittest/test_gluon_probability_v2.py index 9a36b4fc7056..d75aa6962cda 100644 --- a/tests/python/unittest/test_gluon_probability_v2.py +++ b/tests/python/unittest/test_gluon_probability_v2.py @@ -417,7 +417,7 @@ def forward(self, scale, *args): # Test icdf for shape, hybridize in itertools.product(shapes, [True, False]): scale = np.random.uniform(0.5, 1.5, shape) - samples = np.random.uniform(size=shape) + samples = np.random.uniform(size=shape, high=1.0-1e-4) net = TestHalfCauchy("icdf") if hybridize: net.hybridize() @@ -1727,14 +1727,14 @@ def _stable_inv(cov): net.hybridize() mx_out = net(loc, cov_param, samples) assert mx_out.shape == samples.shape[:-1] - # Select the first element in the batch, because scipy does not support batching. - loc_t = loc.reshape(-1, event_shape)[0].asnumpy() - sigma_t = sigma.reshape(-1, event_shape, - event_shape)[0].asnumpy() if mx_out.shape == (): mx_out_t = mx_out.asnumpy() else: mx_out_t = mx_out.asnumpy().flatten()[0] + # Select the first element in the batch, because scipy does not support batching. + loc_t = loc.reshape(-1, event_shape)[0].asnumpy() + sigma_t = sigma.reshape(-1, event_shape, + event_shape)[0].asnumpy() samples_t = samples.reshape(-1, event_shape).asnumpy()[0] scipy_mvn = ss.multivariate_normal(loc_t, sigma_t) ss_out = scipy_mvn.logpdf(samples_t) @@ -1758,14 +1758,14 @@ def _stable_inv(cov): net.hybridize() mx_out = net(loc, cov_param) assert mx_out.shape == sigma.shape[:-2] - # Select the first element in the batch, because scipy does not support batching. - loc_t = loc.reshape(-1, event_shape)[0].asnumpy() - sigma_t = sigma.reshape(-1, event_shape, - event_shape)[0].asnumpy() if mx_out.shape == (): mx_out_t = mx_out.asnumpy() else: mx_out_t = mx_out.asnumpy().flatten()[0] + # Select the first element in the batch, because scipy does not support batching. 
+ loc_t = loc.reshape(-1, event_shape)[0].asnumpy() + sigma_t = sigma.reshape(-1, event_shape, + event_shape)[0].asnumpy() scipy_mvn = ss.multivariate_normal(loc_t, sigma_t) ss_out = scipy_mvn.entropy() assert_almost_equal(mx_out_t, ss_out, atol=1e-4, @@ -2081,10 +2081,11 @@ def rate(): return np.random.uniform(0.5, 1.5, shape) _dist_factory(dist, rate), repeated_times) + # exponential, geometric for dist in [mgp.Exponential, mgp.Geometric]: for shape in shapes: - def s(): return np.random.uniform(size=shape) + def s(): return np.random.uniform(size=shape, low=1e-3) _test_zero_kl(_dist_factory(dist, s), shape) if monte_carlo_test: _test_monte_carlo(_dist_factory(dist, s), diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index 2aab5c096ab9..a01746ee5e6f 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -24,7 +24,7 @@ import random import functools import pytest -from common import with_seed, assertRaises, TemporaryDirectory +from common import with_seed, assertRaises, TemporaryDirectory, setup_module, teardown_module from mxnet.test_utils import almost_equal from mxnet.test_utils import assert_almost_equal, assert_exception from mxnet.test_utils import default_context diff --git a/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py b/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py index 9c9c7fd1ed56..8c3e76aac91c 100644 --- a/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py +++ b/tests/python/unittest/test_numpy_contrib_gluon_data_vision.py @@ -19,7 +19,7 @@ import numpy as np import scipy.ndimage from mxnet.test_utils import * -from common import assertRaises, with_seed +from common import assertRaises, with_seed, setup_module, teardown_module import shutil import tempfile import unittest @@ -63,10 +63,11 @@ def tearDown(self): print("cleanup {}".format(self.IMAGES_DIR)) shutil.rmtree(self.IMAGES_DIR) + @with_seed() @use_np def test_imageiter(self): im_list = [[np.random.randint(0, 5), x] for x in self.IMAGES] - fname = './data/test_imageiter.lst' + fname = './data/test_numpy_imageiter.lst' file_list = ['\t'.join([str(k), str(np.random.randint(0, 5)), x]) for k, x in enumerate(self.IMAGES)] with open(fname, 'w') as f: @@ -95,6 +96,7 @@ def test_imageiter(self): for batch in it: pass + @with_seed() @use_np def test_image_bbox_iter(self): im_list = [_generate_objects() + [x] for x in self.IMAGES] @@ -110,7 +112,7 @@ def test_image_bbox_iter(self): pass # test file list with last batch handle - fname = './data/test_imagedetiter.lst' + fname = './data/test_numpy_imagedetiter.lst' im_list = [[k] + _generate_objects() + [x] for k, x in enumerate(self.IMAGES)] with open(fname, 'w') as f: for line in im_list: @@ -130,6 +132,7 @@ def test_image_bbox_iter(self): path_imglist=fname, path_root='', last_batch='keep') ] + @with_seed() @use_np def test_bbox_augmenters(self): # only test if all augmenters will work diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 8b50fc46f036..3b9786408df9 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -29,7 +29,7 @@ from mxnet.test_utils import assert_almost_equal from mxnet.test_utils import use_np from mxnet.test_utils import is_op_runnable -from common import assertRaises, with_seed +from common import assertRaises, with_seed, random_seed from mxnet.numpy_dispatch_protocol import 
with_array_function_protocol, with_array_ufunc_protocol from mxnet.numpy_dispatch_protocol import _NUMPY_ARRAY_FUNCTION_LIST, _NUMPY_ARRAY_UFUNC_LIST @@ -517,8 +517,8 @@ def _add_workload_linalg_cholesky(): dtypes = (np.float32, np.float64) for shape, dtype in itertools.product(shapes, dtypes): - _np.random.seed(1) - a = _np.random.randn(*shape) + with random_seed(1): + a = _np.random.randn(*shape) t = list(range(len(shape))) t[-2:] = -1, -2 @@ -2898,7 +2898,6 @@ def _add_workload_unwrap(): phase[3:] += np.pi phase_s = np.vstack((phase,phase)) OpArgMngr.add_workload('unwrap', phase) - print(phase_s.shape) OpArgMngr.add_workload('unwrap', phase_s, axis=1) diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 88ad77fc978d..edc41f9e771d 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -30,7 +30,7 @@ from mxnet.gluon import HybridBlock from mxnet.base import MXNetError from mxnet.test_utils import same, assert_almost_equal, rand_shape_nd, rand_ndarray -from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like +from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like, effective_dtype from mxnet.test_utils import new_matrix_with_real_eigvals_nd from mxnet.test_utils import new_sym_matrix_with_real_eigvals_nd from common import assertRaises, with_seed, retry, xfail_when_nonstandard_decimal_separator @@ -1849,15 +1849,18 @@ def _test_batchnorm_impl(axis, running_mean = running_mean * momentum + \ data_mean_flat * (1 - momentum) + + m = _np.prod(shape) / shape[axis] + # cudnn uses m-1 in the denominator of its sample variance calculation, not m + sample_var_adjust = 1.0 if cudnn_off or fix_gamma else m / (m-1) running_var = running_var * momentum + \ - data_var_flat * (1 - momentum) + data_var_flat * sample_var_adjust * (1 - momentum) W = bn_gamma.reshape(expand_shape) dnx = ograd * W xsm = data - data_mean nd = 1.0 / np.sqrt(data_var + epsilon) nx = xsm * nd - m = _np.prod(shape) / shape[axis] dvar = (dnx * xsm).sum(axis=reduce_axis, keepdims=True, ) * (-0.5) * np.power(nd, 3) dmean = -nd * dnx.sum(axis=reduce_axis, keepdims=True) - \ @@ -1966,6 +1969,7 @@ def np_log_softmax(x, axis=-1): assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True) mx_out.backward() + mx_a.grad.wait_to_read() assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5) @@ -4608,7 +4612,7 @@ def hybrid_forward(self, F, loc, scale): scale = np.ones(scale) mx_out = getattr(np.random, op_name)(loc, scale) np_out = getattr(_np.random, op_name)(loc, scale) - assert_almost_equal(mx_out.asnumpy().shape, np_out.shape) + assert mx_out.asnumpy().shape == np_out.shape @with_seed() @@ -4654,7 +4658,7 @@ def hybrid_forward(self, F, mean, sigma): for ((shape1, shape2), out_shape) in zip(param_shape, output_shapes): mx_out = np.random.lognormal(np.zeros(shape1), np.ones(shape2), out_shape) np_out = _np.random.lognormal(np.zeros(shape1).asnumpy(), np.ones(shape2).asnumpy(), out_shape) - assert_almost_equal(mx_out.asnumpy().shape, np_out.shape) + assert mx_out.asnumpy().shape == np_out.shape def _test_lognormal_exception(sigma): output = np.random.lognormal(sigma=sigma).asnumpy() @@ -4913,7 +4917,7 @@ def hybrid_forward(self, F, scale): with mx.autograd.record(): mx_out = test_rayleigh(scale) np_out = _np.random.rayleigh(scale = scale.asnumpy(), size = shape) - assert_almost_equal(np_out.shape, mx_out.shape) + assert np_out.shape == mx_out.shape 
mx_out.backward() assert scale.grad.shape == shape assert_almost_equal(scale.grad.asnumpy().sum(), mx_out.asnumpy().sum(), rtol=1e-3, atol=1e-5) @@ -4921,7 +4925,7 @@ def hybrid_forward(self, F, scale): for shape in shapes: mx_out = np.random.rayleigh(np.array([1]), shape) np_out = _np.random.rayleigh(np.array([1]).asnumpy(), shape) - assert_almost_equal(mx_out.asnumpy().shape, np_out.shape) + assert mx_out.asnumpy().shape == np_out.shape def _test_rayleigh_exception(scale): output = np.random.rayleigh(scale=scale).asnumpy() @@ -4954,7 +4958,7 @@ def hybrid_forward(self, F, scale): with mx.autograd.record(): mx_out = test_exponential_grad(scale) np_out = _np.random.exponential(scale = scale.asnumpy(), size = out_shape) - assert_almost_equal(np_out.shape, mx_out.shape) + assert np_out.shape == mx_out.shape mx_out.backward() assert scale.grad.shape == out_shape assert_almost_equal(scale.grad.asnumpy().sum(), mx_out.asnumpy().sum(), rtol=1e-3, atol=1e-5) @@ -5724,6 +5728,8 @@ def check_svd(UT, L, V, data_np): data_np = _np.random.uniform(-10.0, 10.0, shape) data_np = _np.array(data_np, dtype=dtype) data = np.array(data_np, dtype=dtype) + if effective_dtype(data) == np.dtype(np.float16): + continue data.attach_grad() with mx.autograd.record(): ret = test_svd(data) @@ -6115,7 +6121,7 @@ def check_solve(x, a_np, b_np): print(e) else: assert x.shape == x_expected.shape - assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol) + assert_almost_equal(x, x_expected) def newInvertibleMatrix_2D(shape, max_cond=4): while 1: @@ -6155,7 +6161,6 @@ def get_grad_b(A, X): nrhs = (-1, 0, 1, 2, 3) dtypes = ['float32', 'float64'] for hybridize, shape, dtype, nrh in itertools.product([False, True], shapes, dtypes, nrhs): - rtol, atol =1e-2, 1e-4 test_solve = TestSolve() if hybridize: test_solve.hybridize() @@ -6189,8 +6194,8 @@ def get_grad_b(A, X): mx.autograd.backward(mx_out) b_backward_expected = get_grad_b(a.asnumpy(), mx_out.asnumpy()) a_backward_expected = -_np.matmul(b_backward_expected, _np.swapaxes(mx_out, -1, -2).asnumpy()) - assert_almost_equal(a.grad.asnumpy(), a_backward_expected, rtol=rtol, atol=atol) - assert_almost_equal(b.grad.asnumpy(), b_backward_expected, rtol=rtol, atol=atol) + assert_almost_equal(a.grad, a_backward_expected) + assert_almost_equal(b.grad, b_backward_expected) # check imperative once again mx_out = np.linalg.solve(a, b) @@ -6215,7 +6220,7 @@ def check_tensorinv(inv_a, a_np, ind): print(e) else: assert inv_a.shape == inv_a_expected.shape - assert_almost_equal(inv_a.asnumpy(), inv_a_expected, rtol=rtol, atol=atol) + assert_almost_equal(inv_a, inv_a_expected) def newInvertibleMatrix_2D(shape, max_cond=4): while 1: @@ -6258,11 +6263,6 @@ def get_grad_A(A, ind): ] dtypes = ['float32', 'float64'] for hybridize, shape, dtype, in itertools.product([False, True], shapes, dtypes): - rtol = 1e-3 - atol = 1e-5 - if dtype == 'float32': - rtol = 1e-2 - atol = 1e-4 ind = shape[0] test_tensorinv = TestTensorinv(ind=ind) if hybridize: @@ -6290,7 +6290,7 @@ def get_grad_A(A, ind): if 0 not in mx_out.shape: mx.autograd.backward(mx_out) grad_A_expected = get_grad_A(a.asnumpy(), ind) - assert_almost_equal(a.grad.asnumpy(), grad_A_expected, rtol=rtol, atol=atol) + assert_almost_equal(a.grad, grad_A_expected) # check imperative once again mx_out = np.linalg.tensorinv(a, ind) @@ -6343,7 +6343,7 @@ def check_tensorsolve(x, a_np, b_np, axes): print(e) else: assert x.shape == x_expected.shape - assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol) + assert_almost_equal(x, 
x_expected) def shapeInfer(a_shape, b_shape, axes=None): # b_shape - Right-hand tensor shape, which can be of any shape. @@ -6405,8 +6405,6 @@ def newInvertibleMatrix_2D(shape, max_cond=4): for hybridize in [True, False]: for dtype in dtypes: for a_shape, b_shape, axes in shapes: - rtol = 1e-2 if dtype == 'float32' else 1e-3 - atol = 1e-4 if dtype == 'float32' else 1e-5 test_tensorsolve = TestTensorsolve(axes) if hybridize: test_tensorsolve.hybridize() @@ -6443,8 +6441,8 @@ def newInvertibleMatrix_2D(shape, max_cond=4): mx.autograd.backward(mx_out) grad_a_expected, grad_b_expected = get_tensorsolve_backward( a.asnumpy(), b.asnumpy(), mx_out.asnumpy(), a_axes, a_origin_axes, a_trans_shape) - assert_almost_equal(a.grad.asnumpy(), grad_a_expected, rtol=rtol, atol=atol) - assert_almost_equal(b.grad.asnumpy(), grad_b_expected, rtol=rtol, atol=atol) + assert_almost_equal(a.grad, grad_a_expected) + assert_almost_equal(b.grad, grad_b_expected) # check imperative once again mx_out = test_tensorsolve(a, b) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 1578e1450326..344cb7279f98 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -458,21 +458,26 @@ def test_symbol_pow(): @with_seed() def test_fully_connected(): + # Create data of given shape as a uniform distribution centered on 0.0 + def random_data(shape, dtype=np.float32): + return mx.nd.random.uniform(low=-0.5, + high=0.5, shape=shape, dtype=dtype) data = mx.sym.var("data") fc_weight = mx.sym.var("weight") fc_bias = mx.sym.var("bias") fc = mx.sym.FullyConnected(data=data, weight=fc_weight, bias=fc_bias, num_hidden=10, no_bias=False, name='fc') - data = mx.nd.random.uniform(shape=(5, 5, 5, 13), dtype=np.float32) - fc_weight = mx.nd.random.uniform(shape=(10, 325), dtype=np.float32) - fc_bias = mx.nd.random.uniform(shape=(10), dtype=np.float32) - fc_bias2 = mx.nd.random.uniform(shape=(10, 1), dtype=np.float32) + + data = random_data(shape=(5, 5, 5, 13)) + fc_weight = random_data(shape=(10, 325)) + fc_bias = random_data(shape=(10)) + fc_bias2 = random_data(shape=(10, 1)) + data_np = data.asnumpy().reshape(5, 325) fc_weight_np = np.transpose(fc_weight.asnumpy()) fc_bias_np = fc_bias.asnumpy() res = np.dot(data_np, fc_weight_np) + fc_bias.asnumpy() check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, {'fc_output': res}) - check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, - numeric_eps=1e-2, rtol=1e-4, atol=1e-2) + check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}) # TODO: Fix Bug #15032 when bias has ndim > 1 #check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias2.asnumpy()}, {'fc_output': res}) @@ -1544,15 +1549,18 @@ def _test_batchnorm_impl(axis, running_mean = running_mean * momentum + \ data_mean_flat * (1 - momentum) + + m = np.prod(shape) / shape[axis] + # cudnn uses m-1 in the denominator of its sample variance calculation, not m + sample_var_adjust = 1.0 if cudnn_off or fix_gamma else m / (m-1) running_var = running_var * momentum + \ - data_var_flat * (1 - momentum) + data_var_flat * sample_var_adjust * (1 - momentum) W = bn_gamma.reshape(expand_shape) dnx = ograd * W xsm = data - data_mean nd = 1.0 / mx.nd.sqrt(data_var + epsilon) nx = xsm * nd - m = np.prod(shape) / shape[axis] dvar = (dnx * xsm).sum(axis=axis, keepdims=True, exclude=True) * (-0.5) * mx.nd.power(nd, 3) 
dmean = -nd * dnx.sum(axis=axis, keepdims=True, exclude=True) - \ @@ -2478,13 +2486,13 @@ def test_reduce_inner(numpy_reduce_func, numpy_reduce_grad_func, mx_reduce_sym, args_grad={'a': grad_nd}) net.forward(is_train=True) - equal_forward = almost_equal_ignore_nan(net.outputs[0].asnumpy(), sum_groundtruth, 1E-4, 1E-4) - assert equal_forward + # check forward + assert_almost_equal_ignore_nan(net.outputs[0].asnumpy(), sum_groundtruth, rtol=1e-4, atol=1e-4) net.backward(out_grads=mx.nd.array(outgrad_npy)) bc_grad_groundtruth = np.broadcast_to(grad_groundtruth, grad_nd.shape) - equal_backward = almost_equal_ignore_nan(grad_nd.asnumpy(), bc_grad_groundtruth, 1E-4, 1E-4) - assert equal_backward + # check backward + assert_almost_equal_ignore_nan(grad_nd.asnumpy(), bc_grad_groundtruth, rtol=1e-4, atol=1e-4) test_none_axis = [True, False] for test_none in test_none_axis: @@ -4074,7 +4082,7 @@ def get_large_matrix(): out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=a_npy.size, is_ascend=is_ascend) else: out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=5, is_ascend=is_ascend) - check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, ctx=ctx) + check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, rtol=1e-2, ctx=ctx) check_symbolic_forward(b, location={'a': a_npy}, expected=[out_npy]) b = mx.sym.topk(a, axis=1, is_ascend=is_ascend, ret_typ="indices", k=5) @@ -4122,7 +4130,7 @@ def get_large_matrix(): for is_ascend in [True, False]: b = mx.sym.topk(a, axis=axis, is_ascend=is_ascend, ret_typ="value", k=k) out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=k, is_ascend=is_ascend) - check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, ctx=ctx) + check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, rtol=1e-2, ctx=ctx) check_symbolic_forward(b, location={'a': a_npy}, expected=[out_npy]) b = mx.sym.topk(a, axis=1, is_ascend=is_ascend, ret_typ="indices", k=5) @@ -4285,7 +4293,7 @@ def test_grid_generator(): # check forward exe.arg_dict['affine'][:] = np.array([[1.0,0,0,0,1.0,0]]) exe.forward(is_train=True) - output = exe.outputs[0].asnumpy() + output = exe.outputs[0] output[0,0,:,:] = (output[0,0,:,:] + 1) * (target_shape[1] - 1) / 2.0 output[0,1,:,:] = (output[0,1,:,:] + 1) * (target_shape[0] - 1) / 2.0 xv, yv = np.meshgrid(np.arange(target_shape[0]), np.arange(target_shape[1])) @@ -4300,7 +4308,7 @@ def test_grid_generator(): tmp[1] = -1.0 + (np.arange(target_shape[0]*target_shape[1]) // target_shape[1]) * (2.0 / (target_shape[0]-1)) tmp[2] = 1 grad_est = np.dot(out_grad[0].reshape(2,target_shape[0]*target_shape[1]),tmp.T).reshape(1,6) - assert_almost_equal(exe.grad_dict['affine'], grad_est, rtol=1e-3, atol=1e-5) + assert_almost_equal(exe.grad_dict['affine'], grad_est) # check addto exe = grid._simple_bind(ctx=default_context(), affine=(1,6), grad_req='add') grid_grad_npy = np.random.normal(size=exe.grad_dict['affine'].shape) @@ -4308,7 +4316,7 @@ def test_grid_generator(): exe.arg_dict['affine'][:] = np.array([[1.0, 0, 0, 0, 1.0, 0]]) exe.forward(is_train=True) exe.backward(mx.nd.array(out_grad)) - assert_almost_equal(exe.grad_dict['affine'], grad_est + grid_grad_npy, rtol=1e-2, atol=1e-5) + assert_almost_equal(exe.grad_dict['affine'], grad_est + grid_grad_npy) # transform_type = warp test_case = [(12,21),(4,3),(6,12)] @@ -5354,51 +5362,62 @@ def test_div_sqrt_dim(): check_symbolic_forward(test, [data_tmp], [data_tmp / np.sqrt(data_tmp.shape[-1])]) +# helper function to identify inputs likely to fail check_numeric_gradient tol 
test +# due to finite difference method inaccuracies or function discontinuities at the origin +def bad_input_finder(f, f_grad, dtype): + eps = default_numeric_eps()[np.dtype(dtype)] + rtol = default_rtols()[np.dtype(dtype)] + def expected_relative_error(x): + fd_gradient = (f(x+eps/2) - f(x-eps/2)) / eps + return abs(fd_gradient/f_grad(x) - 1) + def is_fd_problem_input(x): + return abs(x) < eps/2 or expected_relative_error(x) > rtol + return np.vectorize(is_fd_problem_input) + @with_seed() def test_reciprocal_op(): - eps = 2**(-11) - data_tmp = np.random.rand(3, 4) * 10 - 5 - # Avoid possible division by 0 errors and finite difference method inaccuracies. - # Factor of 6 below set empirically, depends on eps. - # Issue exposed by seed 879579887. - # Replace problematic inputs with 1.0. - data_tmp[abs(data_tmp) < 6*eps] = 1.0 + data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5 + + # Avoid possible division by 0 errors and finite difference method + # inaccuracies by replacing problem inputs with 1.0. + is_bad_input = bad_input_finder(np.reciprocal, + lambda x: -np.reciprocal(x)**2, np.float32) + data_tmp[is_bad_input(data_tmp)] = 1.0 data = mx.symbol.Variable('data') test = mx.sym.reciprocal(data) - check_numeric_gradient(test, [data_tmp], numeric_eps = eps) + check_numeric_gradient(test, [data_tmp]) check_symbolic_forward(test, [data_tmp], [np.reciprocal(data_tmp)]) @with_seed() def test_cbrt_op(): - eps = 2**(-11) - data_tmp = np.random.rand(3, 4) * 10 - 5 - # Avoid finite difference method inaccuracies due to infinite gradient at the origin. - # Factor of 4 below set empirically, depends on eps. - # Issue exposed by seed 553872106. - # Replace problematic inputs with 1.0. - data_tmp[abs(data_tmp) < 4*eps] = 1.0 + data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5 + + # Avoid possible division by 0 errors and finite difference method + # inaccuracies by replacing problem inputs with 1.0. + is_bad_input = bad_input_finder(np.cbrt, + lambda x: 1./(3 * np.cbrt(x)**2), np.float32) + data_tmp[is_bad_input(data_tmp)] = 1.0 data = mx.symbol.Variable('data') test = mx.sym.cbrt(data) - - check_numeric_gradient(test, [data_tmp], numeric_eps=eps) + check_numeric_gradient(test, [data_tmp]) check_symbolic_forward(test, [data_tmp], [np.cbrt(data_tmp)]) @with_seed() def test_rcbrt_op(): - eps = 2**(-11) - data_tmp = np.random.rand(3, 4) * 10 - 5 - # Avoid possible division by 0 errors and finite difference method inaccuracies. - # Factor of 4 below set empirically, depends on eps. - # Issue exposed by seed 788174893. - # Replace problematic inputs with 1.0. - data_tmp[abs(data_tmp) < 4*eps] = 1.0 + data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5 + + # Avoid possible division by 0 errors and finite difference method + # inaccuracies by replacing problem inputs with 1.0.
+ is_bad_input = bad_input_finder(lambda x: 1./np.cbrt(x), + lambda x: -1./(3 * np.cbrt(x)**4), np.float32) + data_tmp[is_bad_input(data_tmp)] = 1.0 data = mx.symbol.Variable('data') test = mx.sym.rcbrt(data) - check_numeric_gradient(test, [data_tmp], numeric_eps = eps) + check_numeric_gradient(test, [data_tmp]) check_symbolic_forward(test, [data_tmp], [1/np.cbrt(data_tmp)]) @@ -5807,7 +5826,7 @@ def test_deformable_convolution(): # By now we only have gpu implementation if default_context().device_type == 'gpu': check_numeric_gradient(op, [im_data, offset_data, weight, bias], rtol=rtol, atol=atol, - grad_nodes=grad_nodes, ctx=mx.gpu(0)) + grad_nodes=grad_nodes, ctx=mx.gpu(0), numeric_eps=1.0/64) def _validate_sample_location(input_rois, input_offset, spatial_scale, pooled_w, pooled_h, sample_per_part, part_size, output_dim, num_classes, trans_std, feat_h, feat_w): @@ -5900,10 +5919,11 @@ def test_deformable_psroipooling(): grad_nodes=grad_nodes, ctx=mx.gpu(0)) -def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9): - num_eps = 1e-6 - rtol_bw = 1e-5 - atol_bw = 1e-6 +def _gemm_test_helper(dtype, grad_check, rtol_fw = None, atol_fw = None, + rtol_bw = None, atol_bw = None, num_eps = None): + def np_random_data(shape, dtype=np.float32): + return np.random.uniform(low=-0.5, + high=0.5, size=shape).astype(dtype) data1 = mx.symbol.Variable('data1') data2 = mx.symbol.Variable('data2') @@ -5922,10 +5942,10 @@ def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9): shape2 = (3, 2) shape3 = (3, 3) shape4 = (2, 2) - data_in1 = np.random.uniform(1, 10, shape1).astype(dtype) - data_in2 = np.random.uniform(1, 10, shape2).astype(dtype) - data_in3 = np.random.uniform(1, 10, shape3).astype(dtype) - data_in4 = np.random.uniform(1, 10, shape4).astype(dtype) + data_in1 = np_random_data(shape1, dtype) + data_in2 = np_random_data(shape2, dtype) + data_in3 = np_random_data(shape3, dtype) + data_in4 = np_random_data(shape4, dtype) # Check all transpositions of gemm operator. data_in1_t = np.transpose(data_in1) data_in2_t = np.transpose(data_in2) @@ -6032,10 +6052,10 @@ def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9): def test_gemm(): _gemm_test_helper(np.float64, True) os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "0" - _gemm_test_helper(np.float32, False, rtol_fw = 1e-5, atol_fw = 1e-7) + _gemm_test_helper(np.float32, True) if default_context().device_type == 'gpu': os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "1" - _gemm_test_helper(np.float32, False, rtol_fw = 2e-5, atol_fw = 2e-7) + _gemm_test_helper(np.float32, True) os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "0" # Helper functions for test_laop diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py index dc072013ea50..8bc086e14e52 100644 --- a/tests/python/unittest/test_sparse_operator.py +++ b/tests/python/unittest/test_sparse_operator.py @@ -1598,6 +1598,7 @@ def test_fallback(func_name, axis=0, keepdims=True, exclude=True): @with_seed() +@pytest.mark.serial def test_sparse_square_sum(): dim0 = 30 dim1 = 30
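
A minimal sketch (plain NumPy, not part of the patch, shapes and names hypothetical) of the m/(m-1) sample-variance adjustment applied to the batchnorm reference computation above: cudnn folds the unbiased (divide-by-m-1) variance into running_var, so the reference scales the population variance by m/(m-1) before the running-average update, except when cudnn_off or fix_gamma disables that path.

    import numpy as np

    x = np.random.rand(24, 3, 8, 8).astype(np.float32)   # hypothetical NCHW input batch
    axis = 1                                              # channel axis
    m = x.size / x.shape[axis]                            # elements averaged per channel

    pop_var = x.var(axis=(0, 2, 3))                       # NumPy default: divide by m
    sample_var = pop_var * m / (m - 1)                    # unbiased estimate, as cudnn reports

    momentum = 0.9
    running_var = np.ones(x.shape[axis], dtype=np.float32)
    running_var = running_var * momentum + sample_var * (1 - momentum)

The diff's sample_var_adjust term reduces to 1.0 (no adjustment) when cudnn_off or fix_gamma is set, matching the non-cudnn code path.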