diff --git a/ci/windows/test_py2_cpu.ps1 b/ci/windows/test_py2_cpu.ps1
index 1c4a72682ae5..26b02d890fe3 100644
--- a/ci/windows/test_py2_cpu.ps1
+++ b/ci/windows/test_py2_cpu.ps1
@@ -27,3 +27,7 @@ C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
 if (! $?) { Throw ("Error running unittest") }
 C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train
 if (! $?) { Throw ("Error running train tests") }
+# Adding this extra test since it's not possible to set an env var on the fly in Windows.
+$env:MXNET_SAFE_ACCUMULATION=1
+C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest\test_operator.py:test_norm
+if (! $?) { Throw ("Error running unittest") }
diff --git a/ci/windows/test_py2_gpu.ps1 b/ci/windows/test_py2_gpu.ps1
index 8a6c8e9b44f9..8418bd0b490d 100644
--- a/ci/windows/test_py2_gpu.ps1
+++ b/ci/windows/test_py2_gpu.ps1
@@ -31,3 +31,7 @@ C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
 if (! $?) { Throw ("Error running tests") }
 C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error tests\python\train
 if (! $?) { Throw ("Error running tests") }
+# Adding this extra test since it's not possible to set an env var on the fly in Windows.
+$env:MXNET_SAFE_ACCUMULATION=1
+C:\Python27\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py:test_norm
+if (! $?) { Throw ("Error running tests") }
diff --git a/ci/windows/test_py3_cpu.ps1 b/ci/windows/test_py3_cpu.ps1
index a7067f9f3f83..56bf5c6d4595 100644
--- a/ci/windows/test_py3_cpu.ps1
+++ b/ci/windows/test_py3_cpu.ps1
@@ -27,3 +27,7 @@ C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
 if (! $?) { Throw ("Error running unittest") }
 C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train
 if (! $?) { Throw ("Error running train tests") }
+# Adding this extra test since it's not possible to set an env var on the fly in Windows.
+$env:MXNET_SAFE_ACCUMULATION=1
+C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_unittest.xml tests\python\unittest\test_operator.py:test_norm
+if (! $?) { Throw ("Error running unittest") }
diff --git a/ci/windows/test_py3_gpu.ps1 b/ci/windows/test_py3_gpu.ps1
index 5fbc9f2f8036..ef07b428fca2 100644
--- a/ci/windows/test_py3_gpu.ps1
+++ b/ci/windows/test_py3_gpu.ps1
@@ -31,3 +31,7 @@ C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 -
 if (! $?) { Throw ("Error running tests") }
 C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_train.xml tests\python\train
 if (! $?) { Throw ("Error running tests") }
+# Adding this extra test since it's not possible to set an env var on the fly in Windows.
+$env:MXNET_SAFE_ACCUMULATION=1
+C:\Python37\python.exe -m nose -v --with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error --with-xunit --xunit-file nosetests_operator.xml tests\python\gpu\test_operator_gpu.py:test_norm
+if (! $?) { Throw ("Error running tests") }
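A note on the CI change above: per the scripts' own comment, the variable cannot be toggled on the fly on Windows, so it is set once at the PowerShell session level and `test_norm` gets a dedicated extra nose invocation. On other platforms the updated test toggles it per run via `os.environ`, along these lines (an illustrative sketch, not part of the patch):

```python
import os

# Must be set before the operator under test executes: the backend
# consults the variable at operator dispatch time, not at import time.
os.environ["MXNET_SAFE_ACCUMULATION"] = "1"

import mxnet as mx

x = mx.nd.ones((10,), dtype='float16')
print(mx.nd.norm(x, ord=2))  # norm now accumulates in float32
```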
diff --git a/docs/faq/env_var.md b/docs/faq/env_var.md
index c5ebd54c55a1..c1c23ba969d2 100644
--- a/docs/faq/env_var.md
+++ b/docs/faq/env_var.md
@@ -280,6 +280,14 @@ When USE_PROFILER is enabled in Makefile or CMake, the following environments ca
   - Values: Int ```(default=4)```
   - This variable controls how many CuDNN dropout state resources to create for each GPU context for use in operator.
 
+* MXNET_SAFE_ACCUMULATION
+  - Values: 0(false) or 1(true) ```(default=0)```
+  - If this variable is set, accumulation enters safe mode: the accumulation is done in a data type of higher precision than
+    the input data type, giving more accurate results at the cost of some performance and of numerical compatibility with
+    earlier releases. For example, when the variable is set to 1(true), float16 input data is accumulated
+    in float32.
+  - Model accuracies do not necessarily improve with this environment variable turned on.
+
 Settings for Minimum Memory Usage
 ---------------------------------
 - Make sure ```min(MXNET_EXEC_NUM_TEMP, MXNET_GPU_WORKER_NTHREADS) = 1```
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 9fec6cd1255a..f7d9f13fd869 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -1183,12 +1183,23 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
   } else {
     small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, false);
   }
+
   if (param.ord == 1) {
-    ReduceAxesComputeImpl<xpu, mshadow_op::sum, false, mshadow_op::abs>(
+    if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+      ReduceAxesComputeImpl<xpu, mshadow_op::sum, true, mshadow_op::abs>(
+          ctx, inputs, req, outputs, small);
+    } else {
+      ReduceAxesComputeImpl<xpu, mshadow_op::sum, false, mshadow_op::abs>(
        ctx, inputs, req, outputs, small);
+    }
   } else if (param.ord == 2) {
-    ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, false, mshadow_op::identity>(
+    if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+      ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, true, mshadow_op::identity>(
        ctx, inputs, req, outputs, small);
+    } else {
+      ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, false, mshadow_op::identity>(
+          ctx, inputs, req, outputs, small);
+    }
   }
 }
 
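The dispatch above selects between two instantiations of `ReduceAxesComputeImpl`: with the flag enabled, float16 inputs are accumulated in a wider type and only the final result is narrowed. The numerical effect is easy to reproduce in plain NumPy (a toy illustration of the idea, not MXNet's actual kernel):

```python
import numpy as np

x = np.full(10000, 0.1, dtype=np.float16)  # exact sum would be ~999.76

# Unsafe: keep the running total in float16. Once the total reaches 256,
# adding 0.1 falls below half the float16 spacing and is rounded away.
acc16 = np.float16(0.0)
for v in x:
    acc16 = np.float16(acc16 + v)

# Safe: accumulate in float32, cast once at the end.
acc32 = np.float32(0.0)
for v in x:
    acc32 += np.float32(v)

print(acc16, np.float16(acc32))  # roughly 256.0 vs 1000.0
```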
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 7db07596d7f8..01798f58f16e 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -3482,51 +3482,61 @@ def l2norm(input_data, axis=0, keepdims=True):
     epsilon = 1e-3
     acc_type = {np.float16: np.float32, np.float32: np.float32, np.float64: np.float64,
                 np.int32: np.int32, np.int64: np.int64}
+    dtype_to_str = {np.float16: 'float16', np.float32: 'float32', np.float64: 'float64',
+                    np.int32: 'int32', np.int64: 'int64'}
     is_windows = sys.platform.startswith('win')
-    for order in [1, 2]:
-        for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]:
-            for i in range(in_data_dim):
-                for out_dtype in ['float32', 'float64', 'int32', 'int64']:
-                    if (dtype == np.int32 or dtype == np.int64) and ('int' not in out_dtype or is_windows):
-                        continue
-                    if dtype != np.int32 and dtype != np.int64 and 'int' in out_dtype:
-                        continue
-                    backward_dtype = np.float32 if out_dtype == 'float32' else np.float64
-                    skip_backward = 'int' in out_dtype
-                    print(order, dtype, i, out_dtype, in_shape)
-                    in_data = np.random.uniform(-1, 1, in_shape).astype(acc_type[dtype])
-                    in_data[abs(in_data) < epsilon] = 2 * epsilon
-                    norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, out_dtype=out_dtype, keepdims=True)
-                    npy_out = l1norm(in_data, i) if order is 1 else l2norm(in_data, i)
-                    npy_out_backward = np.sign(in_data) if order is 1 else in_data/npy_out
-                    check_symbolic_forward(norm_sym, [in_data.astype(dtype)], [npy_out.astype(out_dtype)],
-                                           rtol=1e-3, atol=1e-5, ctx=ctx)
-                    if not skip_backward:
-                        check_symbolic_backward(norm_sym, [in_data.astype(dtype)],
-                                                [np.ones(npy_out.shape).astype(out_dtype)],
-                                                [npy_out_backward], rtol=1e-3, atol=1e-5, ctx=ctx,
-                                                dtype=backward_dtype)
-                    # Disable numeric gradient https://github.com/apache/incubator-mxnet/issues/11509
-                    # check gradient
-                    if dtype is not np.float16 and not skip_backward:
-                        check_numeric_gradient(norm_sym, [in_data], numeric_eps=epsilon,
-                                               rtol=1e-1, atol=1e-3, dtype=backward_dtype)
-                    if i < in_data_dim-1:
-                        norm_sym = mx.symbol.norm(data=data, ord=order, axis=(i, i+1), keepdims=True)
-                        npy_out = l1norm(in_data, (i, i+1)) if order is 1 else l2norm(in_data, (i, i+1))
+    for enforce_safe_acc in ["1", "0"]:
+        if is_windows:
+            if enforce_safe_acc == "0":
+                break
+            enforce_safe_acc = "0" if "MXNET_SAFE_ACCUMULATION" not in os.environ else os.environ["MXNET_SAFE_ACCUMULATION"]
+        else:
+            os.environ["MXNET_SAFE_ACCUMULATION"] = enforce_safe_acc
+        for order in [1, 2]:
+            for dtype in [np.float16, np.float32, np.float64]:
+                for i in range(in_data_dim):
+                    for out_dtype in ['float32', 'float64']:
+                        backward_dtype = np.float32 if out_dtype == 'float32' else np.float64
+                        accumulation_type = acc_type[dtype]
+                        if enforce_safe_acc == "0":
+                            backward_dtype = dtype
+                            out_dtype = dtype_to_str[dtype]
+                            accumulation_type = dtype
+                        skip_backward = 'int' in out_dtype
+                        in_data = np.random.uniform(-1, 1, in_shape).astype(accumulation_type)
+                        in_data[abs(in_data) < epsilon] = 2 * epsilon
+                        norm_sym = mx.symbol.norm(data=data, ord=order, axis=i, out_dtype=out_dtype, keepdims=True)
+                        npy_out = l1norm(in_data, i) if order is 1 else l2norm(in_data, i)
                         npy_out_backward = np.sign(in_data) if order is 1 else in_data/npy_out
-                        check_symbolic_forward(norm_sym, [in_data], [npy_out.astype(dtype)],
-                                               rtol=1e-3 if dtype is np.float16 else 1e-3,
-                                               atol=1e-5 if dtype is np.float16 else 1e-5, ctx=ctx)
-                        if not skip_backward:
-                            check_symbolic_backward(norm_sym, [in_data],
+                        check_symbolic_forward(norm_sym, [in_data.astype(dtype)], [npy_out.astype(out_dtype)],
+                                               rtol=1e-2 if dtype == np.float16 else 1e-3,
+                                               atol=1e-4 if dtype == np.float16 else 1e-5, ctx=ctx, dtype=dtype)
+                        if dtype is not np.float16 and not skip_backward:
+                            check_symbolic_backward(norm_sym, [in_data.astype(dtype)],
                                                     [np.ones(npy_out.shape).astype(out_dtype)],
-                                                    [npy_out_backward.astype(out_dtype)],
-                                                    rtol=1e-3, atol=1e-5, ctx=ctx, dtype=backward_dtype)
+                                                    [npy_out_backward], rtol=1e-3, atol=1e-5, ctx=ctx,
+                                                    dtype=backward_dtype)
+                        # Disable numeric gradient https://github.com/apache/incubator-mxnet/issues/11509
                         # check gradient
                         if dtype is not np.float16 and not skip_backward:
                             check_numeric_gradient(norm_sym, [in_data], numeric_eps=epsilon,
                                                    rtol=1e-1, atol=1e-3, dtype=backward_dtype)
+                        if i < in_data_dim-1:
+                            norm_sym = mx.symbol.norm(data=data, ord=order, axis=(i, i+1), keepdims=True)
+                            npy_out = l1norm(in_data, (i, i+1)) if order is 1 else l2norm(in_data, (i, i+1))
+                            npy_out_backward = np.sign(in_data) if order is 1 else in_data/npy_out
+                            check_symbolic_forward(norm_sym, [in_data], [npy_out.astype(dtype)],
+                                                   rtol=1e-2 if dtype is np.float16 else 1e-3,
+                                                   atol=1e-4 if dtype is np.float16 else 1e-5, ctx=ctx)
+                            if dtype is not np.float16 and not skip_backward:
+                                check_symbolic_backward(norm_sym, [in_data],
+                                                        [np.ones(npy_out.shape).astype(out_dtype)],
+                                                        [npy_out_backward.astype(out_dtype)],
+                                                        rtol=1e-3, atol=1e-5, ctx=ctx, dtype=backward_dtype)
+                            # check gradient
+                            if dtype is not np.float16 and not skip_backward:
+                                check_numeric_gradient(norm_sym, [in_data], numeric_eps=epsilon,
+                                                       rtol=1e-1, atol=1e-3, dtype=backward_dtype)
 
 
 def test_layer_norm():
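End to end, the documented behavior can be checked from Python once MXNet is built from this branch. A quick hand-written demo of the overflow case the safe mode is meant to fix (not one of the tests above; assumes the default CPU context):

```python
import os
os.environ["MXNET_SAFE_ACCUMULATION"] = "1"  # set before the op runs

import mxnet as mx
import numpy as np

# 2000 * 100 = 200000 overflows float16 (max ~65504), so a float16
# running sum saturates to inf; safe accumulation carries the sum in
# float32, and out_dtype returns it without narrowing back to float16.
x = mx.nd.full((2000,), 100.0, dtype=np.float16)
print(mx.nd.norm(x, ord=1, out_dtype='float32'))  # ~200000, not inf
```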