From 1245499bbff250a55390444bd801e81b8c8612d8 Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 10:40:46 +0800 Subject: [PATCH 01/18] fix meansum nan --- 3rdparty/mshadow/mshadow/base.h | 92 ++++++++++++++++++++++---- src/operator/mshadow_op.h | 46 ++++--------- tests/python/unittest/test_operator.py | 23 +++---- 3 files changed, 102 insertions(+), 59 deletions(-) diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h index e0e9602c00db..f229dec1a38d 100755 --- a/3rdparty/mshadow/mshadow/base.h +++ b/3rdparty/mshadow/mshadow/base.h @@ -606,6 +606,64 @@ struct divto { typedef op::div OPType; }; } // namespace sv + +#ifndef __CUDA_ARCH__ +using std::isnan; +using std::isinf; +#endif + +/*! \brief + * determines if the given floating point + * number is not a number */ +namespace isnan_typed { + template + MSHADOW_XINLINE bool IsNan(volatile DType val) { + return false; + } + template<> + MSHADOW_XINLINE bool IsNan(volatile float val) { + return isnan(val); + } + template<> + MSHADOW_XINLINE bool IsNan(volatile double val) { + return isnan(val); + } + template<> + MSHADOW_XINLINE bool IsNan(volatile long double val) { + return isnan(val); + } + template<> + MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) { + return (val.half_ & 0x7fff) > 0x7c00; + } +} // namespace isnan_typed + +/*! \brief + * determines if the given floating point + * number is a positive or negative infinity */ +namespace isinf_typed { + template + MSHADOW_XINLINE bool IsInf(volatile DType val) { + return false; + } + template<> + MSHADOW_XINLINE bool IsInf(volatile float val) { + return isinf(val); + } + template<> + MSHADOW_XINLINE bool IsInf(volatile double val) { + return isinf(val); + } + template<> + MSHADOW_XINLINE bool IsInf(volatile long double val) { + return isinf(val); + } + template<> + MSHADOW_XINLINE bool IsInf(volatile mshadow::half::half_t val) { + return (val.half_ & 0x7fff) == 0x7c00; + } +} // namespace isinf_typed + /*! \brief namespace for potential reducer operations */ namespace red { namespace limits { @@ -669,6 +727,11 @@ template<> MSHADOW_XINLINE double NegInfValue(void) { return -HUGE_VAL; } +/*! \brief negative infinity value of float16 */ +template<> +MSHADOW_XINLINE half::half_t NegInfValue(void) { + return half::half_t::Binary(0xfc00); +} /*! * \brief maximum value of certain types @@ -730,6 +793,11 @@ template<> MSHADOW_XINLINE double PosInfValue(void) { return HUGE_VAL; } +/*! \brief positive infinity value of float16 */ +template<> +MSHADOW_XINLINE half::half_t PosInfValue(void) { + return half::half_t::Binary(0x7c00); +} } // namespace limits @@ -745,7 +813,11 @@ struct sum { MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src, volatile DType& residual) { // NOLINT(*) DType y = src - residual; DType t = dst + y; - residual = (t - dst) - y; + if (isinf_typed::IsInf(t)) { + residual = 0; + } else { + residual = (t - dst) - y; + } dst = t; } /*! \brief combine the results of two reducers */ @@ -797,12 +869,9 @@ struct maximum { /*! \brief do reduction into dst */ template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) - using namespace std; -#ifdef __CUDACC__ - dst = ::max(dst, src); -#else - dst = max(dst, src); -#endif // __CUDACC__ + if (!isnan_typed::IsNan(dst)) { + dst = DType(dst > src ? dst : src); + } } /*! \brief do reduction into dst */ template @@ -853,12 +922,9 @@ struct minimum { /*! 
\brief do reduction into dst */ template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) - using namespace std; -#ifdef __CUDACC__ - dst = ::min(dst, src); -#else - dst = min(dst, src); -#endif // __CUDACC__ + if (!isnan_typed::IsNan(dst)) { + dst = DType(dst < src ? dst : src); + } } /*! \brief do reduction into dst */ template diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index e14c8dbb0b78..08d81f6f443f 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -27,6 +27,7 @@ #define MXNET_OPERATOR_MSHADOW_OP_H_ #include +#include #include "math.h" #include "math_functions-inl.h" #include "special_functions-inl.h" @@ -41,6 +42,8 @@ namespace mxnet { namespace op { namespace mshadow_op { +using mshadow::isnan_typed::IsNan; + #ifdef __CUDA_ARCH__ __constant__ const float PI = 3.14159265358979323846; __constant__ const float SELU_ALPHA = 1.6732632423543772848170429916717; @@ -51,7 +54,6 @@ const float PI = 3.14159265358979323846; const float SELU_ALPHA = 1.6732632423543772848170429916717; const float SELU_LAMBDA = 1.0507009873554804934193349852946; const float SQRT_2 = 1.4142135623730950488016887242096; -using std::isnan; #endif using std::enable_if; using std::is_unsigned; @@ -826,37 +828,13 @@ struct product { } }; -namespace isnan_typed { - template - MSHADOW_XINLINE bool IsNan(volatile DType val) { - return false; - } - template<> - MSHADOW_XINLINE bool IsNan(volatile float val) { - return isnan(val); - } - template<> - MSHADOW_XINLINE bool IsNan(volatile double val) { - return isnan(val); - } - template<> - MSHADOW_XINLINE bool IsNan(volatile long double val) { - return isnan(val); - } - - template<> - MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) { - return (val.half_ & 0x7fff) > 0x7c00; - } -}; // namespace isnan_typed - -MXNET_UNARY_MATH_OP_NC(relu, isnan_typed::IsNan(a) || (a > DType(0)) ? a : DType(0)); +MXNET_UNARY_MATH_OP_NC(relu, IsNan(a) || (a > DType(0)) ? a : DType(0)); /*! \brief used for computing gradient of relu operator */ struct relu_grad : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a) { - if (isnan_typed::IsNan(a)) { + if (IsNan(a)) { return a; } else { return a > DType(0) ? DType(1) : DType(0); @@ -868,7 +846,7 @@ struct relu_grad : public mxnet_op::tunable { struct maximum : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { - if (isnan_typed::IsNan(a)) { + if (IsNan(a)) { return a; } else { return (a > b ? a : b); @@ -880,7 +858,7 @@ struct maximum : public mxnet_op::tunable { struct minimum : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { - if (isnan_typed::IsNan(a)) { + if (IsNan(a)) { return a; } else { return DType(a < b ? a : b); @@ -893,13 +871,13 @@ struct nansum { /*! \brief do reduction into dst */ template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) - if (isnan_typed::IsNan(src)) return; + if (IsNan(src)) return; dst += src; } /*! \brief do reduction into dst */ template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src, volatile DType& residual) { // NOLINT(*) - if (isnan_typed::IsNan(src)) return; + if (IsNan(src)) return; DType y = src - residual; DType t = dst + y; residual = (t - dst) - y; @@ -945,7 +923,7 @@ struct nansum { struct nansum_grad : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { - return isnan_typed::IsNan(a) ? 
DType(0) : DType(1); + return IsNan(a) ? DType(0) : DType(1); } }; @@ -954,7 +932,7 @@ struct nanprod { /*! \brief do reduction into dst */ template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) - if (isnan_typed::IsNan(src)) return; + if (IsNan(src)) return; dst *= src; } /*! \brief do reduction into dst */ @@ -1128,7 +1106,7 @@ struct sum { struct nanprod_grad : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { - return isnan_typed::IsNan(a) ? DType(0) : b / a; + return IsNan(a) ? DType(0) : b / a; } }; diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 66bd9ec6b489..136623630a72 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9244,21 +9244,20 @@ def test_sample_normal_default_shape(): assert s.shape == (1, 1) -def test_min_max_inf(): - dtypes = [np.float32, np.double] - elem_list = [-1, 1, 0, np.inf, -np.inf] - +def test_inf_and_nan(): + dtypes = [np.float16, np.float32, np.double] + elem_list = [-1, 1, 0, np.inf, -np.inf, np.nan] + op_names = ['min', 'max', 'mean', 'sum'] for dtype in dtypes: for a in elem_list: for b in elem_list: - data_np = np.array([a, b], dtype=dtype) - data_mx = mx.nd.array(data_np, dtype=dtype) - - min_data_np, max_data_np = data_np.min(), data_np.max() - min_data_mx, max_data_mx = data_mx.min(), data_mx.max() - - assert_array_equal(min_data_np, min_data_mx.asnumpy()) - assert_array_equal(max_data_np, max_data_mx.asnumpy()) + for op_name in op_names: + print(dtype, a, b, op_name) + data_np = np.array([a, b], dtype=dtype) + data_mx = mx.nd.array(data_np, dtype=dtype) + out_data_np = getattr(data_np, op_name)() + out_data_mx = getattr(data_mx, op_name)() + assert_array_equal(out_data_np, out_data_mx.asnumpy()) if __name__ == '__main__': From 100d9c1c8e346ae001d91ba05a5ede703ae885e8 Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 10:48:46 +0800 Subject: [PATCH 02/18] remove print in testcase --- tests/python/unittest/test_operator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 136623630a72..68e3d4bb0e98 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9252,7 +9252,6 @@ def test_inf_and_nan(): for a in elem_list: for b in elem_list: for op_name in op_names: - print(dtype, a, b, op_name) data_np = np.array([a, b], dtype=dtype) data_mx = mx.nd.array(data_np, dtype=dtype) out_data_np = getattr(data_np, op_name)() From 4aa2dc6cc45af0fbfdc79e207dd5e5c0c84eab6c Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 10:53:51 +0800 Subject: [PATCH 03/18] update to avoid assignment --- 3rdparty/mshadow/mshadow/base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h index f229dec1a38d..15d0fdde0d80 100755 --- a/3rdparty/mshadow/mshadow/base.h +++ b/3rdparty/mshadow/mshadow/base.h @@ -870,7 +870,7 @@ struct maximum { template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) if (!isnan_typed::IsNan(dst)) { - dst = DType(dst > src ? dst : src); + if (!(dst > src)) dst = src; } } /*! \brief do reduction into dst */ @@ -923,7 +923,7 @@ struct minimum { template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) if (!isnan_typed::IsNan(dst)) { - dst = DType(dst < src ? 
dst : src); + if (!(dst < src)) dst = src; } } /*! \brief do reduction into dst */ From 1d557e09056b12f88b38ab08a511f71b8152ac52 Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 10:59:50 +0800 Subject: [PATCH 04/18] update --- 3rdparty/mshadow/mshadow/base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h index 15d0fdde0d80..fa497d3c9cc3 100755 --- a/3rdparty/mshadow/mshadow/base.h +++ b/3rdparty/mshadow/mshadow/base.h @@ -870,7 +870,7 @@ struct maximum { template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) if (!isnan_typed::IsNan(dst)) { - if (!(dst > src)) dst = src; + if (!(dst >= src)) dst = src; } } /*! \brief do reduction into dst */ @@ -923,7 +923,7 @@ struct minimum { template MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*) if (!isnan_typed::IsNan(dst)) { - if (!(dst < src)) dst = src; + if (!(dst <= src)) dst = src; } } /*! \brief do reduction into dst */ From 1d794479be3e657c10ba99b8b4214fbfe71750ab Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 17:10:26 +0800 Subject: [PATCH 05/18] fix argmin and argmax, update julia unittest --- 3rdparty/mshadow/mshadow/extension/reduce_with_axis.h | 2 +- julia/test/unittest/ndarray.jl | 8 ++++---- tests/python/unittest/test_operator.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/3rdparty/mshadow/mshadow/extension/reduce_with_axis.h b/3rdparty/mshadow/mshadow/extension/reduce_with_axis.h index 54bcc750cfc5..26b6156ad6f9 100644 --- a/3rdparty/mshadow/mshadow/extension/reduce_with_axis.h +++ b/3rdparty/mshadow/mshadow/extension/reduce_with_axis.h @@ -112,7 +112,7 @@ struct Plan, DTy index_t z = (x*size_+k)*trailing_+y; DType tmp = res; Reducer::Reduce(res, src_.Eval(z/last_, z%last_)); - if (tmp != res) { + if (tmp != res && !isnan_typed::IsNan(tmp)) { idx = k; } } diff --git a/julia/test/unittest/ndarray.jl b/julia/test/unittest/ndarray.jl index 638963f1b8aa..ac91e8d0f76e 100644 --- a/julia/test/unittest/ndarray.jl +++ b/julia/test/unittest/ndarray.jl @@ -1525,8 +1525,8 @@ function test_argmax() NaN 2 6] x = NDArray(A) - @test copy(argmax(x, dims = 1)) == [1 1 2] - @test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1) + @test copy(argmax(x, dims = 1)) == [2 1 2] + @test copy(argmax(x, dims = 2)) == reshape([2, 1], :, 1) end end @@ -1547,8 +1547,8 @@ function test_argmin() NaN 2 6] x = NDArray(A) - @test copy(argmin(x, dims = 1)) == [1 2 1] - @test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1) + @test copy(argmin(x, dims = 1)) == [2 2 1] + @test copy(argmin(x, dims = 2)) == reshape([1, 1], :, 1) end end diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 68e3d4bb0e98..ee2598cf5c84 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9247,7 +9247,7 @@ def test_sample_normal_default_shape(): def test_inf_and_nan(): dtypes = [np.float16, np.float32, np.double] elem_list = [-1, 1, 0, np.inf, -np.inf, np.nan] - op_names = ['min', 'max', 'mean', 'sum'] + op_names = ['min', 'max', 'mean', 'sum', 'argmin', 'argmax'] for dtype in dtypes: for a in elem_list: for b in elem_list: From 9b744d5a5a10c007956069f7c294e30c92725662 Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 18:39:38 +0800 Subject: [PATCH 06/18] update argmin/argmax docs in julia bindings --- julia/src/ndarray/reduction.jl | 6 ++---- 1 file changed, 2 
insertions(+), 4 deletions(-) diff --git a/julia/src/ndarray/reduction.jl b/julia/src/ndarray/reduction.jl index 833b483ca321..2045ce231674 100644 --- a/julia/src/ndarray/reduction.jl +++ b/julia/src/ndarray/reduction.jl @@ -47,8 +47,7 @@ broadcasted(::typeof(min), x::NDArray{T}, y::NDArray{T}) where {T} = """ argmax(x::NDArray; dims) -> indices -Note that `NaN` is skipped during comparison. -This is different from Julia `Base.argmax`. +Note that `NaN` is treated as greater than all other values in `argmax`. ## Examples @@ -77,8 +76,7 @@ Base.argmax(x::NDArray; dims = :) = _argmax(x, dims) .+ 1 """ argmin(x::NDArray; dims) -> indices -Note that `NaN` is skipped during comparison. -This is different from Julia `Base.argmin`. +Note that `NaN` is treated as less than all other values in `argmin`. ## Examples From 66280f1870ac4f1bbfa9f9f9781aa22fe29bb46d Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 21:56:39 +0800 Subject: [PATCH 07/18] debug --- tests/python/unittest/test_operator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index ee2598cf5c84..de843ec173b4 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9252,6 +9252,7 @@ def test_inf_and_nan(): for a in elem_list: for b in elem_list: for op_name in op_names: + print(dtype, a, b, op_name) data_np = np.array([a, b], dtype=dtype) data_mx = mx.nd.array(data_np, dtype=dtype) out_data_np = getattr(data_np, op_name)() From 848c57b84618c7af14c982bc80ad28a7d975eb3c Mon Sep 17 00:00:00 2001 From: wkcn Date: Sun, 22 Sep 2019 23:26:19 +0800 Subject: [PATCH 08/18] update --- tests/python/unittest/test_operator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index de843ec173b4..d75535042b94 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9248,16 +9248,20 @@ def test_inf_and_nan(): dtypes = [np.float16, np.float32, np.double] elem_list = [-1, 1, 0, np.inf, -np.inf, np.nan] op_names = ['min', 'max', 'mean', 'sum', 'argmin', 'argmax'] + record = [] for dtype in dtypes: for a in elem_list: for b in elem_list: for op_name in op_names: - print(dtype, a, b, op_name) data_np = np.array([a, b], dtype=dtype) data_mx = mx.nd.array(data_np, dtype=dtype) out_data_np = getattr(data_np, op_name)() out_data_mx = getattr(data_mx, op_name)() - assert_array_equal(out_data_np, out_data_mx.asnumpy()) + try: + assert_array_equal(out_data_np, out_data_mx.asnumpy()) + except AssertionError: + record.append((dtype, a, b, op_name)) + assert len(record) == 0, record if __name__ == '__main__': From 4de6bacf27ae2ec6f54aef245b3b08e33ae3272a Mon Sep 17 00:00:00 2001 From: wkcn Date: Mon, 23 Sep 2019 00:03:27 +0800 Subject: [PATCH 09/18] update test --- tests/python/unittest/test_operator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index d75535042b94..c80c56d0dd88 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9259,8 +9259,10 @@ def test_inf_and_nan(): out_data_mx = getattr(data_mx, op_name)() try: assert_array_equal(out_data_np, out_data_mx.asnumpy()) - except AssertionError: - record.append((dtype, a, b, op_name)) + except AssertionError as e: + args = (dtype, a, b, op_name) + print(args, e, '\n---------\n') + 
record.append(args) assert len(record) == 0, record From 9c3a72cf608ace147127e09d0273c3cae0de5a15 Mon Sep 17 00:00:00 2001 From: wkcn Date: Mon, 23 Sep 2019 00:25:37 +0800 Subject: [PATCH 10/18] fix sum merge --- 3rdparty/mshadow/mshadow/base.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h index fa497d3c9cc3..a55c3dae26da 100755 --- a/3rdparty/mshadow/mshadow/base.h +++ b/3rdparty/mshadow/mshadow/base.h @@ -829,10 +829,15 @@ struct sum { template MSHADOW_XINLINE static void Merge(volatile DType& dst_val, volatile DType& dst_residual, volatile DType& src_val, volatile DType& src_residual) { // NOLINT(*) DType t1 = dst_val + src_val; - DType e = t1 - dst_val; - DType t2 = ((src_val - e) + (dst_val - (t1 - e))) + dst_residual + src_residual; - dst_val = t1 + t2; - dst_residual = t2 - (dst_val - t1); + if (isinf_typed::IsInf(t1)) { + dst_val = t1; + dst_residual = 0; + } else { + DType e = t1 - dst_val; + DType t2 = ((src_val - e) + (dst_val - (t1 - e))) + dst_residual + src_residual; + dst_val = t1 + t2; + dst_residual = t2 - (dst_val - t1); + } } /*! \brief finalize reduction */ template From 9f2e4fc7f2a37702036706c0f1ac0acac897979e Mon Sep 17 00:00:00 2001 From: wkcn Date: Tue, 24 Sep 2019 11:10:43 +0800 Subject: [PATCH 11/18] update testcase --- julia/test/unittest/ndarray.jl | 16 ++++---- python/mxnet/ndarray/ndarray.py | 1 + tests/python/unittest/test_ndarray.py | 54 +++++++++++++++++++++----- tests/python/unittest/test_operator.py | 22 ----------- 4 files changed, 53 insertions(+), 40 deletions(-) diff --git a/julia/test/unittest/ndarray.jl b/julia/test/unittest/ndarray.jl index ac91e8d0f76e..5d18ac8ac4b3 100644 --- a/julia/test/unittest/ndarray.jl +++ b/julia/test/unittest/ndarray.jl @@ -1515,8 +1515,8 @@ function test_argmax() 4 2 6] x = NDArray(A) - @test copy(argmax(x, dims = 1)) == [2 1 2] - @test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1) + @test copy(argmax(x, dims = 1)) == [x[1] for x in argmax(A, dims = 1)] + @test copy(argmax(x, dims = 2)) == [x[2] for x in argmax(A, dims = 2)] end @info "NDArray::argmax::NaN" @@ -1525,8 +1525,8 @@ function test_argmax() NaN 2 6] x = NDArray(A) - @test copy(argmax(x, dims = 1)) == [2 1 2] - @test copy(argmax(x, dims = 2)) == reshape([2, 1], :, 1) + @test copy(argmax(x, dims = 1)) == [x[1] for x in argmax(A, dims = 1)] + @test copy(argmax(x, dims = 2)) == [x[2] for x in argmax(A, dims = 2)] end end @@ -1537,8 +1537,8 @@ function test_argmin() 4 2 6] x = NDArray(A) - @test copy(argmin(x, dims = 1)) == [1 2 1] - @test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1) + @test copy(argmin(x, dims = 1)) == [x[1] for x in argmin(A, dims = 1)] + @test copy(argmin(x, dims = 2)) == [x[2] for x in argmin(A, dims = 2)] end @info "NDArray::argmin::NaN" @@ -1547,8 +1547,8 @@ function test_argmin() NaN 2 6] x = NDArray(A) - @test copy(argmin(x, dims = 1)) == [2 2 1] - @test copy(argmin(x, dims = 2)) == reshape([1, 1], :, 1) + @test copy(argmin(x, dims = 1)) == [x[1] for x in argmin(A, dims = 1)] + @test copy(argmin(x, dims = 2)) == [x[2] for x in argmin(A, dims = 2)] end end diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 4e3c7efa7be3..162687c9bbeb 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -4909,6 +4909,7 @@ class DLDataType(ctypes.Structure): "bool": (1, 1, 1), "uint32": (1, 32, 1), "uint64": (1, 64, 1), + 'float16': (2, 16, 1), "float32": (2, 32, 1), "float64": (2, 64, 
1), } diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index bee4bff0f7c0..d81459a83504 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -21,6 +21,7 @@ from itertools import permutations, combinations_with_replacement import os import pickle as pkl +import random import functools from nose.tools import assert_raises, raises from common import with_seed, assertRaises, TemporaryDirectory @@ -31,7 +32,7 @@ from mxnet.test_utils import same from mxnet.test_utils import random_sample, rand_shape_nd, random_arrays from mxnet import runtime -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose, assert_array_equal, assert_array_almost_equal import mxnet.autograd @@ -578,13 +579,40 @@ def test_dot(): @with_seed() def test_reduce(): - sample_num = 200 - def test_reduce_inner(numpy_reduce_func, nd_reduce_func, multi_axes): + sample_num = 300 + def test_reduce_inner(numpy_reduce_func, nd_reduce_func, multi_axes, + allow_almost_equal=False, check_dtype=True): + dtypes = [(np.float16, 1), + (np.float32, 5), + (np.double, 6)] for i in range(sample_num): + dtype, decimal = random.choice(dtypes) ndim = np.random.randint(1, 6) shape = np.random.randint(1, 11, size=ndim) - dat = np.random.rand(*shape) - 0.5 + dat = (np.random.rand(*shape) - 0.5).astype(dtype) keepdims = np.random.randint(0, 2) + + allow_nan = np.random.randint(0, 2) + if allow_nan: + total_nans = np.random.randint(0, dat.size//10+1) + dat.ravel()[np.random.choice( + dat.size, total_nans, replace=False)] = np.nan + + allow_inf = np.random.randint(0, 2) + if allow_inf: + r = np.random.randint(0, 3) + total_infs = np.random.randint(0, dat.size//20+1) + if r == 0: + total_pos_infs, total_neg_infs = total_infs, 0 + elif r == 1: + total_pos_infs, total_neg_infs = 0, total_infs + else: + total_pos_infs = total_neg_infs = total_infs // 2 + dat.ravel()[np.random.choice( + dat.size, total_pos_infs, replace=False)] = np.inf + dat.ravel()[np.random.choice( + dat.size, total_neg_infs, replace=False)] = -np.inf + if multi_axes: axis_flags = np.random.randint(0, 2, size=ndim) axes = [] @@ -599,16 +627,22 @@ def test_reduce_inner(numpy_reduce_func, nd_reduce_func, multi_axes): axes = np.random.randint(0, ndim) numpy_ret = numpy_reduce_func(dat, axis=axes, keepdims=keepdims) - ndarray_ret = nd_reduce_func(mx.nd.array(dat), axis=axes, keepdims=keepdims) + mx_arr = mx.nd.array(dat, dtype=dtype) + ndarray_ret = nd_reduce_func(mx_arr, axis=axes, keepdims=keepdims) if type(ndarray_ret) is mx.ndarray.NDArray: ndarray_ret = ndarray_ret.asnumpy() assert (ndarray_ret.shape == numpy_ret.shape) or \ (ndarray_ret.shape == (1,) and numpy_ret.shape == ()), "nd:%s, numpy:%s" \ %(ndarray_ret.shape, numpy_ret.shape) - err = np.square(ndarray_ret - numpy_ret).mean() - assert err < 1E-4 + if check_dtype: + assert ndarray_ret.dtype == numpy_ret.dtype,\ + (ndarray_ret.dtype, numpy_ret.dtype) + if allow_almost_equal: + assert_array_almost_equal(ndarray_ret, numpy_ret, decimal=decimal) + else: + assert_array_equal(ndarray_ret, numpy_ret) test_reduce_inner(lambda data, axis, keepdims:np_reduce(data, axis, keepdims, np.sum), - mx.nd.sum, True) + mx.nd.sum, True, allow_almost_equal=True) test_reduce_inner(lambda data, axis, keepdims:np_reduce(data, axis, keepdims, np.max), mx.nd.max, True) test_reduce_inner(lambda data, axis, keepdims:np_reduce(data, axis, keepdims, np.min), @@ -617,10 +651,10 @@ def test_reduce_inner(numpy_reduce_func, nd_reduce_func, 
multi_axes): # Force numpy to match mxnet's float32. test_reduce_inner(lambda data, axis, keepdims:np_reduce(np.float32(data), axis, keepdims, np.argmax), - mx.nd.argmax, False) + mx.nd.argmax, False, check_dtype=False) test_reduce_inner(lambda data, axis, keepdims:np_reduce(np.float32(data), axis, keepdims, np.argmin), - mx.nd.argmin, False) + mx.nd.argmin, False, check_dtype=False) @with_seed() diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index c80c56d0dd88..8a2d566d9c52 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -9244,28 +9244,6 @@ def test_sample_normal_default_shape(): assert s.shape == (1, 1) -def test_inf_and_nan(): - dtypes = [np.float16, np.float32, np.double] - elem_list = [-1, 1, 0, np.inf, -np.inf, np.nan] - op_names = ['min', 'max', 'mean', 'sum', 'argmin', 'argmax'] - record = [] - for dtype in dtypes: - for a in elem_list: - for b in elem_list: - for op_name in op_names: - data_np = np.array([a, b], dtype=dtype) - data_mx = mx.nd.array(data_np, dtype=dtype) - out_data_np = getattr(data_np, op_name)() - out_data_mx = getattr(data_mx, op_name)() - try: - assert_array_equal(out_data_np, out_data_mx.asnumpy()) - except AssertionError as e: - args = (dtype, a, b, op_name) - print(args, e, '\n---------\n') - record.append(args) - assert len(record) == 0, record - - if __name__ == '__main__': import nose nose.runmodule() From 714951c06ceee16eb9414b3338fd5d8bc4be0192 Mon Sep 17 00:00:00 2001 From: wkcn Date: Tue, 24 Sep 2019 13:32:32 +0800 Subject: [PATCH 12/18] update including sign --- julia/test/unittest/ndarray.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/julia/test/unittest/ndarray.jl b/julia/test/unittest/ndarray.jl index 5d18ac8ac4b3..599b0a65bfc4 100644 --- a/julia/test/unittest/ndarray.jl +++ b/julia/test/unittest/ndarray.jl @@ -1515,8 +1515,8 @@ function test_argmax() 4 2 6] x = NDArray(A) - @test copy(argmax(x, dims = 1)) == [x[1] for x in argmax(A, dims = 1)] - @test copy(argmax(x, dims = 2)) == [x[2] for x in argmax(A, dims = 2)] + @test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)] + @test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)] end @info "NDArray::argmax::NaN" @@ -1525,8 +1525,8 @@ function test_argmax() NaN 2 6] x = NDArray(A) - @test copy(argmax(x, dims = 1)) == [x[1] for x in argmax(A, dims = 1)] - @test copy(argmax(x, dims = 2)) == [x[2] for x in argmax(A, dims = 2)] + @test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)] + @test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)] end end @@ -1537,8 +1537,8 @@ function test_argmin() 4 2 6] x = NDArray(A) - @test copy(argmin(x, dims = 1)) == [x[1] for x in argmin(A, dims = 1)] - @test copy(argmin(x, dims = 2)) == [x[2] for x in argmin(A, dims = 2)] + @test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)] + @test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)] end @info "NDArray::argmin::NaN" @@ -1547,8 +1547,8 @@ function test_argmin() NaN 2 6] x = NDArray(A) - @test copy(argmin(x, dims = 1)) == [x[1] for x in argmin(A, dims = 1)] - @test copy(argmin(x, dims = 2)) == [x[2] for x in argmin(A, dims = 2)] + @test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)] + @test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)] end end From 6666bd6834454786fa9baf6ebd8e4274024c3c19 Mon Sep 17 00:00:00 2001 From: wkcn Date: Thu, 17 Oct 2019 17:14:15 +0800 
Subject: [PATCH 13/18] fix allclose --- src/operator/contrib/allclose_op-inl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/contrib/allclose_op-inl.h b/src/operator/contrib/allclose_op-inl.h index a858450f0007..32a4244484a0 100644 --- a/src/operator/contrib/allclose_op-inl.h +++ b/src/operator/contrib/allclose_op-inl.h @@ -84,7 +84,7 @@ inline bool AllCloseType(const nnvm::NodeAttrs& attrs, return (*out_attrs)[0] != -1; } -using namespace mshadow_op::isnan_typed; +using mshadow::isnan_typed::IsNan; template struct allclose_forward { From dca0c2a02ca78984eede93a4a81283ea6dd5e66b Mon Sep 17 00:00:00 2001 From: JackieWu Date: Fri, 18 Oct 2019 01:14:39 +0800 Subject: [PATCH 14/18] ci From 9183cb3a2284a7d920bb720a19c38dc49d66fdea Mon Sep 17 00:00:00 2001 From: wkcn Date: Tue, 12 Nov 2019 09:39:54 +0800 Subject: [PATCH 15/18] use constants --- 3rdparty/mshadow/mshadow/base.h | 9 +++++---- 3rdparty/mshadow/mshadow/half.h | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h index a55c3dae26da..acaeb1bb1890 100755 --- a/3rdparty/mshadow/mshadow/base.h +++ b/3rdparty/mshadow/mshadow/base.h @@ -634,7 +634,7 @@ namespace isnan_typed { } template<> MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) { - return (val.half_ & 0x7fff) > 0x7c00; + return (val.half_ & (~MSHADOW_HALF_SIGN_BIT)) > MSHADOW_HALF_EXPONENT_BITS; } } // namespace isnan_typed @@ -660,7 +660,7 @@ namespace isinf_typed { } template<> MSHADOW_XINLINE bool IsInf(volatile mshadow::half::half_t val) { - return (val.half_ & 0x7fff) == 0x7c00; + return (val.half_ & (~MSHADOW_HALF_SIGN_BIT)) == MSHADOW_HALF_EXPONENT_BITS; } } // namespace isinf_typed @@ -730,7 +730,8 @@ MSHADOW_XINLINE double NegInfValue(void) { /*! \brief negative infinity value of float16 */ template<> MSHADOW_XINLINE half::half_t NegInfValue(void) { - return half::half_t::Binary(0xfc00); + return half::half_t::Binary( + MSHADOW_HALF_SIGN_BIT | MSHADOW_HALF_EXPONENT_BITS); } /*! @@ -796,7 +797,7 @@ MSHADOW_XINLINE double PosInfValue(void) { /*! 
\brief positive infinity value of float16 */ template<> MSHADOW_XINLINE half::half_t PosInfValue(void) { - return half::half_t::Binary(0x7c00); + return half::half_t::Binary(MSHADOW_HALF_EXPONENT_BITS); } } // namespace limits diff --git a/3rdparty/mshadow/mshadow/half.h b/3rdparty/mshadow/mshadow/half.h index 2dded0a7752e..1cc53ae0460f 100644 --- a/3rdparty/mshadow/mshadow/half.h +++ b/3rdparty/mshadow/mshadow/half.h @@ -349,6 +349,8 @@ MSHADOW_HALF_OPERATOR(bool, <=) #define MSHADOW_HALF_MIN mshadow::half::half_t::Binary(0xFBFF); #define MSHADOW_HALF_MAX mshadow::half::half_t::Binary(0x7BFF); +#define MSHADOW_HALF_SIGN_BIT 0x8000 +#define MSHADOW_HALF_EXPONENT_BITS 0x7c00 } // namespace half } // namespace mshadow #endif // MSHADOW_HALF_H_ From 3b51b794036bea9fbb9ae15eb7b34a93286725fa Mon Sep 17 00:00:00 2001 From: wkcn Date: Tue, 12 Nov 2019 11:08:03 +0800 Subject: [PATCH 16/18] fix build for isinf and isnan --- src/operator/mshadow_op.h | 2 +- src/operator/tensor/elemwise_unary_op.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 4b00af1811d9..4ae587188d1b 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -43,7 +43,7 @@ namespace op { namespace mshadow_op { using mshadow::isnan_typed::IsNan; -using mshadow::isnan_typed::IsInf; +using mshadow::isinf_typed::IsInf; #ifdef __CUDA_ARCH__ __constant__ const float PI = 3.14159265358979323846; diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 27013dfb98ae..577c994a8ee1 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -699,9 +699,9 @@ struct nan_to_num_forward { const DType posinf, const DType neginf) { DType val = in_data[i]; - if (mshadow_op::isnan_typed::IsNan(val)) val = nan; - if (val > 0 && mshadow_op::isinf_typed::IsInf(val)) val = posinf; - if (val < 0 && mshadow_op::isinf_typed::IsInf(val)) val = neginf; + if (mshadow_op::IsNan(val)) val = nan; + if (val > 0 && mshadow_op::IsInf(val)) val = posinf; + if (val < 0 && mshadow_op::IsInf(val)) val = neginf; KERNEL_ASSIGN(out_data[i], req, val); } }; @@ -758,9 +758,9 @@ struct nan_to_num_backward { const DType* out_grad, const DType* in_data) { DType val = out_grad[i]; - if (mshadow_op::isnan_typed::IsNan(in_data[i])) val = 0; - if (val > 0 && mshadow_op::isinf_typed::IsInf(in_data[i])) val = 0; - if (val < 0 && mshadow_op::isinf_typed::IsInf(in_data[i])) val = 0; + if (mshadow_op::IsNan(in_data[i])) val = 0; + if (val > 0 && mshadow_op::IsInf(in_data[i])) val = 0; + if (val < 0 && mshadow_op::IsInf(in_data[i])) val = 0; KERNEL_ASSIGN(in_grad[i], req, val); } }; From c8761ada298ed823cd59b67e9683b87bdc8ccba7 Mon Sep 17 00:00:00 2001 From: JackieWu Date: Tue, 12 Nov 2019 14:53:26 +0800 Subject: [PATCH 17/18] ci From 3a2f062c5e5809b9abf24b9331fd597d09f02490 Mon Sep 17 00:00:00 2001 From: JackieWu Date: Tue, 12 Nov 2019 19:05:13 +0800 Subject: [PATCH 18/18] ci
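
---

A few standalone notes on the techniques in this series follow. None of this code is part of the patches; the helper names (IsNanHalf, IsInfHalf, guarded_kahan_sum, nan_propagating_max, nan_aware_argmax) are illustrative only.

The float16 checks in base.h and the new MSHADOW_HALF_SIGN_BIT / MSHADOW_HALF_EXPONENT_BITS constants rely on the IEEE 754 binary16 layout: 1 sign bit, 5 exponent bits (mask 0x7C00), 10 mantissa bits. A value is Inf when the exponent bits are all set and the mantissa is zero, and NaN when the exponent bits are all set and the mantissa is non-zero. A minimal sketch using plain uint16_t in place of mshadow::half::half_t:

#include <cassert>
#include <cstdint>

// IEEE 754 binary16: s eeeee mmmmmmmmmm (1 sign, 5 exponent, 10 mantissa bits).
constexpr uint16_t kSignBit      = 0x8000;  // MSHADOW_HALF_SIGN_BIT
constexpr uint16_t kExponentBits = 0x7C00;  // MSHADOW_HALF_EXPONENT_BITS

// NaN: exponent all ones and mantissa non-zero, so the unsigned magnitude
// is strictly greater than the bare exponent mask.
inline bool IsNanHalf(uint16_t bits) { return (bits & ~kSignBit) > kExponentBits; }
// Inf: exponent all ones and mantissa zero, so the magnitude equals the mask.
inline bool IsInfHalf(uint16_t bits) { return (bits & ~kSignBit) == kExponentBits; }

int main() {
  assert(IsInfHalf(0x7C00) && IsInfHalf(0xFC00));    // +inf, -inf
  assert(IsNanHalf(0x7C01) && IsNanHalf(0xFE00));    // NaN payloads
  assert(!IsNanHalf(0x3C00) && !IsInfHalf(0x3C00));  // 1.0
  return 0;
}

This layout is also why PosInfValue<half_t> is Binary(MSHADOW_HALF_EXPONENT_BITS) and NegInfValue<half_t> additionally sets the sign bit.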
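The sum reducer change (PATCH 01, and the matching Merge fix in PATCH 10) guards Kahan compensation against overflow: once the running total t is infinite, the correction term (t - dst) - y evaluates to inf - inf == NaN and would poison every later element, so the residual is reset to zero and the reduction saturates at +/-inf instead of returning NaN. A sketch of the scalar case, assuming plain double in place of mshadow's volatile DType parameters:

#include <cmath>
#include <cstdio>

// Compensated (Kahan) summation with the series' overflow guard.
double guarded_kahan_sum(const double* data, int n) {
  double dst = 0.0, residual = 0.0;
  for (int i = 0; i < n; ++i) {
    double y = data[i] - residual;
    double t = dst + y;
    // Without this guard: residual = (inf - dst) - y = inf - inf = NaN,
    // and every subsequent y = src - NaN turns the whole sum into NaN.
    residual = std::isinf(t) ? 0.0 : (t - dst) - y;
    dst = t;
  }
  return dst;
}

int main() {
  const double data[] = {1.0, INFINITY, 2.0};
  std::printf("%f\n", guarded_kahan_sum(data, 3));  // inf, matching np.sum
  return 0;
}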
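The maximum/minimum reducers drop std::max/std::min in favor of a guarded comparison so that NaN propagates the way numpy's max/min does. The two-step evolution in PATCHes 03/04 matters: !(dst > src) would overwrite dst with an equal src, while the final !(dst >= src) only assigns when src is strictly greater or NaN. A self-contained sketch of the max case, using -INFINITY for red::limits::NegInfValue:

#include <cmath>
#include <cstdio>

// NaN-propagating max: one NaN anywhere in the input makes the result NaN.
double nan_propagating_max(const double* data, int n) {
  double dst = -INFINITY;
  for (int i = 0; i < n; ++i) {
    if (!std::isnan(dst)) {
      // !(dst >= src) is true when src > dst *or* src is NaN,
      // so a NaN src is captured here and then sticks.
      if (!(dst >= data[i])) dst = data[i];
    }
  }
  return dst;
}

int main() {
  const double a[] = {-1.0, 3.0, 2.0};
  const double b[] = {-1.0, NAN, 2.0};
  std::printf("%f %f\n", nan_propagating_max(a, 3),   // 3.000000
              nan_propagating_max(b, 3));             // nan
  return 0;
}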
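PATCH 05's change to reduce_with_axis.h makes argmin/argmax agree with numpy by recording the index only when the running value actually changed and was not already NaN. The second condition is the subtle part: once the reducer holds NaN, tmp != res is true on every iteration (NaN compares unequal to itself), so without the !IsNan(tmp) guard the reported index would keep advancing past the first NaN. A standalone sketch of the argmax case, flattening the mshadow Plan into a plain loop under the same assumptions as above:

#include <cmath>
#include <cstdio>

// argmax with numpy semantics: the first NaN wins.
int nan_aware_argmax(const double* data, int n) {
  double res = -INFINITY;
  int idx = 0;
  for (int k = 0; k < n; ++k) {
    double tmp = res;
    if (!std::isnan(res) && !(res >= data[k])) res = data[k];  // maximum::Reduce
    // Record k only if the value moved and the previous value was not
    // already NaN; NaN != NaN would otherwise bump idx on every element.
    if (tmp != res && !std::isnan(tmp)) idx = k;
  }
  return idx;
}

int main() {
  const double a[] = {3.0, NAN, 7.0, NAN};
  std::printf("%d\n", nan_aware_argmax(a, 4));  // 1, same as np.argmax
  return 0;
}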