Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1426] Fix the wrong result of sum, mean, argmin, argmax when inputs contain inf or nan #16234

Merged
merged 21 commits into from
Nov 12, 2019
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 88 additions & 17 deletions 3rdparty/mshadow/mshadow/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,64 @@ struct divto {
typedef op::div OPType;
};
} // namespace sv

#ifndef __CUDA_ARCH__
using std::isnan;
using std::isinf;
#endif

/*! \brief
 * determines if the given floating point
 * number is not a number */
namespace isnan_typed {
// Generic fallback: non-floating-point types can never represent NaN.
template<typename DType>
MSHADOW_XINLINE bool IsNan(volatile DType val) {
  return false;
}
template<>
MSHADOW_XINLINE bool IsNan(volatile float val) {
  return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile double val) {
  return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile long double val) {
  return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) {
  // IEEE 754 binary16: a NaN has all exponent bits set (0x7c00) and a
  // non-zero mantissa. Clearing the sign bit (& 0x7fff) therefore leaves a
  // value strictly greater than 0x7c00 exactly for NaN encodings.
  return (val.half_ & 0x7fff) > 0x7c00;
}
}  // namespace isnan_typed

/*! \brief
 * determines if the given floating point
 * number is a positive or negative infinity */
namespace isinf_typed {
// Generic fallback: non-floating-point types can never hold an infinity.
template<typename DType>
MSHADOW_XINLINE bool IsInf(volatile DType v) {
  return false;
}
template<>
MSHADOW_XINLINE bool IsInf(volatile float v) {
  return isinf(v);
}
template<>
MSHADOW_XINLINE bool IsInf(volatile double v) {
  return isinf(v);
}
template<>
MSHADOW_XINLINE bool IsInf(volatile long double v) {
  return isinf(v);
}
template<>
MSHADOW_XINLINE bool IsInf(volatile mshadow::half::half_t v) {
  // IEEE 754 binary16: +/-inf has exponent all ones (0x7c00) and a zero
  // mantissa; mask off the sign bit and compare for exact equality.
  return (v.half_ & 0x7fff) == 0x7c00;
}
}  // namespace isinf_typed

wkcn marked this conversation as resolved.
Show resolved Hide resolved
/*! \brief namespace for potential reducer operations */
namespace red {
namespace limits {
Expand Down Expand Up @@ -674,6 +732,11 @@ template<>
MSHADOW_XINLINE double NegInfValue<double>(void) {
return -HUGE_VAL;
}
/*! \brief negative infinity value of float16 */
template<>
MSHADOW_XINLINE half::half_t NegInfValue<half::half_t>(void) {
// 0xfc00: sign bit set, exponent bits all ones, mantissa zero — the
// IEEE 754 binary16 encoding of -infinity.
return half::half_t::Binary(0xfc00);
}

/*!
* \brief maximum value of certain types
Expand Down Expand Up @@ -740,6 +803,11 @@ template<>
MSHADOW_XINLINE double PosInfValue<double>(void) {
return HUGE_VAL;
}
/*! \brief positive infinity value of float16 */
template<>
MSHADOW_XINLINE half::half_t PosInfValue<half::half_t>(void) {
// 0x7c00: sign bit clear, exponent bits all ones, mantissa zero — the
// IEEE 754 binary16 encoding of +infinity.
return half::half_t::Binary(0x7c00);
}

} // namespace limits

Expand All @@ -755,7 +823,11 @@ struct sum {
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src, volatile DType& residual) { // NOLINT(*)
  // Kahan (compensated) summation step: 'residual' carries the rounding
  // error accumulated by previous additions.
  DType y = src - residual;
  DType t = dst + y;
  if (isinf_typed::IsInf(t)) {
    // Once the running total overflows to +/-inf, (t - dst) - y would be
    // inf - inf = NaN and poison every later step; drop the compensation
    // so the infinite result is preserved.
    residual = 0;
  } else {
    residual = (t - dst) - y;
  }
  dst = t;
}
/*! \brief combine the results of two reducers */
Expand All @@ -767,10 +839,15 @@ struct sum {
template<typename DType>
MSHADOW_XINLINE static void Merge(volatile DType& dst_val, volatile DType& dst_residual, volatile DType& src_val, volatile DType& src_residual) { // NOLINT(*)
  // Merge two compensated partial sums: a two-sum of the values whose
  // error term absorbs both residuals.
  DType t1 = dst_val + src_val;
  if (isinf_typed::IsInf(t1)) {
    // An infinite total makes the error terms meaningless (inf - inf = NaN);
    // keep the inf and clear the compensation.
    dst_val = t1;
    dst_residual = 0;
  } else {
    DType e = t1 - dst_val;
    DType t2 = ((src_val - e) + (dst_val - (t1 - e))) + dst_residual + src_residual;
    dst_val = t1 + t2;
    dst_residual = t2 - (dst_val - t1);
  }
}
/*! \brief finalize reduction */
template<typename DType>
Expand Down Expand Up @@ -807,12 +884,9 @@ struct maximum {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
  // NaN-aware maximum: once dst is NaN it is kept, so NaN propagates
  // through the whole reduction. Otherwise !(dst >= src) is true when
  // src > dst OR when src is NaN (comparisons with NaN are false), so a
  // NaN src is adopted and then sticks.
  if (!isnan_typed::IsNan(dst)) {
    if (!(dst >= src)) dst = src;
  }
}
/*! \brief do reduction into dst */
template<typename DType>
Expand Down Expand Up @@ -863,12 +937,9 @@ struct minimum {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
  // NaN-aware minimum: once dst is NaN it is kept, so NaN propagates
  // through the whole reduction. Otherwise !(dst <= src) is true when
  // src < dst OR when src is NaN (comparisons with NaN are false), so a
  // NaN src is adopted and then sticks.
  if (!isnan_typed::IsNan(dst)) {
    if (!(dst <= src)) dst = src;
  }
}
/*! \brief do reduction into dst */
template<typename DType>
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/mshadow/mshadow/extension/reduce_with_axis.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ struct Plan<ReduceWithAxisExp<Reducer, SrcExp, DType, dimsrc, mask, dimdst>, DTy
index_t z = (x*size_+k)*trailing_+y;
DType tmp = res;
Reducer::Reduce(res, src_.Eval(z/last_, z%last_));
if (tmp != res) {
if (tmp != res && !isnan_typed::IsNan(tmp)) {
wkcn marked this conversation as resolved.
Show resolved Hide resolved
idx = k;
}
}
Expand Down
6 changes: 2 additions & 4 deletions julia/src/ndarray/reduction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ broadcasted(::typeof(min), x::NDArray{T}, y::NDArray{T}) where {T} =
"""
argmax(x::NDArray; dims) -> indices

Note that `NaN` is treated as greater than all other values in `argmax`.

## Examples

Expand Down Expand Up @@ -77,8 +76,7 @@ Base.argmax(x::NDArray; dims = :) = _argmax(x, dims) .+ 1
"""
argmin(x::NDArray; dims) -> indices

Note that `NaN` is treated as less than all other values in `argmin`.

## Examples

Expand Down
16 changes: 8 additions & 8 deletions julia/test/unittest/ndarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1515,8 +1515,8 @@ function test_argmax()
4 2 6]
x = NDArray(A)

@test copy(argmax(x, dims = 1)) == [2 1 2]
@test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
@test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
@test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
end

@info "NDArray::argmax::NaN"
Expand All @@ -1525,8 +1525,8 @@ function test_argmax()
NaN 2 6]
x = NDArray(A)

@test copy(argmax(x, dims = 1)) == [1 1 2]
@test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
@test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
@test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
end
end

Expand All @@ -1537,8 +1537,8 @@ function test_argmin()
4 2 6]
x = NDArray(A)

@test copy(argmin(x, dims = 1)) == [1 2 1]
@test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
@test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
@test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
end

@info "NDArray::argmin::NaN"
Expand All @@ -1547,8 +1547,8 @@ function test_argmin()
NaN 2 6]
x = NDArray(A)

@test copy(argmin(x, dims = 1)) == [1 2 1]
@test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
@test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
@test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
end
end

Expand Down
1 change: 1 addition & 0 deletions python/mxnet/ndarray/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4935,6 +4935,7 @@ class DLDataType(ctypes.Structure):
"bool": (1, 1, 1),
"uint32": (1, 32, 1),
"uint64": (1, 64, 1),
'float16': (2, 16, 1),
"float32": (2, 32, 1),
"float64": (2, 64, 1),
}
Expand Down
2 changes: 1 addition & 1 deletion src/operator/contrib/allclose_op-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ inline bool AllCloseType(const nnvm::NodeAttrs& attrs,
return (*out_attrs)[0] != -1;
}

using namespace mshadow_op::isnan_typed;
using mshadow::isnan_typed::IsNan;

template<int req>
struct allclose_forward {
Expand Down
46 changes: 12 additions & 34 deletions src/operator/mshadow_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#define MXNET_OPERATOR_MSHADOW_OP_H_

#include <mxnet/base.h>
#include <mshadow/base.h>
#include "math.h"
#include "math_functions-inl.h"
#include "special_functions-inl.h"
Expand All @@ -41,6 +42,8 @@ namespace mxnet {
namespace op {
namespace mshadow_op {

using mshadow::isnan_typed::IsNan;

#ifdef __CUDA_ARCH__
__constant__ const float PI = 3.14159265358979323846;
__constant__ const float SELU_ALPHA = 1.6732632423543772848170429916717;
Expand All @@ -51,7 +54,6 @@ const float PI = 3.14159265358979323846;
const float SELU_ALPHA = 1.6732632423543772848170429916717;
const float SELU_LAMBDA = 1.0507009873554804934193349852946;
const float SQRT_2 = 1.4142135623730950488016887242096;
using std::isnan;
#endif
using std::enable_if;
using std::is_unsigned;
Expand Down Expand Up @@ -854,37 +856,13 @@ struct product {
}
};

namespace isnan_typed {
template<typename DType>
MSHADOW_XINLINE bool IsNan(volatile DType val) {
return false;
}
template<>
MSHADOW_XINLINE bool IsNan(volatile float val) {
return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile double val) {
return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile long double val) {
return isnan(val);
}

template<>
MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) {
return (val.half_ & 0x7fff) > 0x7c00;
}
}; // namespace isnan_typed

MXNET_UNARY_MATH_OP_NC(relu, isnan_typed::IsNan(a) || (a > DType(0)) ? a : DType(0));
MXNET_UNARY_MATH_OP_NC(relu, IsNan(a) || (a > DType(0)) ? a : DType(0));

/*! \brief used for computing gradient of relu operator */
struct relu_grad : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a) {
if (isnan_typed::IsNan(a)) {
if (IsNan(a)) {
return a;
} else {
return a > DType(0) ? DType(1) : DType(0);
Expand All @@ -896,7 +874,7 @@ struct relu_grad : public mxnet_op::tunable {
struct maximum : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
if (isnan_typed::IsNan(a)) {
if (IsNan(a)) {
return a;
} else {
return (a > b ? a : b);
Expand All @@ -908,7 +886,7 @@ struct maximum : public mxnet_op::tunable {
struct minimum : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
if (isnan_typed::IsNan(a)) {
if (IsNan(a)) {
return a;
} else {
return DType(a < b ? a : b);
Expand All @@ -921,13 +899,13 @@ struct nansum {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
if (isnan_typed::IsNan(src)) return;
if (IsNan(src)) return;
dst += src;
}
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src, volatile DType& residual) { // NOLINT(*)
if (isnan_typed::IsNan(src)) return;
if (IsNan(src)) return;
DType y = src - residual;
DType t = dst + y;
residual = (t - dst) - y;
Expand Down Expand Up @@ -973,7 +951,7 @@ struct nansum {
struct nansum_grad : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
return isnan_typed::IsNan(a) ? DType(0) : DType(1);
return IsNan(a) ? DType(0) : DType(1);
}
};

Expand All @@ -982,7 +960,7 @@ struct nanprod {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
if (isnan_typed::IsNan(src)) return;
if (IsNan(src)) return;
dst *= src;
}
/*! \brief do reduction into dst */
Expand Down Expand Up @@ -1156,7 +1134,7 @@ struct sum {
struct nanprod_grad : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
return isnan_typed::IsNan(a) ? DType(0) : b / a;
return IsNan(a) ? DType(0) : b / a;
}
};

Expand Down
Loading