Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1426] Fix the wrong result of sum, mean, argmin, argmax when inputs contain inf or nan #16234

Merged
merged 21 commits into from
Nov 12, 2019
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 88 additions & 17 deletions 3rdparty/mshadow/mshadow/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,64 @@ struct divto {
typedef op::div OPType;
};
} // namespace sv

#ifndef __CUDA_ARCH__
using std::isnan;
using std::isinf;
#endif

/*! \brief
 * determines if the given floating point
 * number is not a number */
namespace isnan_typed {
// Generic fallback: non-floating-point types can never represent NaN.
template<typename DType>
MSHADOW_XINLINE bool IsNan(volatile DType val) {
  return false;
}
template<>
MSHADOW_XINLINE bool IsNan(volatile float val) {
  return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile double val) {
  return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile long double val) {
  return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) {
  // IEEE 754 binary16: a NaN has all exponent bits set (0x7c00) and a
  // non-zero mantissa. Clearing the sign bit (& 0x7fff) therefore leaves a
  // value strictly greater than 0x7c00 exactly for NaN encodings.
  return (val.half_ & 0x7fff) > 0x7c00;
}
}  // namespace isnan_typed

/*! \brief
 * determines if the given floating point
 * number is a positive or negative infinity */
namespace isinf_typed {
// Generic fallback: non-floating-point types can never hold an infinity.
template<typename DType>
MSHADOW_XINLINE bool IsInf(volatile DType v) {
  return false;
}
template<>
MSHADOW_XINLINE bool IsInf(volatile float v) {
  return isinf(v);
}
template<>
MSHADOW_XINLINE bool IsInf(volatile double v) {
  return isinf(v);
}
template<>
MSHADOW_XINLINE bool IsInf(volatile long double v) {
  return isinf(v);
}
template<>
MSHADOW_XINLINE bool IsInf(volatile mshadow::half::half_t v) {
  // IEEE 754 binary16: +/-inf has exponent all ones (0x7c00) and a zero
  // mantissa; mask off the sign bit and compare for exact equality.
  return (v.half_ & 0x7fff) == 0x7c00;
}
}  // namespace isinf_typed

wkcn marked this conversation as resolved.
Show resolved Hide resolved
/*! \brief namespace for potential reducer operations */
namespace red {
namespace limits {
Expand Down Expand Up @@ -674,6 +732,11 @@ template<>
MSHADOW_XINLINE double NegInfValue<double>(void) {
return -HUGE_VAL;
}
/*! \brief negative infinity value of float16 */
template<>
MSHADOW_XINLINE half::half_t NegInfValue<half::half_t>(void) {
// 0xfc00: sign bit set, exponent bits all ones, mantissa zero — the
// IEEE 754 binary16 encoding of -infinity.
return half::half_t::Binary(0xfc00);
}

/*!
* \brief maximum value of certain types
Expand Down Expand Up @@ -740,6 +803,11 @@ template<>
MSHADOW_XINLINE double PosInfValue<double>(void) {
return HUGE_VAL;
}
/*! \brief positive infinity value of float16 */
template<>
MSHADOW_XINLINE half::half_t PosInfValue<half::half_t>(void) {
// 0x7c00: sign bit clear, exponent bits all ones, mantissa zero — the
// IEEE 754 binary16 encoding of +infinity.
return half::half_t::Binary(0x7c00);
}

} // namespace limits

Expand All @@ -755,7 +823,11 @@ struct sum {
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src, volatile DType& residual) { // NOLINT(*)
  // Kahan (compensated) summation step: 'residual' carries the rounding
  // error accumulated by previous additions.
  DType y = src - residual;
  DType t = dst + y;
  if (isinf_typed::IsInf(t)) {
    // Once the running total overflows to +/-inf, (t - dst) - y would be
    // inf - inf = NaN and poison every later step; drop the compensation
    // so the infinite result is preserved.
    residual = 0;
  } else {
    residual = (t - dst) - y;
  }
  dst = t;
}
/*! \brief combine the results of two reducers */
Expand All @@ -767,10 +839,15 @@ struct sum {
template<typename DType>
MSHADOW_XINLINE static void Merge(volatile DType& dst_val, volatile DType& dst_residual, volatile DType& src_val, volatile DType& src_residual) { // NOLINT(*)
  // Merge two compensated partial sums: a two-sum of the values whose
  // error term absorbs both residuals.
  DType t1 = dst_val + src_val;
  if (isinf_typed::IsInf(t1)) {
    // An infinite total makes the error terms meaningless (inf - inf = NaN);
    // keep the inf and clear the compensation.
    dst_val = t1;
    dst_residual = 0;
  } else {
    DType e = t1 - dst_val;
    DType t2 = ((src_val - e) + (dst_val - (t1 - e))) + dst_residual + src_residual;
    dst_val = t1 + t2;
    dst_residual = t2 - (dst_val - t1);
  }
}
/*! \brief finalize reduction */
template<typename DType>
Expand Down Expand Up @@ -807,12 +884,9 @@ struct maximum {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
  // NaN-aware maximum: once dst is NaN it is kept, so NaN propagates
  // through the whole reduction. Otherwise !(dst >= src) is true when
  // src > dst OR when src is NaN (comparisons with NaN are false), so a
  // NaN src is adopted and then sticks.
  if (!isnan_typed::IsNan(dst)) {
    if (!(dst >= src)) dst = src;
  }
}
/*! \brief do reduction into dst */
template<typename DType>
Expand Down Expand Up @@ -863,12 +937,9 @@ struct minimum {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
  // NaN-aware minimum: once dst is NaN it is kept, so NaN propagates
  // through the whole reduction. Otherwise !(dst <= src) is true when
  // src < dst OR when src is NaN (comparisons with NaN are false), so a
  // NaN src is adopted and then sticks.
  if (!isnan_typed::IsNan(dst)) {
    if (!(dst <= src)) dst = src;
  }
}
/*! \brief do reduction into dst */
template<typename DType>
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/mshadow/mshadow/extension/reduce_with_axis.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ struct Plan<ReduceWithAxisExp<Reducer, SrcExp, DType, dimsrc, mask, dimdst>, DTy
index_t z = (x*size_+k)*trailing_+y;
DType tmp = res;
Reducer::Reduce(res, src_.Eval(z/last_, z%last_));
if (tmp != res) {
if (tmp != res && !isnan_typed::IsNan(tmp)) {
wkcn marked this conversation as resolved.
Show resolved Hide resolved
idx = k;
}
}
Expand Down
6 changes: 2 additions & 4 deletions julia/src/ndarray/reduction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ broadcasted(::typeof(min), x::NDArray{T}, y::NDArray{T}) where {T} =
"""
argmax(x::NDArray; dims) -> indices

Note that `NaN` is treated as greater than all other values in `argmax`.

## Examples

Expand Down Expand Up @@ -77,8 +76,7 @@ Base.argmax(x::NDArray; dims = :) = _argmax(x, dims) .+ 1
"""
argmin(x::NDArray; dims) -> indices

Note that `NaN` is treated as less than all other values in `argmin`.

## Examples

Expand Down
16 changes: 8 additions & 8 deletions julia/test/unittest/ndarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1515,8 +1515,8 @@ function test_argmax()
4 2 6]
x = NDArray(A)

@test copy(argmax(x, dims = 1)) == [2 1 2]
@test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
@test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
@test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
end

@info "NDArray::argmax::NaN"
Expand All @@ -1525,8 +1525,8 @@ function test_argmax()
NaN 2 6]
x = NDArray(A)

@test copy(argmax(x, dims = 1)) == [1 1 2]
@test copy(argmax(x, dims = 2)) == reshape([2, 3], :, 1)
@test copy(argmax(x, dims = 1)) == [x[1] for x ∈ argmax(A, dims = 1)]
@test copy(argmax(x, dims = 2)) == [x[2] for x ∈ argmax(A, dims = 2)]
end
end

Expand All @@ -1537,8 +1537,8 @@ function test_argmin()
4 2 6]
x = NDArray(A)

@test copy(argmin(x, dims = 1)) == [1 2 1]
@test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
@test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
@test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
end

@info "NDArray::argmin::NaN"
Expand All @@ -1547,8 +1547,8 @@ function test_argmin()
NaN 2 6]
x = NDArray(A)

@test copy(argmin(x, dims = 1)) == [1 2 1]
@test copy(argmin(x, dims = 2)) == reshape([1, 2], :, 1)
@test copy(argmin(x, dims = 1)) == [x[1] for x ∈ argmin(A, dims = 1)]
@test copy(argmin(x, dims = 2)) == [x[2] for x ∈ argmin(A, dims = 2)]
end
end

Expand Down
1 change: 1 addition & 0 deletions python/mxnet/ndarray/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4935,6 +4935,7 @@ class DLDataType(ctypes.Structure):
"bool": (1, 1, 1),
"uint32": (1, 32, 1),
"uint64": (1, 64, 1),
'float16': (2, 16, 1),
"float32": (2, 32, 1),
"float64": (2, 64, 1),
}
Expand Down
2 changes: 1 addition & 1 deletion src/operator/contrib/allclose_op-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ inline bool AllCloseType(const nnvm::NodeAttrs& attrs,
return (*out_attrs)[0] != -1;
}

using namespace mshadow_op::isnan_typed;
using mshadow::isnan_typed::IsNan;

template<int req>
struct allclose_forward {
Expand Down
46 changes: 12 additions & 34 deletions src/operator/mshadow_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#define MXNET_OPERATOR_MSHADOW_OP_H_

#include <mxnet/base.h>
#include <mshadow/base.h>
#include "math.h"
#include "math_functions-inl.h"
#include "special_functions-inl.h"
Expand All @@ -41,6 +42,8 @@ namespace mxnet {
namespace op {
namespace mshadow_op {

using mshadow::isnan_typed::IsNan;

#ifdef __CUDA_ARCH__
__constant__ const float PI = 3.14159265358979323846;
__constant__ const float SELU_ALPHA = 1.6732632423543772848170429916717;
Expand All @@ -51,7 +54,6 @@ const float PI = 3.14159265358979323846;
const float SELU_ALPHA = 1.6732632423543772848170429916717;
const float SELU_LAMBDA = 1.0507009873554804934193349852946;
const float SQRT_2 = 1.4142135623730950488016887242096;
using std::isnan;
#endif
using std::enable_if;
using std::is_unsigned;
Expand Down Expand Up @@ -854,37 +856,13 @@ struct product {
}
};

namespace isnan_typed {
template<typename DType>
MSHADOW_XINLINE bool IsNan(volatile DType val) {
return false;
}
template<>
MSHADOW_XINLINE bool IsNan(volatile float val) {
return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile double val) {
return isnan(val);
}
template<>
MSHADOW_XINLINE bool IsNan(volatile long double val) {
return isnan(val);
}

template<>
MSHADOW_XINLINE bool IsNan(volatile mshadow::half::half_t val) {
return (val.half_ & 0x7fff) > 0x7c00;
}
}; // namespace isnan_typed

MXNET_UNARY_MATH_OP_NC(relu, isnan_typed::IsNan(a) || (a > DType(0)) ? a : DType(0));
MXNET_UNARY_MATH_OP_NC(relu, IsNan(a) || (a > DType(0)) ? a : DType(0));

/*! \brief used for computing gradient of relu operator */
struct relu_grad : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a) {
if (isnan_typed::IsNan(a)) {
if (IsNan(a)) {
return a;
} else {
return a > DType(0) ? DType(1) : DType(0);
Expand All @@ -896,7 +874,7 @@ struct relu_grad : public mxnet_op::tunable {
struct maximum : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
if (isnan_typed::IsNan(a)) {
if (IsNan(a)) {
return a;
} else {
return (a > b ? a : b);
Expand All @@ -908,7 +886,7 @@ struct maximum : public mxnet_op::tunable {
struct minimum : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
if (isnan_typed::IsNan(a)) {
if (IsNan(a)) {
return a;
} else {
return DType(a < b ? a : b);
Expand All @@ -921,13 +899,13 @@ struct nansum {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
if (isnan_typed::IsNan(src)) return;
if (IsNan(src)) return;
dst += src;
}
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src, volatile DType& residual) { // NOLINT(*)
if (isnan_typed::IsNan(src)) return;
if (IsNan(src)) return;
DType y = src - residual;
DType t = dst + y;
residual = (t - dst) - y;
Expand Down Expand Up @@ -973,7 +951,7 @@ struct nansum {
struct nansum_grad : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
return isnan_typed::IsNan(a) ? DType(0) : DType(1);
return IsNan(a) ? DType(0) : DType(1);
}
};

Expand All @@ -982,7 +960,7 @@ struct nanprod {
/*! \brief do reduction into dst */
template<typename DType>
MSHADOW_XINLINE static void Reduce(volatile DType& dst, volatile DType src) { // NOLINT(*)
if (isnan_typed::IsNan(src)) return;
if (IsNan(src)) return;
dst *= src;
}
/*! \brief do reduction into dst */
Expand Down Expand Up @@ -1156,7 +1134,7 @@ struct sum {
struct nanprod_grad : public mxnet_op::tunable {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a, DType b) {
return isnan_typed::IsNan(a) ? DType(0) : b / a;
return IsNan(a) ? DType(0) : b / a;
}
};

Expand Down
Loading