
add sum for boolean type in mainline
haojin2 committed Oct 11, 2019
1 parent d8193c6 commit 11a2571
Showing 6 changed files with 51 additions and 13 deletions.
10 changes: 10 additions & 0 deletions 3rdparty/mshadow/mshadow/base.h
@@ -650,6 +650,11 @@ template<>
 MSHADOW_XINLINE int64_t MinValue<int64_t>(void) {
   return LLONG_MIN;
 }
+/*! \brief minimum value of bool */
+template<>
+MSHADOW_XINLINE bool MinValue<bool>(void) {
+  return false;
+}
 
 /*!
  * \brief negative infinity of certain types
@@ -711,6 +716,11 @@ template<>
 MSHADOW_XINLINE int64_t MaxValue<int64_t>(void) {
   return LLONG_MAX;
 }
+/*! \brief maximum value of bool */
+template<>
+MSHADOW_XINLINE bool MaxValue<bool>(void) {
+  return true;
+}
 
 /*!
  * \brief positive infinity of certain types
9 changes: 6 additions & 3 deletions python/mxnet/test_utils.py
@@ -2231,14 +2231,17 @@ def is_cd_run():
 _features = Features()
 
 
+def built_with_tvm_op():
+    """Returns True if MXNet is compiled with TVM-generated operators."""
+    return _features.is_enabled("TVM_OP")
+
 def has_tvm_ops():
     """Returns True if MXNet is compiled with TVM generated operators. If current ctx
     is GPU, it only returns True for CUDA compute capability > 52 where FP16 is supported."""
-    built_with_tvm_op = _features.is_enabled("TVM_OP")
     if current_context().device_type == 'gpu':
         try:
             import tvm
         except ImportError:
             return False
-        return built_with_tvm_op and (int("".join(tvm.nd.gpu(0).compute_version.split('.'))) >= 53)
-    return built_with_tvm_op
+        return built_with_tvm_op() and (int("".join(tvm.nd.gpu(0).compute_version.split('.'))) >= 53)
+    return built_with_tvm_op()
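
Note: the capability check above strips the dot from the CUDA compute-capability string and compares the result against 53 (sm_53, the first architecture with usable FP16 arithmetic). A minimal standalone sketch of that conversion, using a hard-coded version string instead of querying a device through tvm.nd.gpu(0):

    # Illustration only: "7.5" is a hypothetical compute-capability string.
    compute_version = "7.5"
    as_int = int("".join(compute_version.split('.')))  # "7.5" -> 75
    print(as_int, as_int >= 53)                         # 75 True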
34 changes: 28 additions & 6 deletions src/operator/mxnet_op.h
@@ -369,6 +369,13 @@ struct AccType<mshadow::half::half_t> {
       {__VA_ARGS__}                               \
     }                                             \
     break;                                        \
+  case mshadow::kBool:                            \
+    {                                             \
+      typedef bool DType;                         \
+      typedef int64_t AType;                      \
+      {__VA_ARGS__}                               \
+    }                                             \
+    break;                                        \
   default:                                        \
     LOG(FATAL) << "Unknown type enum " << type;   \
   }
@@ -608,16 +615,11 @@ template <typename xpu>
 MSHADOW_CINLINE void copy(mshadow::Stream<xpu> *s, const TBlob& to, const TBlob& from) {
   CHECK_EQ(from.Size(), to.Size());
   CHECK_EQ(from.dev_mask(), to.dev_mask());
-  if (from.type_flag_ == mshadow::kBool || to.type_flag_ == mshadow::kBool) {
-    CHECK_EQ(from.type_flag_, to.type_flag_) << "Only supports copying between boolean ndarrays.";
-    mshadow::Copy(to.FlatTo1D<xpu, bool>(s), from.FlatTo1D<xpu, bool>(s), s);
-    return;
-  }
   MSHADOW_TYPE_SWITCH(to.type_flag_, DType, {
     if (to.type_flag_ == from.type_flag_) {
       mshadow::Copy(to.FlatTo1D<xpu, DType>(s), from.FlatTo1D<xpu, DType>(s), s);
     } else {
-      MSHADOW_TYPE_SWITCH(from.type_flag_, SrcDType, {
+      MSHADOW_TYPE_SWITCH_WITH_BOOL(from.type_flag_, SrcDType, {
         to.FlatTo1D<xpu, DType>(s) = mshadow::expr::tcast<DType>(from.FlatTo1D<xpu, SrcDType>(s));
       })
     }
@@ -695,6 +697,26 @@ struct op_with_req {
     KERNEL_ASSIGN(out[i], req, OP::Map(input_1[i], input_2[i], value));
   }
 
+  template<typename DType,
+           typename std::enable_if<!std::is_same<DType, bool>::value, int>::type = 0>
+  MSHADOW_XINLINE static void Map(index_t i, bool *out, const DType *in) {
+    KERNEL_ASSIGN(out[i], req, OP::Map(in[i]));
+  }
+
+  /*! \brief inputs are two tensors with a boolean output tensor */
+  template<typename DType,
+           typename std::enable_if<!std::is_same<DType, bool>::value, int>::type = 0>
+  MSHADOW_XINLINE static void Map(index_t i, bool *out, const DType *lhs, const DType *rhs) {
+    KERNEL_ASSIGN(out[i], req, OP::Map(lhs[i], rhs[i]));
+  }
+
+  /*! \brief input is a tensor and a scalar value with a boolean output tensor */
+  template<typename DType,
+           typename std::enable_if<!std::is_same<DType, bool>::value, int>::type = 0>
+  MSHADOW_XINLINE static void Map(index_t i, bool *out, const DType *in, const DType value) {
+    KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value));
+  }
+
   /*! \brief inputs are three tensors (ie backward grad with binary grad function) */
   template<typename DType>
   MSHADOW_XINLINE static void Map(index_t i, DType *out,
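
Note: the new kBool case pairs a bool storage type (DType) with an int64_t accumulation type (AType), so reducing a boolean tensor counts True elements in a wide integer rather than staying boolean. A NumPy sketch of the same accumulation convention (NumPy is only a reference point here, not the kernel itself):

    import numpy as np

    x = np.array([True, False, True, True])  # bool storage
    s = x.sum()                               # accumulates in a wider integer type
    print(s, s.dtype)                         # 3 int64 (on typical 64-bit platforms)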
4 changes: 3 additions & 1 deletion src/operator/numpy/np_broadcast_reduce_op.h
@@ -226,7 +226,7 @@ void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs,
   if (param.initial.has_value()) {
     LOG(FATAL) << "initial is not supported yet";
   }
-  if (inputs[0].shape_.Size() == 0) {
+  if (inputs[0].shape_.Size() == 0 && outputs[0].shape_.Size() != 0) {
     using namespace mxnet_op;
     using namespace mshadow;
     Stream<xpu>* s = ctx.get_stream<xpu>();
@@ -236,6 +236,7 @@ void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs,
     return;
   }
   CHECK_NE(req[0], kWriteInplace) << "Reduce does not support write in-place";
+#if MXNET_USE_TVM_OP
   // If boolean ndarray, use the kernel generated by TVM
   if (inputs[0].type_flag_ == mshadow::kBool) {
     std::string reducer_name;
@@ -247,6 +248,7 @@ void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs,
     TVMOpReduce(ctx, inputs[0], param.axis, outputs[0], req[0], reducer_name);
     return;
   }
+#endif
   if (param.axis.has_value() && param.axis.value().ndim() == 0) {
     UnaryOp::IdentityCompute<xpu>(attrs, ctx, inputs, req, outputs);
   }
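
Note: the added outputs[0].shape_.Size() != 0 condition separates two empty-input cases: when the reduced output still has elements it must be filled with the reduction's identity, whereas a zero-size output needs no work at all. NumPy shows the same distinction (used here purely to illustrate the semantics the guard matches):

    import numpy as np

    x = np.zeros((0, 3), dtype=bool)  # empty input
    print(np.sum(x, axis=0))          # output shape (3,), filled with the identity: [0 0 0]
    print(np.sum(x, axis=1))          # output shape (0,), nothing to fill: []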
2 changes: 1 addition & 1 deletion src/operator/tensor/broadcast_reduce_op.h
@@ -616,7 +616,7 @@ void ReduceAxesComputeImpl(const OpContext& ctx,
   mxnet::TShape src_shape, dst_shape;
   BroadcastReduceShapeCompact(inputs[0].shape_, small, &src_shape, &dst_shape);
   Stream<xpu> *s = ctx.get_stream<xpu>();
-  MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
+  MSHADOW_TYPE_SWITCH_WITH_BOOL(inputs[0].type_flag_, DType, {
     MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, OType, {
       const TBlob in_data = inputs[0].reshape(src_shape);
       const TBlob out_data = outputs[0].reshape(dst_shape);
5 changes: 3 additions & 2 deletions tests/python/unittest/test_numpy_op.py
@@ -32,7 +32,8 @@
 from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf, retry
 from mxnet.runtime import Features
 from mxnet.numpy_op_signature import _get_builtin_op
-from mxnet.test_utils import verify_generator, gen_buckets_probs_with_ppf, has_tvm_ops
+from mxnet.test_utils import current_context, verify_generator, gen_buckets_probs_with_ppf
+from mxnet.test_utils import built_with_tvm_op, has_tvm_ops
 import platform
 
 
@@ -518,7 +519,7 @@ def is_int(dtype):
                 expected_ret = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims)
                 expected_ret = expected_ret.astype(dtype)
                 if itype == 'bool':  # special handling of boolean ndarray
-                    if has_tvm_ops():
+                    if current_context().device_type == 'cpu' or (not built_with_tvm_op()) or has_tvm_ops():
                         y = test_sum(x)
                         assert y.dtype == expected_ret.dtype
                         assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-4, atol=1e-5,
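
For context, the behavior the relaxed test gate exercises is an end-to-end sum over a boolean ndarray. A hedged usage sketch, assuming an MXNet build that includes this change (and, on GPU, a TVM-op build with compute capability >= 5.3):

    import mxnet as mx

    x = mx.np.array([True, False, True], dtype=bool)  # boolean ndarray
    y = mx.np.sum(x)                                   # reduced with an integer accumulator
    print(y, y.dtype)                                  # expected: 2 with an int64 dtype, following NumPy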
