Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
disable mixed precision binary for windows
Browse files Browse the repository at this point in the history
  • Loading branch information
haojin2 committed Nov 1, 2019
1 parent 1fcc31b commit 6b5b420
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 12 deletions.
8 changes: 8 additions & 0 deletions src/operator/numpy/np_elemwise_broadcast_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ bool NumpyBinaryScalarType(const nnvm::NodeAttrs& attrs,
.add_argument("data", "NDArray-or-Symbol", "source input") \
.add_argument("scalar", "float", "scalar input")

#ifndef _WIN32
bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
Expand Down Expand Up @@ -86,6 +87,7 @@ bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
}) \
.add_argument("lhs", "NDArray-or-Symbol", "First input to the function") \
.add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
#endif

MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_add)
.set_attr<FCompute>("FCompute<cpu>", BinaryBroadcastCompute<cpu, op::mshadow_op::plus>)
Expand All @@ -95,6 +97,7 @@ MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_subtract)
.set_attr<FCompute>("FCompute<cpu>", BinaryBroadcastCompute<cpu, op::mshadow_op::minus>)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_broadcast_sub"});

#ifndef _WIN32
MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply)
.set_attr<FCompute>(
"FCompute<cpu>",
Expand All @@ -116,6 +119,11 @@ NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
})
.set_attr<FCompute>("FCompute<cpu>", MixedBinaryBackwardUseIn<cpu, mshadow_op::right,
mshadow_op::left>);
#else
MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_multiply)
.set_attr<FCompute>("FCompute<cpu>", BinaryBroadcastCompute<cpu, op::mshadow_op::mul>)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mul"});
#endif

MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_mod)
.set_attr<FCompute>("FCompute<cpu>", BinaryBroadcastCompute<cpu, mshadow_op::mod>)
Expand Down
4 changes: 4 additions & 0 deletions src/operator/numpy/np_elemwise_broadcast_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ NNVM_REGISTER_OP(_npi_subtract)
.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastCompute<gpu, op::mshadow_op::minus>);

NNVM_REGISTER_OP(_npi_multiply)
#ifndef _WIN32
.set_attr<FCompute>(
"FCompute<gpu>",
MixedBinaryBroadcastCompute<gpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
Expand All @@ -44,6 +45,9 @@ NNVM_REGISTER_OP(_npi_multiply)
NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
.set_attr<FCompute>("FCompute<gpu>", MixedBinaryBackwardUseIn<gpu, mshadow_op::right,
mshadow_op::left>);
#else
.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastCompute<gpu, op::mshadow_op::mul>);
#endif

NNVM_REGISTER_OP(_npi_mod)
.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastCompute<gpu, mshadow_op::mod>);
Expand Down
10 changes: 10 additions & 0 deletions src/operator/numpy/np_elemwise_broadcast_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ void MixedBinaryElemwiseCompute(const nnvm::NodeAttrs& attrs,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
// TODO(haojin2): No mixed-precision multiply on windows temporarily due to CI issues.
#ifndef _WIN32
using namespace mshadow;
using namespace mxnet_op;
CHECK_EQ(inputs.size(), 2U);
Expand Down Expand Up @@ -68,6 +70,9 @@ void MixedBinaryElemwiseCompute(const nnvm::NodeAttrs& attrs,
}
});
});
#else
LOG(ERROR) << "mixed precision multiply is not supported on windows yet...";
#endif
}

template<typename xpu, typename OP, typename LOP, typename ROP>
Expand Down Expand Up @@ -97,6 +102,8 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
return;
}

// TODO(haojin2): No mixed-precision multiply on windows temporarily due to CI issues.
#ifndef _WIN32
CHECK((lhs.type_flag_ == mshadow::kBool) || (rhs.type_flag_ == mshadow::kBool))
<< "now supports bool with another type only";

Expand All @@ -122,6 +129,9 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
});
});
}
#else
LOG(ERROR) << "mixed precision multiply is not supported on windows yet...";
#endif
}

template<typename xpu, typename LOP, typename ROP>
Expand Down
59 changes: 59 additions & 0 deletions src/operator/numpy/np_true_divide-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ void TrueDivideElemwiseCompute(const nnvm::NodeAttrs &attrs,
const TBlob& lhs = inputs[0];
const TBlob& rhs = inputs[1];
const TBlob& out = outputs[0];
// TODO(haojin2): No mixed-precision true_divide on windows temporarily due to CI issues.
#ifndef _WIN32
MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
if (lhs.type_flag_ == rhs.type_flag_) {
// Case when types of the 2 input tensors are the same
Expand Down Expand Up @@ -137,6 +139,31 @@ void TrueDivideElemwiseCompute(const nnvm::NodeAttrs &attrs,
}
}
});
#else
MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
if (lhs.type_flag_ == rhs.type_flag_) {
// Case when types of the 2 input tensors are the same
if (common::is_float(lhs.type_flag_)) {
// If both are the same floats, normal launch
MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
s, out.Size(), out.dptr<DType>(), lhs.dptr<DType>(), rhs.dptr<DType>());
});
} else {
// If both are the same integers, output is float32
CHECK_EQ(out.type_flag_, kFloat32) << "true_divide only supports float32 output "
"when input's dtype is "
<< type_string(lhs.type_flag_);
MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
Kernel<op_with_req<mshadow_op::true_divide, Req>, xpu>::Launch(
s, out.Size(), out.dptr<float>(), lhs.dptr<DType>(), rhs.dptr<DType>());
});
}
} else {
LOG(ERROR) << "mixed precision true_divide is not supported on windows yet...";
}
});
#endif
}

template<typename xpu>
Expand All @@ -159,6 +186,8 @@ void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs,
const TBlob& lhs = inputs[0];
const TBlob& rhs = inputs[1];
const TBlob& out = outputs[0];
// TODO(haojin2): No mixed-precision true_divide on windows temporarily due to CI issues.
#ifndef _WIN32
BROADCAST_NDIM_SWITCH(ndim, NDim, {
mshadow::Shape<NDim> oshape = new_oshape.get<NDim>();
mshadow::Shape<NDim> lstride = calc_stride(new_lshape.get<NDim>());
Expand Down Expand Up @@ -218,6 +247,36 @@ void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs,
}
}
});
#else
BROADCAST_NDIM_SWITCH(ndim, NDim, {
mshadow::Shape<NDim> oshape = new_oshape.get<NDim>();
mshadow::Shape<NDim> lstride = calc_stride(new_lshape.get<NDim>());
mshadow::Shape<NDim> rstride = calc_stride(new_rshape.get<NDim>());
if (lhs.type_flag_ == rhs.type_flag_) {
// When both inputs have the same data type
if (common::is_float(lhs.type_flag_)) {
// If both inputs are the same float types, output is the same float type
MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>, xpu>::
template LaunchEx(s, new_oshape.Size(), req[0], lstride, rstride, oshape,
lhs.dptr<DType>(), rhs.dptr<DType>(), out.dptr<DType>());
});
} else {
CHECK_EQ(out.type_flag_, mshadow::kFloat32)
<< "true_divide only supports float32 output when input's dtype is "
<< type_string(lhs.type_flag_);
MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
// If both inputs are the same integer types, output is float type
Kernel<binary_broadcast_kernel<NDim, mshadow_op::true_divide>, xpu>::
template LaunchEx(s, new_oshape.Size(), req[0], lstride, rstride, oshape,
lhs.dptr<DType>(), rhs.dptr<DType>(), out.dptr<float>());
});
}
} else {
LOG(ERROR) << "mixed precision true_divide is not supported on windows yet...";
}
});
#endif
}
}

Expand Down
27 changes: 15 additions & 12 deletions tests/python/unittest/test_numpy_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -1684,6 +1684,9 @@ def hybrid_forward(self, F, a, b, *args, **kwargs):
assert_almost_equal(mx_out.asnumpy(), np_out.astype(mx_out.dtype), rtol=1e-3, atol=1e-5,
use_broadcast=False, equal_nan=True)

if sys.platform.startswith('win'):
return

funcs = {
'multiply': (-1.0, 1.0),
}
Expand Down Expand Up @@ -3919,26 +3922,26 @@ def test_np_true_divide():
val = _np.random.randint(3, 50)
out_mx = a / val
out_np = _np.true_divide(a.asnumpy(), val)
print(dtype, a, val, type(out_mx), out_mx, type(out_np), out_np)
assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)

out_mx = val / a
out_np = _np.true_divide(val, a.asnumpy())
assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)

for shape_pair, itype, ftype in itertools.product(shapes, itypes, ftypes):
i_ = np.random.uniform(3, 50, size=shape_pair[0]).astype(itype)
f_ = np.random.uniform(3, 50, size=shape_pair[-1]).astype(ftype)
if not sys.platform.startswith('win'):
for shape_pair, itype, ftype in itertools.product(shapes, itypes, ftypes):
i_ = np.random.uniform(3, 50, size=shape_pair[0]).astype(itype)
f_ = np.random.uniform(3, 50, size=shape_pair[-1]).astype(ftype)

out_mx = i_ / f_
assert out_mx.dtype == ftype
out_np = _np.true_divide(i_.asnumpy(), f_.asnumpy())
assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
out_mx = i_ / f_
assert out_mx.dtype == ftype
out_np = _np.true_divide(i_.asnumpy(), f_.asnumpy())
assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)

out_mx = f_ / i_
assert out_mx.dtype == ftype
out_np = _np.true_divide(f_.asnumpy(), i_.asnumpy())
assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
out_mx = f_ / i_
assert out_mx.dtype == ftype
out_np = _np.true_divide(f_.asnumpy(), i_.asnumpy())
assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)


@with_seed()
Expand Down

0 comments on commit 6b5b420

Please sign in to comment.