diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cc b/src/operator/numpy/np_elemwise_broadcast_op.cc index a458e5213fbb..592ea9e84ea9 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op.cc @@ -54,6 +54,7 @@ bool NumpyBinaryScalarType(const nnvm::NodeAttrs& attrs, .add_argument("data", "NDArray-or-Symbol", "source input") \ .add_argument("scalar", "float", "scalar input") +#ifndef _WIN32 bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs, std::vector* in_attrs, std::vector* out_attrs) { @@ -86,6 +87,7 @@ bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs, }) \ .add_argument("lhs", "NDArray-or-Symbol", "First input to the function") \ .add_argument("rhs", "NDArray-or-Symbol", "Second input to the function") +#endif MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_add) .set_attr("FCompute", BinaryBroadcastCompute) @@ -95,6 +97,7 @@ MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_subtract) .set_attr("FCompute", BinaryBroadcastCompute) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_broadcast_sub"}); +#ifndef _WIN32 MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply) .set_attr( "FCompute", @@ -116,6 +119,11 @@ NNVM_REGISTER_OP(_backward_npi_broadcast_mul) }) .set_attr("FCompute", MixedBinaryBackwardUseIn); +#else +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_multiply) +.set_attr("FCompute", BinaryBroadcastCompute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mul"}); +#endif MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_mod) .set_attr("FCompute", BinaryBroadcastCompute) diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cu b/src/operator/numpy/np_elemwise_broadcast_op.cu index 11ceb16f35c3..a184c0b84a35 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op.cu +++ b/src/operator/numpy/np_elemwise_broadcast_op.cu @@ -36,6 +36,7 @@ NNVM_REGISTER_OP(_npi_subtract) .set_attr("FCompute", BinaryBroadcastCompute); NNVM_REGISTER_OP(_npi_multiply) +#ifndef _WIN32 .set_attr( "FCompute", MixedBinaryBroadcastCompute("FCompute", MixedBinaryBackwardUseIn); +#else +.set_attr("FCompute", BinaryBroadcastCompute); +#endif NNVM_REGISTER_OP(_npi_mod) .set_attr("FCompute", BinaryBroadcastCompute); diff --git a/src/operator/numpy/np_elemwise_broadcast_op.h b/src/operator/numpy/np_elemwise_broadcast_op.h index b0b8833dad17..081af396bd6b 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op.h +++ b/src/operator/numpy/np_elemwise_broadcast_op.h @@ -39,6 +39,8 @@ void MixedBinaryElemwiseCompute(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { + // TODO(haojin2): No mixed-precision multiply on windows temporarily due to CI issues. +#ifndef _WIN32 using namespace mshadow; using namespace mxnet_op; CHECK_EQ(inputs.size(), 2U); @@ -68,6 +70,9 @@ void MixedBinaryElemwiseCompute(const nnvm::NodeAttrs& attrs, } }); }); +#else + LOG(ERROR) << "mixed precision multiply is not supported on windows yet..."; +#endif } template @@ -97,6 +102,8 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs, return; } + // TODO(haojin2): No mixed-precision multiply on windows temporarily due to CI issues. +#ifndef _WIN32 CHECK((lhs.type_flag_ == mshadow::kBool) || (rhs.type_flag_ == mshadow::kBool)) << "now supports bool with another type only"; @@ -122,6 +129,9 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs, }); }); } +#else + LOG(ERROR) << "mixed precision multiply is not supported on windows yet..."; +#endif } template diff --git a/src/operator/numpy/np_true_divide-inl.h b/src/operator/numpy/np_true_divide-inl.h index 2237a20d6029..8aa32661fd3c 100644 --- a/src/operator/numpy/np_true_divide-inl.h +++ b/src/operator/numpy/np_true_divide-inl.h @@ -85,6 +85,8 @@ void TrueDivideElemwiseCompute(const nnvm::NodeAttrs &attrs, const TBlob& lhs = inputs[0]; const TBlob& rhs = inputs[1]; const TBlob& out = outputs[0]; + // TODO(haojin2): No mixed-precision true_divide on windows temporarily due to CI issues. +#ifndef _WIN32 MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { if (lhs.type_flag_ == rhs.type_flag_) { // Case when types of the 2 input tensors are the same @@ -137,6 +139,31 @@ void TrueDivideElemwiseCompute(const nnvm::NodeAttrs &attrs, } } }); +#else + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + if (lhs.type_flag_ == rhs.type_flag_) { + // Case when types of the 2 input tensors are the same + if (common::is_float(lhs.type_flag_)) { + // If both are the same floats, normal launch + MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, { + Kernel, xpu>::Launch( + s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr()); + }); + } else { + // If both are the same integers, output is float32 + CHECK_EQ(out.type_flag_, kFloat32) << "true_divide only supports float32 output " + "when input's dtype is " + << type_string(lhs.type_flag_); + MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, { + Kernel, xpu>::Launch( + s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr()); + }); + } + } else { + LOG(ERROR) << "mixed precision true_divide is not supported on windows yet..."; + } + }); +#endif } template @@ -159,6 +186,8 @@ void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs, const TBlob& lhs = inputs[0]; const TBlob& rhs = inputs[1]; const TBlob& out = outputs[0]; + // TODO(haojin2): No mixed-precision true_divide on windows temporarily due to CI issues. +#ifndef _WIN32 BROADCAST_NDIM_SWITCH(ndim, NDim, { mshadow::Shape oshape = new_oshape.get(); mshadow::Shape lstride = calc_stride(new_lshape.get()); @@ -218,6 +247,36 @@ void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs, } } }); +#else + BROADCAST_NDIM_SWITCH(ndim, NDim, { + mshadow::Shape oshape = new_oshape.get(); + mshadow::Shape lstride = calc_stride(new_lshape.get()); + mshadow::Shape rstride = calc_stride(new_rshape.get()); + if (lhs.type_flag_ == rhs.type_flag_) { + // When the both inputs have the same data types + if (common::is_float(lhs.type_flag_)) { + // If both inputs are the same float types, output is the same float type + MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, { + Kernel, xpu>:: + template LaunchEx(s, new_oshape.Size(), req[0], lstride, rstride, oshape, + lhs.dptr(), rhs.dptr(), out.dptr()); + }); + } else { + CHECK_EQ(out.type_flag_, mshadow::kFloat32) + << "true_divide only supports float32 output when input's dtype is " + << type_string(lhs.type_flag_); + MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, { + // If both inputs are the same integer types, output is float type + Kernel, xpu>:: + template LaunchEx(s, new_oshape.Size(), req[0], lstride, rstride, oshape, + lhs.dptr(), rhs.dptr(), out.dptr()); + }); + } + } else { + LOG(ERROR) << "mixed precision true_divide is not supported on windows yet..."; + } + }); +#endif } } diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index d2fa14252b55..fd86545014ee 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -1684,6 +1684,9 @@ def hybrid_forward(self, F, a, b, *args, **kwargs): assert_almost_equal(mx_out.asnumpy(), np_out.astype(mx_out.dtype), rtol=1e-3, atol=1e-5, use_broadcast=False, equal_nan=True) + if sys.platform.startswith('win'): + return + funcs = { 'multiply': (-1.0, 1.0), } @@ -3919,26 +3922,26 @@ def test_np_true_divide(): val = _np.random.randint(3, 50) out_mx = a / val out_np = _np.true_divide(a.asnumpy(), val) - print(dtype, a, val, type(out_mx), out_mx, type(out_np), out_np) assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False) out_mx = val / a out_np = _np.true_divide(val, a.asnumpy()) assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False) - for shape_pair, itype, ftype in itertools.product(shapes, itypes, ftypes): - i_ = np.random.uniform(3, 50, size=shape_pair[0]).astype(itype) - f_ = np.random.uniform(3, 50, size=shape_pair[-1]).astype(ftype) + if not sys.platform.startswith('win'): + for shape_pair, itype, ftype in itertools.product(shapes, itypes, ftypes): + i_ = np.random.uniform(3, 50, size=shape_pair[0]).astype(itype) + f_ = np.random.uniform(3, 50, size=shape_pair[-1]).astype(ftype) - out_mx = i_ / f_ - assert out_mx.dtype == ftype - out_np = _np.true_divide(i_.asnumpy(), f_.asnumpy()) - assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False) + out_mx = i_ / f_ + assert out_mx.dtype == ftype + out_np = _np.true_divide(i_.asnumpy(), f_.asnumpy()) + assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False) - out_mx = f_ / i_ - assert out_mx.dtype == ftype - out_np = _np.true_divide(f_.asnumpy(), i_.asnumpy()) - assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False) + out_mx = f_ / i_ + assert out_mx.dtype == ftype + out_np = _np.true_divide(f_.asnumpy(), i_.asnumpy()) + assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False) @with_seed()