diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 765bfc2588b8..e586a1f4ad49 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -133,6 +133,7 @@ struct true_divide : public mxnet_op::tunable {
     return static_cast(a) / static_cast(b);
   }
 
+#ifndef _WIN32
   template::value, int>::type = 0>
   MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
@@ -150,6 +151,7 @@ struct true_divide : public mxnet_op::tunable {
   MSHADOW_XINLINE static double Map(DType a, double b) {
     return static_cast(a) / b;
   }
+#endif
 };
 
 struct rtrue_divide : public mxnet_op::tunable {
@@ -165,6 +167,7 @@ struct rtrue_divide : public mxnet_op::tunable {
     return static_cast(b) / static_cast(a);
   }
 
+#ifndef _WIN32
   template::value, int>::type = 0>
   MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
@@ -182,6 +185,7 @@ struct rtrue_divide : public mxnet_op::tunable {
   MSHADOW_XINLINE static double Map(DType a, double b) {
     return b / static_cast(a);
   }
+#endif
 };
 
 MXNET_BINARY_MATH_OP_NC(left, a);
@@ -190,6 +194,7 @@ MXNET_BINARY_MATH_OP_NC(right, b);
 
 MXNET_BINARY_MATH_OP_NC(mul, a * b);
 
+#ifndef _WIN32
 struct mixed_mul {
   template::value, int>::type = 0>
@@ -197,6 +202,7 @@ struct mixed_mul {
     return static_cast(a) * b;
   }
 };
+#endif
 
 MXNET_BINARY_MATH_OP_NC(div, a / b);
 
diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cc b/src/operator/numpy/np_elemwise_broadcast_op.cc
index 592ea9e84ea9..70943f0bab7f 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op.cc
+++ b/src/operator/numpy/np_elemwise_broadcast_op.cc
@@ -54,7 +54,6 @@ bool NumpyBinaryScalarType(const nnvm::NodeAttrs& attrs,
 .add_argument("data", "NDArray-or-Symbol", "source input")                   \
 .add_argument("scalar", "float", "scalar input")
 
-#ifndef _WIN32
 bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
                                    std::vector* in_attrs,
                                    std::vector* out_attrs) {
@@ -71,6 +70,28 @@ bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
+#ifdef _WIN32
+#define MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(name)              \
+  NNVM_REGISTER_OP(name)                                                     \
+  .set_num_inputs(2)                                                         \
+  .set_num_outputs(1)                                                        \
+  .set_attr("FListInputNames",                                               \
+    [](const NodeAttrs& attrs) {                                             \
+      return std::vector{"lhs", "rhs"};                                      \
+    })                                                                       \
+  .set_attr("FInferShape", BinaryBroadcastShape)                             \
+  .set_attr("FInferType", NumpyBinaryMixedPrecisionType)                     \
+  .set_attr("FInplaceOption",                                                \
+    [](const NodeAttrs& attrs){                                              \
+      return std::vector >{{0, 0}, {1, 0}};                                  \
+    })                                                                       \
+  .set_attr("FResourceRequest",                                              \
+    [](const NodeAttrs& attrs) {                                             \
+      return std::vector{ResourceRequest::kTempSpace};                       \
+    })                                                                       \
+  .add_argument("lhs", "NDArray-or-Symbol", "First input to the function")   \
+  .add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
+#else
 #define MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(name)              \
   NNVM_REGISTER_OP(name)                                                     \
   .set_num_inputs(2)                                                         \
@@ -97,12 +118,18 @@ MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_subtract)
 .set_attr("FCompute", BinaryBroadcastCompute)
 .set_attr("FGradient", ElemwiseGradUseNone{"_backward_broadcast_sub"});
 
-#ifndef _WIN32
 MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply)
+#ifndef _WIN32
 .set_attr(
   "FCompute",
   MixedBinaryBroadcastCompute)
+#else
+.set_attr(
+  "FCompute",
+  MixedBinaryBroadcastCompute)
+#endif
 .set_attr("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mul"});
 
 NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
@@ -119,11 +146,6 @@ NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
   })
 .set_attr("FCompute",
           MixedBinaryBackwardUseIn);
-#else
-MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_multiply)
-.set_attr("FCompute", BinaryBroadcastCompute)
-.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mul"});
-#endif
 
 MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_mod)
 .set_attr("FCompute", BinaryBroadcastCompute)
 
diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cu b/src/operator/numpy/np_elemwise_broadcast_op.cu
index a184c0b84a35..66c9b8e74e7c 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op.cu
+++ b/src/operator/numpy/np_elemwise_broadcast_op.cu
@@ -41,13 +41,16 @@ NNVM_REGISTER_OP(_npi_multiply)
   "FCompute",
   MixedBinaryBroadcastCompute);
+#else
+.set_attr(
+  "FCompute",
+  MixedBinaryBroadcastCompute);
+#endif
 
 NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
 .set_attr("FCompute", MixedBinaryBackwardUseIn);
-#else
-.set_attr("FCompute", BinaryBroadcastCompute);
-#endif
 
 NNVM_REGISTER_OP(_npi_mod)
 .set_attr("FCompute", BinaryBroadcastCompute);
 
diff --git a/src/operator/numpy/np_elemwise_broadcast_op.h b/src/operator/numpy/np_elemwise_broadcast_op.h
index 081af396bd6b..55e637158613 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op.h
+++ b/src/operator/numpy/np_elemwise_broadcast_op.h
@@ -39,7 +39,6 @@ void MixedBinaryElemwiseCompute(const nnvm::NodeAttrs& attrs,
                                 const std::vector& inputs,
                                 const std::vector& req,
                                 const std::vector& outputs) {
-  // TODO(haojin2): No mixed-precision multiply on windows temporarily due to CI issues.
 #ifndef _WIN32
   using namespace mshadow;
   using namespace mxnet_op;
@@ -71,7 +70,7 @@ void MixedBinaryElemwiseCompute(const nnvm::NodeAttrs& attrs,
     });
   });
 #else
-  LOG(ERROR) << "mixed precision multiply is not supported on windows yet...";
+  LOG(ERROR) << "windows should not reach here...";
 #endif
 }
 
@@ -92,22 +91,18 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
 
   if ((out.shape_.Size() == 0U) || (req[0] == kNullOp)) return;
 
-  mxnet::TShape new_lshape, new_rshape, new_oshape;
-  int ndim = BinaryBroadcastShapeCompact(lhs.shape_, rhs.shape_, out.shape_,
-                                         &new_lshape, &new_rshape, &new_oshape);
-
-
   if (lhs.type_flag_ == rhs.type_flag_) {
     BinaryBroadcastCompute(attrs, ctx, inputs, req, outputs);
     return;
   }
 
-  // TODO(haojin2): No mixed-precision multiply on windows temporarily due to CI issues.
-#ifndef _WIN32
   CHECK((lhs.type_flag_ == mshadow::kBool) || (rhs.type_flag_ == mshadow::kBool))
     << "now supports bool with another type only";
-
+#ifndef _WIN32
+  mxnet::TShape new_lshape, new_rshape, new_oshape;
+  int ndim = BinaryBroadcastShapeCompact(lhs.shape_, rhs.shape_, out.shape_,
+                                         &new_lshape, &new_rshape, &new_oshape);
   if (!ndim) {
     MixedBinaryElemwiseCompute(attrs, ctx, inputs, req, outputs);
   } else {
@@ -130,7 +125,37 @@ void MixedBinaryBroadcastCompute(const nnvm::NodeAttrs& attrs,
     });
   }
 #else
-  LOG(ERROR) << "mixed precision multiply is not supported on windows yet...";
+  mshadow::Stream *s = ctx.get_stream();
+  if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
+    LOG(ERROR) << "not implemented yet...";
+  } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
+    TBlob temp_tblob;
+    // one is float, the other is bool
+    CHECK_EQ(out.type_flag_,
+             common::is_float(lhs.type_flag_) ?
+               lhs.type_flag_ : rhs.type_flag_)
+      << "This case out type should be same as the float type";
+    if (common::is_float(lhs.type_flag_)) {
+      MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
+        Tensor temp_tensor =
+          ctx.requested[0].get_space_typed(Shape1(rhs.Size()), s);
+        temp_tblob = TBlob(temp_tensor);
+      });
+      CastCompute(attrs, ctx, {rhs}, {kWriteTo}, {temp_tblob});
+      BinaryBroadcastCompute(
+        attrs, ctx, {lhs, temp_tblob.reshape(rhs.shape_)}, req, outputs);
+    } else {
+      MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
+        Tensor temp_tensor =
+          ctx.requested[0].get_space_typed(Shape1(lhs.Size()), s);
+        temp_tblob = TBlob(temp_tensor);
+      });
+      CastCompute(attrs, ctx, {lhs}, {kWriteTo}, {temp_tblob});
+      BinaryBroadcastCompute(
+        attrs, ctx, {temp_tblob.reshape(lhs.shape_), rhs}, req, outputs);
+    }
+  } else {
+    LOG(ERROR) << "not implemented yet...";
+  }
 #endif
 }
 
diff --git a/src/operator/numpy/np_true_divide-inl.h b/src/operator/numpy/np_true_divide-inl.h
index 8aa32661fd3c..0bc60a08803e 100644
--- a/src/operator/numpy/np_true_divide-inl.h
+++ b/src/operator/numpy/np_true_divide-inl.h
@@ -57,6 +57,7 @@ void TrueDivideScalarCompute(const nnvm::NodeAttrs &attrs,
       });
     });
   } else {
+#ifndef _WIN32
     CHECK_EQ(outputs[0].type_flag_, kFloat32) << "true_divide only supports float32 output "
                                                  "when input's dtype is "
                                               << type_string(inputs[0].type_flag_);
@@ -67,6 +68,13 @@ void TrueDivideScalarCompute(const nnvm::NodeAttrs &attrs,
                                                   static_cast(alpha));
       });
     });
+#else
+    Tensor temp_tensor =
+      ctx.requested[0].get_space_typed(mshadow::Shape1(data.Size()), s);
+    TBlob temp_tblob(temp_tensor);
+    CastCompute(attrs, ctx, {data}, {kWriteTo}, {temp_tblob});
+    TrueDivideScalarCompute(attrs, ctx, {temp_tblob}, req, outputs);
+#endif
   }
 }
 
@@ -85,85 +93,104 @@ void TrueDivideElemwiseCompute(const nnvm::NodeAttrs &attrs,
   const TBlob& lhs = inputs[0];
   const TBlob& rhs = inputs[1];
   const TBlob& out = outputs[0];
-  // TODO(haojin2): No mixed-precision true_divide on windows temporarily due to CI issues.
-#ifndef _WIN32
-  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-    if (lhs.type_flag_ == rhs.type_flag_) {
-      // Case when types of the 2 input tensors are the same
-      if (common::is_float(lhs.type_flag_)) {
-        // If both are the same floats, normal launch
+  if (lhs.type_flag_ == rhs.type_flag_) {
+    // Case when types of the 2 input tensors are the same
+    if (common::is_float(lhs.type_flag_)) {
+      // If both are the same floats, normal launch
+      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
         MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
           Kernel, xpu>::Launch(
             s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr());
         });
-      } else {
-        // If both are the same integers, output is float32
-        CHECK_EQ(out.type_flag_, kFloat32) << "true_divide only supports float32 output "
-                                              "when input's dtype is "
-                                           << type_string(lhs.type_flag_);
+      });
+    } else {
+      // If both are the same integers, output is float32
+      CHECK_EQ(out.type_flag_, kFloat32) << "true_divide only supports float32 output "
+                                            "when input's dtype is "
+                                         << type_string(lhs.type_flag_);
+      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
         MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
           Kernel, xpu>::Launch(
             s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr());
         });
-      }
-    } else {
-      // Case when types of the 2 input tensors are different
-      if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
-        // both lhs and rhs are float types, output type is the more precise one
-        LOG(ERROR) << "not implemented yet...";
-      } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
-        // lhs is float type, rhs is integer type, the output type should be the same as lhs
-        CHECK_EQ(out.type_flag_,
-                 common::is_float(lhs.type_flag_) ? lhs.type_flag_ : rhs.type_flag_)
-          << "This case out type should be same as the float type";
-        if (common::is_float(lhs.type_flag_)) {
-          // lhs is the float one
+      });
+    }
+  } else {
+#ifndef _WIN32
+    // Non-windows case: no usage of temporary space
+    // Case when types of the 2 input tensors are different
+    if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
+      // both lhs and rhs are float types, output type is the more precise one
+      LOG(ERROR) << "not implemented yet...";
+    } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
+      // one is float type, the other is integer type, the output type should be the same as float
+      CHECK_EQ(out.type_flag_,
+               common::is_float(lhs.type_flag_) ?
+                 lhs.type_flag_ : rhs.type_flag_)
+        << "This case out type should be same as the float type";
+      if (common::is_float(lhs.type_flag_)) {
+        // lhs is the float one
+        MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
           MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
             MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, {
               Kernel, xpu>::Launch(
                 s, out.Size(), out.dptr(), rhs.dptr(), lhs.dptr());
             });
           });
-        } else {
-          // rhs is the float one
+        });
+      } else {
+        // rhs is the float one
+        MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
           MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, {
             MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
               Kernel, xpu>::Launch(
                 s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr());
             });
           });
-        }
-      } else {
-        // lhs is integer type, rhs is integer type, output type should be float
-        LOG(ERROR) << "not implemented yet...";
+        });
       }
+    } else {
+      // lhs is integer type, rhs is integer type, output type should be float
+      LOG(ERROR) << "not implemented yet...";
     }
-  });
 #else
-  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-    if (lhs.type_flag_ == rhs.type_flag_) {
-      // Case when types of the 2 input tensors are the same
+    // Windows case: using temp space for casting the type
+    // Case when types of the 2 input tensors are different
+    if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
+      // both lhs and rhs are float types, output type is the more precise one
+      LOG(ERROR) << "not implemented yet...";
+    } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
+      // lhs is float type, rhs is integer type, the output type should be the same as lhs
+      CHECK_EQ(out.type_flag_,
+               common::is_float(lhs.type_flag_) ? lhs.type_flag_ : rhs.type_flag_)
+        << "This case out type should be same as the float type";
+      TBlob temp_tblob;
       if (common::is_float(lhs.type_flag_)) {
-        // If both are the same floats, normal launch
-        MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, DType, {
-          Kernel, xpu>::Launch(
-            s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr());
+        // lhs is the float one
+        MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
+          Tensor temp_tensor =
+            ctx.requested[0].get_space_typed(mshadow::Shape1(rhs.Size()), s);
+          temp_tblob = TBlob(temp_tensor);
         });
+        CastCompute(attrs, ctx, {rhs}, {kWriteTo}, {temp_tblob});
+        TrueDivideElemwiseCompute(
+          attrs, ctx, {lhs, temp_tblob.reshape(rhs.shape_)}, req, outputs);
       } else {
-        // If both are the same integers, output is float32
-        CHECK_EQ(out.type_flag_, kFloat32) << "true_divide only supports float32 output "
-                                              "when input's dtype is "
-                                           << type_string(lhs.type_flag_);
-        MXNET_INT_TYPE_SWITCH(lhs.type_flag_, DType, {
-          Kernel, xpu>::Launch(
-            s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr());
+        // rhs is the float one
+        MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
+          Tensor temp_tensor =
+            ctx.requested[0].get_space_typed(mshadow::Shape1(lhs.Size()), s);
+          temp_tblob = TBlob(temp_tensor);
        });
+        CastCompute(attrs, ctx, {lhs}, {kWriteTo}, {temp_tblob});
+        TrueDivideElemwiseCompute(
+          attrs, ctx, {temp_tblob.reshape(lhs.shape_), rhs}, req, outputs);
       }
     } else {
-      LOG(ERROR) << "mixed precision true_divide is not supported on windows yet...";
+      // lhs is integer type, rhs is integer type, output type should be float
+      LOG(ERROR) << "not implemented yet...";
     }
-  });
 #endif
+  }
 }
 
 template
@@ -186,7 +213,6 @@ void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs,
   const TBlob& lhs = inputs[0];
   const TBlob& rhs = inputs[1];
   const TBlob& out = outputs[0];
-  // TODO(haojin2): No mixed-precision true_divide on windows temporarily due to CI issues.
 #ifndef _WIN32
   BROADCAST_NDIM_SWITCH(ndim, NDim, {
     mshadow::Shape oshape = new_oshape.get();
@@ -248,11 +274,11 @@ void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs,
     }
   });
 #else
-  BROADCAST_NDIM_SWITCH(ndim, NDim, {
-    mshadow::Shape oshape = new_oshape.get();
-    mshadow::Shape lstride = calc_stride(new_lshape.get());
-    mshadow::Shape rstride = calc_stride(new_rshape.get());
-    if (lhs.type_flag_ == rhs.type_flag_) {
+  if (lhs.type_flag_ == rhs.type_flag_) {
+    BROADCAST_NDIM_SWITCH(ndim, NDim, {
+      mshadow::Shape oshape = new_oshape.get();
+      mshadow::Shape lstride = calc_stride(new_lshape.get());
+      mshadow::Shape rstride = calc_stride(new_rshape.get());
       // When the both inputs have the same data types
       if (common::is_float(lhs.type_flag_)) {
         // If both inputs are the same float types, output is the same float type
@@ -272,10 +298,44 @@ void TrueDivideBroadcastCompute(const nnvm::NodeAttrs& attrs,
             lhs.dptr(), rhs.dptr(), out.dptr());
         });
       }
+    });
+  } else {
+    if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) {
+      // lhs and rhs have different float types, the output is the more precise one
+      LOG(ERROR) << "not implemented yet...";
+    } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) {
+      // one of lhs and rhs is float, the output is the same type as the float one
+      TBlob temp_tblob;
+      if (common::is_float(lhs.type_flag_)) {
+        // lhs is float type, output will be the same float type
+        CHECK_EQ(lhs.type_flag_, out.type_flag_)
+          << "lhs should have the same type as out, infer type broken?";
+        MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, {
+          Tensor temp_tensor =
+            ctx.requested[0].get_space_typed(mshadow::Shape1(rhs.Size()), s);
+          temp_tblob = TBlob(temp_tensor);
+        });
+        CastCompute(attrs, ctx, {rhs}, {kWriteTo}, {temp_tblob});
+        TrueDivideBroadcastCompute(
+          attrs, ctx, {lhs, temp_tblob.reshape(rhs.shape_)}, req, outputs);
+      } else {
+        // rhs is float type, output will be the same float type
+        CHECK_EQ(rhs.type_flag_, out.type_flag_)
+          << "rhs should have the same type as out, infer type broken?";
+        MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, {
+          Tensor temp_tensor =
+            ctx.requested[0].get_space_typed(mshadow::Shape1(lhs.Size()), s);
+          temp_tblob = TBlob(temp_tensor);
+        });
+        CastCompute(attrs, ctx, {lhs}, {kWriteTo}, {temp_tblob});
+        TrueDivideBroadcastCompute(
+          attrs, ctx, {temp_tblob.reshape(lhs.shape_), rhs}, req, outputs);
+      }
     } else {
-      LOG(ERROR) << "mixed precision true_divide is not supported on windows yet...";
+      // lhs and rhs have different integer types, the output is float type
+      LOG(ERROR) << "not implemented yet...";
     }
-  });
+  }
 #endif
 }
 }
 
diff --git a/src/operator/numpy/np_true_divide.cc b/src/operator/numpy/np_true_divide.cc
index 83493041dea9..d2135befef42 100644
--- a/src/operator/numpy/np_true_divide.cc
+++ b/src/operator/numpy/np_true_divide.cc
@@ -73,6 +73,12 @@ NNVM_REGISTER_OP(_npi_true_divide)
   [](const NodeAttrs& attrs){
     return std::vector >{{0, 0}, {1, 0}};
   })
+#ifdef _WIN32
+.set_attr("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector{ResourceRequest::kTempSpace};
+  })
+#endif
 .set_attr("FCompute", TrueDivideBroadcastCompute)
 .set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_div"})
 .add_argument("lhs", "NDArray-or-Symbol", "Dividend array")
@@ -90,6 +96,12 @@ NNVM_REGISTER_OP(_npi_true_divide_scalar)
   [](const NodeAttrs& attrs) {
     return std::vector >{{0, 0}};
   })
+#ifdef _WIN32
+.set_attr("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return
+      std::vector{ResourceRequest::kTempSpace};
+  })
+#endif
 .set_attr("FCompute", TrueDivideScalarCompute)
 .set_attr("FGradient", ElemwiseGradUseNone{"_backward_div_scalar"})
 .add_argument("data", "NDArray-or-Symbol", "source input")
@@ -107,6 +119,12 @@ NNVM_REGISTER_OP(_npi_rtrue_divide_scalar)
   [](const NodeAttrs& attrs) {
     return std::vector >{{0, 0}};
   })
+#ifdef _WIN32
+.set_attr("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector{ResourceRequest::kTempSpace};
+  })
+#endif
 .set_attr("FCompute", TrueDivideScalarCompute)
 .set_attr("FGradient", ElemwiseGradUseNone{"_backward_rdiv_scalar"})
 .add_argument("data", "NDArray-or-Symbol", "source input")
 
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index fd86545014ee..5927f1cbffc7 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -1684,9 +1684,6 @@ def hybrid_forward(self, F, a, b, *args, **kwargs):
         assert_almost_equal(mx_out.asnumpy(), np_out.astype(mx_out.dtype), rtol=1e-3, atol=1e-5,
                             use_broadcast=False, equal_nan=True)
 
-    if sys.platform.startswith('win'):
-        return
-
     funcs = {
         'multiply': (-1.0, 1.0),
     }
@@ -1998,7 +1995,7 @@ def get_new_shape(shape, axis):
 
         with mx.autograd.record():
             y = test_concat(a, b, c, d)
-
+
         assert y.shape == expected_ret.shape
         assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5)
 
@@ -2991,7 +2988,7 @@ def check_cholesky(L, data_np):
         test_cholesky = TestCholesky()
         if hybridize:
             test_cholesky.hybridize()
-
+
         # Numerical issue:
         # When backpropagating through Cholesky decomposition, we need to compute the inverse
         # of L according to dA = 0.5 * L**(-T) * copyLTU(L**T * dL) * L**(-1) where A = LL^T.
@@ -3928,20 +3925,19 @@ def test_np_true_divide():
             out_np = _np.true_divide(val, a.asnumpy())
             assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
 
-    if not sys.platform.startswith('win'):
-        for shape_pair, itype, ftype in itertools.product(shapes, itypes, ftypes):
-            i_ = np.random.uniform(3, 50, size=shape_pair[0]).astype(itype)
-            f_ = np.random.uniform(3, 50, size=shape_pair[-1]).astype(ftype)
+    for shape_pair, itype, ftype in itertools.product(shapes, itypes, ftypes):
+        i_ = np.random.uniform(3, 50, size=shape_pair[0]).astype(itype)
+        f_ = np.random.uniform(3, 50, size=shape_pair[-1]).astype(ftype)
 
-            out_mx = i_ / f_
-            assert out_mx.dtype == ftype
-            out_np = _np.true_divide(i_.asnumpy(), f_.asnumpy())
-            assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
+        out_mx = i_ / f_
+        assert out_mx.dtype == ftype
+        out_np = _np.true_divide(i_.asnumpy(), f_.asnumpy())
+        assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
 
-            out_mx = f_ / i_
-            assert out_mx.dtype == ftype
-            out_np = _np.true_divide(f_.asnumpy(), i_.asnumpy())
-            assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
+        out_mx = f_ / i_
+        assert out_mx.dtype == ftype
+        out_np = _np.true_divide(f_.asnumpy(), i_.asnumpy())
+        assert_almost_equal(out_mx.asnumpy(), out_np, rtol=1e-3, atol=1e-3, use_broadcast=False)
 
 
 @with_seed()
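
Illustrative usage, not part of the patch: a minimal sketch of the mixed-type behaviour the updated tests exercise, assuming an MXNet build that includes this change (array values here are made up for the example).

    import mxnet as mx
    from mxnet import np, npx
    npx.set_np()  # NumPy-compatible semantics, as the tests enable via @use_np

    i = np.array([1, 2, 3], dtype='int32')          # integer operand
    f = np.array([4.0, 5.0, 6.0], dtype='float32')  # float operand

    out = i / f                  # mixed int/float true_divide, now also exercised on Windows
    assert out.dtype == f.dtype  # output keeps the float dtype, as the tests assert

    b = np.array([True, False, True])  # boolean operand
    assert (f * b).dtype == f.dtype    # float-with-bool multiply keeps the float dtype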