diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h index 1b5e9921a62c..945decb9c632 100644 --- a/src/operator/nn/softmax-inl.h +++ b/src/operator/nn/softmax-inl.h @@ -71,6 +71,7 @@ template *s, DType *in, OType *out, IType *length, Shape shape, int axis, const DType temperature) { index_t M = shape[axis]; + if (M == 0) return; index_t N = shape.Size()/M; Shape stride = calc_stride(shape); Shape sshape = shape; @@ -186,6 +187,7 @@ inline void SoftmaxGrad(Stream *s, OType *out, OType *ograd, DType *igrad, IType *length, Shape shape, int axis, const DType temperature) { index_t M = shape[axis]; + if (M == 0) return; index_t N = shape.Size()/M; Shape stride = calc_stride(shape); Shape sshape = shape; @@ -402,6 +404,7 @@ inline void Softmax(Stream *s, DType *in, OType *out, IType *length, const int x_bits = 7; const int x_size = 1 << x_bits; index_t M = shape[axis]; + if (M == 0 || shape.Size() == 0) return; index_t N = shape.Size()/M; Shape stride = calc_stride(shape); Shape sshape = shape; @@ -555,6 +558,7 @@ inline void SoftmaxGrad(Stream *s, OType *out, OType *ograd, const int x_bits = 7; const int x_size = 1 << x_bits; index_t M = shape[axis]; + if (M == 0 || shape.Size() == 0) return; index_t N = shape.Size()/M; Shape stride = calc_stride(shape); Shape sshape = shape; @@ -798,35 +802,35 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs, type = inputs[1].type_flag_; } MXNET_INT32_INT64_TYPE_SWITCH(type, IType, { - IType* mask_ptr = nullptr; - if (param.use_length.value()) { - mask_ptr = inputs[1].dptr(); + IType* mask_ptr = nullptr; + if (param.use_length.value()) { + mask_ptr = inputs[1].dptr(); + } + if (safe_acc) { + if (shape.ndim() == 2) { + Softmax( + ctx.get_stream(), inputs[0].dptr(), + outputs[0].dptr(), mask_ptr, shape.get<2>(), + axis, static_cast(temperature)); + } else { + Softmax( + ctx.get_stream(), inputs[0].dptr(), + outputs[0].dptr(), mask_ptr, shape.get<3>(), + axis, static_cast(temperature)); } - if (safe_acc) { - if 
(shape.ndim() == 2) { - Softmax( - ctx.get_stream(), inputs[0].dptr(), - outputs[0].dptr(), mask_ptr, shape.get<2>(), - axis, static_cast(temperature)); - } else { - Softmax( - ctx.get_stream(), inputs[0].dptr(), - outputs[0].dptr(), mask_ptr, shape.get<3>(), - axis, static_cast(temperature)); - } + } else { + if (shape.ndim() == 2) { + Softmax( + ctx.get_stream(), inputs[0].dptr(), + outputs[0].dptr(), mask_ptr, shape.get<2>(), + axis, static_cast(temperature)); } else { - if (shape.ndim() == 2) { - Softmax( - ctx.get_stream(), inputs[0].dptr(), - outputs[0].dptr(), mask_ptr, shape.get<2>(), - axis, static_cast(temperature)); - } else { - Softmax( - ctx.get_stream(), inputs[0].dptr(), - outputs[0].dptr(), mask_ptr, shape.get<3>(), - axis, static_cast(temperature)); - } + Softmax( + ctx.get_stream(), inputs[0].dptr(), + outputs[0].dptr(), mask_ptr, shape.get<3>(), + axis, static_cast(temperature)); } + } }); }); }); diff --git a/src/operator/numpy/np_boolean_mask_assign.cc b/src/operator/numpy/np_boolean_mask_assign.cc index d5ab00835638..defb901c55a6 100644 --- a/src/operator/numpy/np_boolean_mask_assign.cc +++ b/src/operator/numpy/np_boolean_mask_assign.cc @@ -220,10 +220,9 @@ void NumpyBooleanAssignForwardCPU(const nnvm::NodeAttrs& attrs, // If there's no True in mask, return directly if (valid_num == 0) return; - const TShape& vshape = inputs.at(2).shape_; - if (inputs.size() == 3U) { // tensor case + const TShape& vshape = inputs.at(2).shape_; if (inputs[2].shape_.Size() != 1) { auto vndim = vshape.ndim(); auto dndim = dshape.ndim(); @@ -253,6 +252,8 @@ void NumpyBooleanAssignForwardCPU(const nnvm::NodeAttrs& attrs, } if (inputs.size() == 3U) { + // tensor case + const TShape& vshape = inputs.at(2).shape_; MSHADOW_TYPE_SWITCH_WITH_BOOL(data.type_flag_, DType, { if (inputs[2].shape_.Size() == 1) { Kernel, cpu>::Launch( @@ -268,6 +269,7 @@ void NumpyBooleanAssignForwardCPU(const nnvm::NodeAttrs& attrs, } }); } else { + // scalar case 
CHECK(attrs.dict.find("value") != attrs.dict.end()) << "value needs be provided"; MSHADOW_TYPE_SWITCH_WITH_BOOL(data.type_flag_, DType, { Kernel, cpu>::Launch( diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index d10e571ce2c8..e06932c837e6 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -1506,7 +1506,45 @@ def gt_grad_batch_dot_numpy(lhs, rhs, ograd, transpose_a, transpose_b, lhs_req, @with_seed() @use_np -@unittest.skip("NumpyBooleanAssignForwardCPU broken: https://github.com/apache/incubator-mxnet/issues/17990") +def test_npx_softmax(): + class TestSoftmax(HybridBlock): + def __init__(self, axis): + super(TestSoftmax, self).__init__() + self._axis = axis + + def hybrid_forward(self, F, a): + return F.npx.softmax(a, axis=axis) + + def np_softmax(x, axis=-1): + if (x.shape[axis] == 0): + return _np.sum(x, axis=axis, keepdims=True) + x = x - _np.max(x, axis=axis, keepdims=True) + x = _np.exp(x) + x /= _np.sum(x, axis=axis, keepdims=True) + return x + + # only testing 0-size shaped inputs here, other input cases have been tested in test_operator.py + for hybridize in [True, False]: + for shape in [(3, 0, 4), (0, 0)]: + mx_a = np.random.uniform(size=shape) + mx_a.attach_grad() + for axis in range(-len(shape), len(shape)): + test_softmax = TestSoftmax(axis) + if hybridize: + test_softmax.hybridize() + + with mx.autograd.record(): + mx_out = test_softmax(mx_a) + + np_out = np_softmax(mx_a.asnumpy(), axis) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True) + + mx_out.backward() + assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5) + + +@with_seed() +@use_np def test_npi_boolean_assign(): class TestBooleanAssignScalar(HybridBlock): def __init__(self, val, start_axis):