diff --git a/src/operator/nn/log_softmax.cc b/src/operator/nn/log_softmax.cc
index 16324b51c322..f3ef4abb9f6d 100644
--- a/src/operator/nn/log_softmax.cc
+++ b/src/operator/nn/log_softmax.cc
@@ -40,6 +40,7 @@ static void LogSoftmaxComputeExCPU(const nnvm::NodeAttrs& attrs,
                                    const std::vector<NDArray>& inputs,
                                    const std::vector<OpReqType>& req,
                                    const std::vector<NDArray>& outputs) {
+  if (inputs[0].shape().Size() == 0U) return;
   const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
   if (SupportMKLDNNLogSoftmax(param, inputs[0], outputs[0])) {
     MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h
index f1f41778a9bd..018d851336d2 100644
--- a/src/operator/nn/softmax-inl.h
+++ b/src/operator/nn/softmax-inl.h
@@ -779,7 +779,7 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs,
                     const std::vector<OpReqType>& req,
                     const std::vector<TBlob>& outputs) {
   using namespace mxnet_op;
-  if (req[0] == kNullOp) return;
+  if (req[0] == kNullOp || inputs[0].Size() == 0U) return;
   CHECK_NE(req[0], kAddTo);
   const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
   int axis = CheckAxis(param.axis, inputs[0].ndim());
diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc
index 50cfc2f713f4..b95e159f9862 100644
--- a/src/operator/nn/softmax.cc
+++ b/src/operator/nn/softmax.cc
@@ -41,6 +41,7 @@ static void SoftmaxComputeExCPU(const nnvm::NodeAttrs& attrs,
                                 const std::vector<NDArray>& inputs,
                                 const std::vector<OpReqType>& req,
                                 const std::vector<NDArray>& outputs) {
+  if (inputs[0].shape().Size() == 0U) return;
   const SoftmaxParam& param = nnvm::get<SoftmaxParam>(attrs.parsed);
   if (SupportMKLDNNSoftmax(param, inputs[0], outputs[0])) {
     MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 7dcaf72a4e75..91f84bb27eb0 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -1569,6 +1569,14 @@ def __init__(self, axis):
         def hybrid_forward(self, F, a):
             return F.npx.softmax(a, axis=axis)
 
+    class TestLogSoftmax(HybridBlock):  # test block applying F.npx.log_softmax along a fixed axis
+        def __init__(self, axis):
+            super(TestLogSoftmax, self).__init__()
+            self._axis = axis  # axis handed to log_softmax in hybrid_forward
+
+        def hybrid_forward(self, F, a):
+            return F.npx.log_softmax(a, axis=self._axis)  # stored axis, not the enclosing loop variable
+
     def np_softmax(x, axis=-1):
         if (x.shape[axis] == 0):
             return _np.sum(x, axis=axis, keepdims=True)
@@ -1577,24 +1585,34 @@ def np_softmax(x, axis=-1):
         x /= _np.sum(x, axis=axis, keepdims=True)
         return x
 
+    def np_log_softmax(x, axis=-1):  # reference result: _np.log applied to np_softmax(x, axis)
+        return _np.log(np_softmax(x, axis))
+
+    # (block class under test, reference function) pairs
+    tested_ops = [(TestSoftmax, np_softmax),
+                  (TestLogSoftmax, np_log_softmax)]
+
     # only testing 0-size shaped inputs here, other input cases have been tested in test_opeartor.py
-    for hybridize in [True, False]:
-        for shape in [(3, 0, 4), (0, 0)]:
-            mx_a = np.random.uniform(size=shape)
-            mx_a.attach_grad()
-            for axis in range(-len(shape), len(shape)):
-                test_softmax = TestSoftmax(axis)
-                if hybridize:
-                    test_softmax.hybridize()
+    for SoftmaxOp, softmax_function in tested_ops:
+        for hybridize in [True, False]:
+            for shape in [(3, 0, 4), (0, 0)]:
+                mx_a = np.random.uniform(size=shape)
+                mx_a.attach_grad()
+                for axis in range(-len(shape), len(shape)):
+                    test_softmax_op = SoftmaxOp(axis)
+                    if hybridize:
+                        test_softmax_op.hybridize()
 
-                with mx.autograd.record():
-                    mx_out = test_softmax(mx_a)
+                    with mx.autograd.record():
+                        mx_out = test_softmax_op(mx_a)
 
-                np_out = np_softmax(mx_a.asnumpy(), axis)
-                assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True)
+                    mx_out.wait_to_read()
 
-                mx_out.backward()
-                assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5)
+                    np_out = softmax_function(mx_a.asnumpy(), axis)
+                    assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True)
+
+                    mx_out.backward()
+                    assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5)
 
 
 @with_seed()