Skip softmax / log_softmax computation on zero-size inputs.

* src/operator/nn/log_softmax.cc, src/operator/nn/softmax.cc: the CPU
  ComputeEx entry points return immediately when inputs[0] has no elements.
* src/operator/nn/softmax-inl.h: SoftmaxCompute also returns early when
  inputs[0] has no elements (in addition to the existing kNullOp check).
* tests/python/unittest/test_numpy_op.py: the zero-size softmax test is
  parameterized over (block, reference function) pairs so npx.log_softmax is
  covered too; TestLogSoftmax uses the axis it was constructed with.

diff --git a/src/operator/nn/log_softmax.cc b/src/operator/nn/log_softmax.cc
index 16324b51c322..f3ef4abb9f6d 100644
--- a/src/operator/nn/log_softmax.cc
+++ b/src/operator/nn/log_softmax.cc
@@ -40,6 +40,7 @@ static void LogSoftmaxComputeExCPU(const nnvm::NodeAttrs& attrs,
                                    const std::vector& inputs,
                                    const std::vector& req,
                                    const std::vector& outputs) {
+  if (inputs[0].shape().Size() == 0U) return;  // zero-size input: skip
   const SoftmaxParam& param = nnvm::get(attrs.parsed);
   if (SupportMKLDNNLogSoftmax(param, inputs[0], outputs[0])) {
     MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h
index f1f41778a9bd..018d851336d2 100644
--- a/src/operator/nn/softmax-inl.h
+++ b/src/operator/nn/softmax-inl.h
@@ -779,7 +779,7 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs,
                     const std::vector& req,
                     const std::vector& outputs) {
   using namespace mxnet_op;
-  if (req[0] == kNullOp) return;
+  if (req[0] == kNullOp || inputs[0].Size() == 0U) return;  // null request or zero-size input
   CHECK_NE(req[0], kAddTo);
   const SoftmaxParam& param = nnvm::get(attrs.parsed);
   int axis = CheckAxis(param.axis, inputs[0].ndim());
diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc
index 50cfc2f713f4..b95e159f9862 100644
--- a/src/operator/nn/softmax.cc
+++ b/src/operator/nn/softmax.cc
@@ -41,6 +41,7 @@ static void SoftmaxComputeExCPU(const nnvm::NodeAttrs& attrs,
                                 const std::vector& inputs,
                                 const std::vector& req,
                                 const std::vector& outputs) {
+  if (inputs[0].shape().Size() == 0U) return;  // zero-size input: skip
   const SoftmaxParam& param = nnvm::get(attrs.parsed);
   if (SupportMKLDNNSoftmax(param, inputs[0], outputs[0])) {
     MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 7dcaf72a4e75..91f84bb27eb0 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -1569,6 +1569,14 @@ def __init__(self, axis):
         def hybrid_forward(self, F, a):
             return F.npx.softmax(a, axis=axis)
 
+    class TestLogSoftmax(HybridBlock):
+        def __init__(self, axis):
+            super(TestLogSoftmax, self).__init__()
+            self._axis = axis
+
+        def hybrid_forward(self, F, a):
+            return F.npx.log_softmax(a, axis=self._axis)
+
     def np_softmax(x, axis=-1):
         if (x.shape[axis] == 0):
             return _np.sum(x, axis=axis, keepdims=True)
@@ -1577,24 +1585,34 @@ def np_softmax(x, axis=-1):
         x /= _np.sum(x, axis=axis, keepdims=True)
         return x
 
+    def np_log_softmax(x, axis=-1):
+        return _np.log(np_softmax(x, axis))
+
+    # (HybridBlock under test, NumPy reference function) pairs
+    tested_ops = [(TestSoftmax, np_softmax),
+                  (TestLogSoftmax, np_log_softmax)]
+
     # only testing 0-size shaped inputs here, other input cases have been tested in test_opeartor.py
-    for hybridize in [True, False]:
-        for shape in [(3, 0, 4), (0, 0)]:
-            mx_a = np.random.uniform(size=shape)
-            mx_a.attach_grad()
-            for axis in range(-len(shape), len(shape)):
-                test_softmax = TestSoftmax(axis)
-                if hybridize:
-                    test_softmax.hybridize()
+    for SoftmaxOp, softmax_function in tested_ops:
+        for hybridize in [True, False]:
+            for shape in [(3, 0, 4), (0, 0)]:
+                mx_a = np.random.uniform(size=shape)
+                mx_a.attach_grad()
+                for axis in range(-len(shape), len(shape)):
+                    test_softmax_op = SoftmaxOp(axis)
+                    if hybridize:
+                        test_softmax_op.hybridize()
 
-                with mx.autograd.record():
-                    mx_out = test_softmax(mx_a)
+                    with mx.autograd.record():
+                        mx_out = test_softmax_op(mx_a)
 
-                np_out = np_softmax(mx_a.asnumpy(), axis)
-                assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True)
+                    mx_out.wait_to_read()
 
-                mx_out.backward()
-                assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5)
+                    np_out = softmax_function(mx_a.asnumpy(), axis)
+                    assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True)
+
+                    mx_out.backward()
+                    assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5)
 
 
 @with_seed()