Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
fix npx.softmax for 0-sized inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
haojin2 committed Mar 13, 2020
1 parent 18c2a26 commit 03307eb
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 26 deletions.
56 changes: 30 additions & 26 deletions src/operator/nn/softmax-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ template<typename OP, bool negate, typename AType, typename DType, typename OTyp
inline void Softmax(Stream<cpu> *s, DType *in, OType *out, IType *length,
Shape<ndim> shape, int axis, const DType temperature) {
index_t M = shape[axis];
if (M == 0) return;
index_t N = shape.Size()/M;
Shape<ndim> stride = calc_stride(shape);
Shape<ndim> sshape = shape;
Expand Down Expand Up @@ -186,6 +187,7 @@ inline void SoftmaxGrad(Stream<cpu> *s, OType *out, OType *ograd,
DType *igrad, IType *length, Shape<ndim> shape,
int axis, const DType temperature) {
index_t M = shape[axis];
if (M == 0) return;
index_t N = shape.Size()/M;
Shape<ndim> stride = calc_stride(shape);
Shape<ndim> sshape = shape;
Expand Down Expand Up @@ -402,6 +404,7 @@ inline void Softmax(Stream<gpu> *s, DType *in, OType *out, IType *length,
const int x_bits = 7;
const int x_size = 1 << x_bits;
index_t M = shape[axis];
if (M == 0 || shape.Size() == 0) return;
index_t N = shape.Size()/M;
Shape<ndim> stride = calc_stride(shape);
Shape<ndim> sshape = shape;
Expand Down Expand Up @@ -555,6 +558,7 @@ inline void SoftmaxGrad(Stream<gpu> *s, OType *out, OType *ograd,
const int x_bits = 7;
const int x_size = 1 << x_bits;
index_t M = shape[axis];
if (M == 0 || shape.Size() == 0) return;
index_t N = shape.Size()/M;
Shape<ndim> stride = calc_stride(shape);
Shape<ndim> sshape = shape;
Expand Down Expand Up @@ -798,35 +802,35 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs,
type = inputs[1].type_flag_;
}
MXNET_INT32_INT64_TYPE_SWITCH(type, IType, {
IType* mask_ptr = nullptr;
if (param.use_length.value()) {
mask_ptr = inputs[1].dptr<IType>();
IType* mask_ptr = nullptr;
if (param.use_length.value()) {
mask_ptr = inputs[1].dptr<IType>();
}
if (safe_acc) {
if (shape.ndim() == 2) {
Softmax<OP, negate, AType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<2>(),
axis, static_cast<DType>(temperature));
} else {
Softmax<OP, negate, AType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<3>(),
axis, static_cast<DType>(temperature));
}
if (safe_acc) {
if (shape.ndim() == 2) {
Softmax<OP, negate, AType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<2>(),
axis, static_cast<DType>(temperature));
} else {
Softmax<OP, negate, AType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<3>(),
axis, static_cast<DType>(temperature));
}
} else {
if (shape.ndim() == 2) {
Softmax<OP, negate, DType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<2>(),
axis, static_cast<DType>(temperature));
} else {
if (shape.ndim() == 2) {
Softmax<OP, negate, DType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<2>(),
axis, static_cast<DType>(temperature));
} else {
Softmax<OP, negate, DType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<3>(),
axis, static_cast<DType>(temperature));
}
Softmax<OP, negate, DType>(
ctx.get_stream<xpu>(), inputs[0].dptr<DType>(),
outputs[0].dptr<OType>(), mask_ptr, shape.get<3>(),
axis, static_cast<DType>(temperature));
}
}
});
});
});
Expand Down
39 changes: 39 additions & 0 deletions tests/python/unittest/test_numpy_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,45 @@ def gt_grad_batch_dot_numpy(lhs, rhs, ograd, transpose_a, transpose_b, lhs_req,
transpose_b=transpose_b))


@with_seed()
@use_np
def test_npx_softmax():
    """Check ``npx.softmax`` forward and backward on zero-size inputs.

    Every axis of each zero-size shape is exercised, both imperatively and
    through a hybridized block. The forward result is compared against a
    NumPy reference and the input gradient is expected to be all zeros.
    """
    class TestSoftmax(HybridBlock):
        """Minimal block wrapping ``npx.softmax`` along a fixed axis."""

        def __init__(self, axis):
            super(TestSoftmax, self).__init__()
            self._axis = axis

        def hybrid_forward(self, F, a):
            # Use the axis stored on the block. Reading the enclosing test's
            # loop variable here would make the block's behavior depend on
            # whatever value that variable holds when the block is called.
            return F.npx.softmax(a, axis=self._axis)

    def np_softmax(x, axis=-1):
        """NumPy reference softmax that tolerates an empty reduction axis."""
        if x.shape[axis] == 0:
            # Softmax preserves the input shape, so with nothing to normalize
            # along `axis` the reference is an empty array of that same shape
            # (a keepdims reduction would have length 1 on `axis` instead).
            return _np.zeros_like(x)
        # Subtract the per-slice maximum before exponentiating for stability.
        x = x - _np.max(x, axis=axis, keepdims=True)
        x = _np.exp(x)
        x /= _np.sum(x, axis=axis, keepdims=True)
        return x

    # Only testing 0-size shaped inputs here; other input cases have been
    # tested in test_operator.py.
    for hybridize in [True, False]:
        for shape in [(3, 0, 4), (0, 0)]:
            mx_a = np.random.uniform(size=shape)
            mx_a.attach_grad()
            # Cover both negative and non-negative axis indices.
            for axis in range(-len(shape), len(shape)):
                test_softmax = TestSoftmax(axis)
                if hybridize:
                    test_softmax.hybridize()

                with mx.autograd.record():
                    mx_out = test_softmax(mx_a)

                np_out = np_softmax(mx_a.asnumpy(), axis)
                assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5, equal_nan=True)

                mx_out.backward()
                assert_almost_equal(mx_a.grad.asnumpy(), _np.zeros(shape), rtol=1e-3, atol=1e-5)


@with_seed()
@use_np
def test_npi_boolean_assign():
Expand Down

0 comments on commit 03307eb

Please sign in to comment.