diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index c94ac437d789..e0b1deb2d12d 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -1031,12 +1031,12 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs,
           Tensor<xpu, 2, DType> grad_in = outputs[0].get_with_shape<xpu, 2, DType>(
               Shape2(arrshape[0], arrshape.ProdShape(1, arrshape.ndim())), s);
+          if (req[take_::kArr] == kWriteTo) {
+            grad_in = scalar<DType>(0.0f);
+          }
 
           // re-using the previous code for axis = 0 case
           if (actual_axis == 0) {
             if (req[take_::kArr] == kWriteTo || req[take_::kArr] == kAddTo) {
-              if (req[take_::kArr] == kWriteTo) {
-                grad_in = scalar<DType>(0.0f);
-              }
               if (safe_acc) {
                 // Temporary storage for safe accumulation
                 size_t temp_space_size = grad_in.size(0) * grad_in.size(1) * sizeof(AType);
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 88bd4c3be04c..07487579d775 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -9453,3 +9453,85 @@ def seq_reverse():
     seq_last()
     seq_reverse()
     seq_mask()
+
+def test_take_grads():
+    # Test for https://github.com/apache/incubator-mxnet/issues/19817
+    from mxnet.gluon.nn import HybridBlock, Conv1D, HybridSequential, HybridLambda, Dense
+    from mxnet import autograd, nd
+    from mxnet.gluon.loss import L2Loss
+
+    def get_grads(model, grads, ctx=mx.cpu()):
+        pd = model.collect_params()
+        total_grad_l2 = 0
+        total_grad_l1 = 0
+        total_grad_linf = 0
+        for p in pd:
+            try:
+                g = pd[p].grad(ctx) / N
+                g2 = (g**2).sum().as_in_context(mx.cpu()).asscalar()
+                g1 = g.abs().sum().as_in_context(mx.cpu()).asscalar()
+                ginf = g.max().as_in_context(mx.cpu()).asscalar()
+                total_grad_linf = max(total_grad_linf, ginf)
+                total_grad_l2 += g2
+                total_grad_l1 += g1
+            except Exception:
+                pass
+
+        grads.append(total_grad_l1)
+        grads.append(total_grad_l2)
+        grads.append(total_grad_linf)
+
+    def run_model(model, loss, X, Y, num_iters=5):
+        grads = []
+        for i in range(num_iters):
+            with autograd.record():
+                Y_hat = model(X)
+                ll = loss(Y_hat, Y)
+                ll = ll.sum()
+            ll.backward()
+            get_grads(model, grads)
+        return grads
+
+    def dense_layer():
+        den = HybridSequential()
+        den.add(Dense(10, flatten=True, activation='tanh'))
+        return den
+
+    class Model(HybridBlock):
+        def __init__(self, use_take=False, **kwargs):
+            super().__init__()
+            self.use_take = use_take
+            self.den = dense_layer()
+
+        def hybrid_forward(self, F, X, axis=1):
+            X1 = self.den(X)
+            if self.use_take:
+                X2 = F.take(X1, nd.array([0]), axis=axis)
+            else:
+                X2 = F.slice_axis(X1, begin=0, end=1, axis=axis)
+            return X2
+
+    N = 30
+    T = 20
+    C = 10
+
+    X = np.random.normal(size=(N, T, C))
+    Y = np.random.normal(size=(N, 1))
+    X, Y = nd.array(X), nd.array(Y)
+    seed = np.random.randint(1000)
+
+    # Using F.take
+    mx.random.seed(seed)
+    model = Model(use_take=True)
+    model.initialize()
+    loss = L2Loss()
+    grads1 = run_model(model, loss, X, Y)
+
+    # Using F.slice_axis
+    mx.random.seed(seed)
+    model2 = Model(use_take=False)
+    model2.initialize()
+    grads2 = run_model(model2, loss, X, Y)
+
+    for i in range(len(grads1)):
+        assert_almost_equal(grads1[i], grads2[i])
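
For context, a minimal sketch (not part of the patch) of the behaviour the new zero-initialization guards against. It assumes MXNet's imperative `mx.nd` / `autograd` API: with the default `grad_req='write'`, repeated backward passes through `take` along a non-zero axis should overwrite the gradient buffer rather than accumulate into whatever it already holds, which is the invariant the added unit test checks via the `take` vs. `slice_axis` comparison.

```python
# Illustrative sketch only (not part of the patch); assumes MXNet's imperative
# mx.nd / autograd API. With grad_req='write', every backward pass through
# take(axis != 0) should overwrite x.grad, not accumulate into stale values.
import mxnet as mx
from mxnet import autograd

x = mx.nd.random.normal(shape=(4, 5))
x.attach_grad()  # default grad_req='write'

grads = []
for _ in range(2):
    with autograd.record():
        y = mx.nd.take(x, mx.nd.array([1]), axis=1).sum()
    y.backward()
    grads.append(x.grad.copy())

# Before the fix, the second gradient could come out larger than the first for
# axis != 0, because TakeOpBackward only zeroed grad_in in the axis == 0 branch.
assert (grads[0] - grads[1]).abs().max().asscalar() == 0.0
```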