From ded8e475b1955ce69481dba15a7bad2500a3c276 Mon Sep 17 00:00:00 2001
From: Wei Chu
Date: Thu, 22 Apr 2021 15:50:56 -0700
Subject: [PATCH 1/5] fix take

---
 src/operator/tensor/indexing_op.h      |  6 +-
 tests/python/unittest/test_operator.py | 90 ++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index c94ac437d789..e0b1deb2d12d 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -1031,12 +1031,12 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs,
     Tensor<xpu, 2, DType> grad_in = outputs[0].get_with_shape<xpu, 2, DType>(
         Shape2(arrshape[0], arrshape.ProdShape(1, arrshape.ndim())), s);
 
+    if (req[take_::kArr] == kWriteTo) {
+      grad_in = scalar<DType>(0.0f);
+    }
     // re-using the previous code for axis = 0 case
     if (actual_axis == 0) {
       if (req[take_::kArr] == kWriteTo || req[take_::kArr] == kAddTo) {
-        if (req[take_::kArr] == kWriteTo) {
-          grad_in = scalar<DType>(0.0f);
-        }
         if (safe_acc) {
           // Temporary storage for safe accumulation
           size_t temp_space_size = grad_in.size(0) * grad_in.size(1) * sizeof(AType);
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 88bd4c3be04c..694478c11766 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -9453,3 +9453,93 @@ def seq_reverse():
     seq_last()
     seq_reverse()
     seq_mask()
+
+def test_take_grads():
+    # Test for https://github.com/apache/incubator-mxnet/issues/19817
+    from mxnet.gluon.nn import HybridBlock, Conv1D, HybridSequential, HybridLambda, Dense
+    from mxnet import autograd, nd
+    from mxnet.gluon.loss import L2Loss
+
+    def get_grads(model, grads, ctx=mx.cpu()):
+        pd = model.collect_params()
+        total_grad_l2 = 0
+        total_grad_l1 = 0
+        total_grad_linf = 0
+        for p in pd:
+            try:
+                g = pd[p].grad(ctx) / N
+                g2 = (g**2).sum().as_in_context(mx.cpu()).asscalar()
+                g1 = g.abs().sum().as_in_context(mx.cpu()).asscalar()
+                ginf = g.max().as_in_context(mx.cpu()).asscalar()
+                total_grad_linf = max(total_grad_linf, ginf)
+                total_grad_l2 += g2
+                total_grad_l1 += g1
+                print(f"||g_param||_2: {g2**0.5:.2E} | Param: {p}")
+            except Exception:
+                pass
+
+        grads.append(total_grad_l1)
+        grads.append(total_grad_l2)
+        grads.append(total_grad_linf)
+
+    def run_model(model, loss, X, Y, num_iters=5):
+        grads = []
+        for i in range(num_iters):
+            with autograd.record():
+                Y_hat = model(X)
+                ll = loss(Y_hat, Y)
+                ll = ll.sum()
+            ll.backward()
+            get_grads(model, grads)
+        return grads
+
+    def conv_layer(atrous_rates, num_channels):
+        convs = HybridSequential()
+        for rate in atrous_rates:
+            convs.add(Conv1D(num_channels, 3, padding=rate, dilation=rate, activation='tanh'))
+        return convs
+
+    class Model(HybridBlock):
+        def __init__(self, conv_units, atrous_rates, use_take=False, **kwargs):
+            super().__init__(prefix=kwargs.get('prefix', None), params=kwargs.get('params', None))
+            self.use_take = use_take
+            with self.name_scope():
+                self.convs = conv_layer(atrous_rates, conv_units)
+                self.dense_out = Dense(1, flatten=False, activation='tanh')
+
+        def hybrid_forward(self, F, X, axis=-1):
+            X1 = X
+            X2 = self.convs(X1)
+            if self.use_take:
+                X3 = F.take(X2, nd.array([0]), axis=axis)
+            else:
+                X3 = F.slice_axis(X2, begin=0, end=1, axis=axis)
+            return X3
+
+    N = 30
+    T = 20
+    C = 8
+    conv_units = 5
+    atrous_rates = [1]
+
+    X = np.random.normal(size=(N, T, C))
+    Y = np.random.normal(size=(N, T))
+    Y = np.random.normal(size=(N, conv_units))
+    X, Y = nd.array(X), nd.array(Y)
+    seed = np.random.randint(1000)
+
+    # Using F.take
+    mx.random.seed(seed)
+    model = Model(conv_units, atrous_rates, use_take=True)
+    model.initialize()
+    loss = L2Loss()
+    grads1 = run_model(model, loss, X, Y)
+
+    # Using F.slice_axis
+    mx.random.seed(seed)
+    model2 = Model(conv_units, atrous_rates, use_take=False)
+    model2.initialize()
+    grads2 = run_model(model2, loss, X, Y)
+
+    for i in range(len(grads1)):
+        assert_almost_equal(grads1[i], grads2[i])

From a41fa6927f37d55e85c4a0a81addfecc6aa5ff37 Mon Sep 17 00:00:00 2001
From: Wei Chu
Date: Thu, 22 Apr 2021 17:59:00 -0700
Subject: [PATCH 2/5] remove inputs in hybridblock

---
 tests/python/unittest/test_operator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 694478c11766..02bcff618d67 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -9501,7 +9501,7 @@ def conv_layer(atrous_rates, num_channels):
 
     class Model(HybridBlock):
         def __init__(self, conv_units, atrous_rates, use_take=False, **kwargs):
-            super().__init__(prefix=kwargs.get('prefix', None), params=kwargs.get('params', None))
+            super().__init__()
             self.use_take = use_take
             with self.name_scope():
                 self.convs = conv_layer(atrous_rates, conv_units)

From 8e011d519edea5e96ed76d5ab0286535fde0b281 Mon Sep 17 00:00:00 2001
From: Wei Chu
Date: Fri, 23 Apr 2021 11:06:36 -0700
Subject: [PATCH 3/5] remove name scope

---
 tests/python/unittest/test_operator.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 02bcff618d67..99eb0c750c84 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -9503,9 +9503,7 @@ class Model(HybridBlock):
         def __init__(self, conv_units, atrous_rates, use_take=False, **kwargs):
             super().__init__()
             self.use_take = use_take
-            with self.name_scope():
-                self.convs = conv_layer(atrous_rates, conv_units)
-                self.dense_out = Dense(1, flatten=False, activation='tanh')
+            self.convs = conv_layer(atrous_rates, conv_units)
 
         def hybrid_forward(self, F, X, axis=-1):
             X1 = X

From fbfeb2b9e0cd3962f36263b3ae616ca8c2c416db Mon Sep 17 00:00:00 2001
From: Wei Chu
Date: Fri, 23 Apr 2021 15:07:25 -0700
Subject: [PATCH 4/5] adjust data size

---
 tests/python/unittest/test_operator.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 99eb0c750c84..6edd95062eef 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -9474,7 +9474,6 @@ def get_grads(model, grads, ctx=mx.cpu()):
                 total_grad_linf = max(total_grad_linf, ginf)
                 total_grad_l2 += g2
                 total_grad_l1 += g1
-                print(f"||g_param||_2: {g2**0.5:.2E} | Param: {p}")
             except Exception:
                 pass
 
@@ -9506,22 +9505,20 @@ def __init__(self, conv_units, atrous_rates, use_take=False, **kwargs):
             self.convs = conv_layer(atrous_rates, conv_units)
 
         def hybrid_forward(self, F, X, axis=-1):
-            X1 = X
-            X2 = self.convs(X1)
+            X1 = self.convs(X)
             if self.use_take:
-                X3 = F.take(X2, nd.array([0]), axis=axis)
+                X2 = F.take(X1, nd.array([0]), axis=axis)
             else:
-                X3 = F.slice_axis(X2, begin=0, end=1, axis=axis)
-            return X3
+                X2 = F.slice_axis(X1, begin=0, end=1, axis=axis)
+            return X2
 
     N = 30
     T = 20
-    C = 8
+    C = 20
     conv_units = 5
     atrous_rates = [1]
 
     X = np.random.normal(size=(N, T, C))
-    Y = np.random.normal(size=(N, T))
     Y = np.random.normal(size=(N, conv_units))
     X, Y = nd.array(X), nd.array(Y)
     seed = np.random.randint(1000)

From cdb2d51cfdfd6c342aa3ab7d12982cd171fbf8df Mon Sep 17 00:00:00 2001
From: Wei Chu
Date: Fri, 23 Apr 2021 20:21:09 -0700
Subject: [PATCH 5/5] test with dense

---
 tests/python/unittest/test_operator.py | 27 ++++++++++++--------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 6edd95062eef..07487579d775 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -9492,20 +9492,19 @@ def run_model(model, loss, X, Y, num_iters=5):
             get_grads(model, grads)
         return grads
 
-    def conv_layer(atrous_rates, num_channels):
-        convs = HybridSequential()
-        for rate in atrous_rates:
-            convs.add(Conv1D(num_channels, 3, padding=rate, dilation=rate, activation='tanh'))
-        return convs
+    def dense_layer():
+        den = HybridSequential()
+        den.add(Dense(10, flatten=True, activation='tanh'))
+        return den
 
     class Model(HybridBlock):
-        def __init__(self, conv_units, atrous_rates, use_take=False, **kwargs):
+        def __init__(self, use_take=False, **kwargs):
             super().__init__()
             self.use_take = use_take
-            self.convs = conv_layer(atrous_rates, conv_units)
+            self.den = dense_layer()
 
-        def hybrid_forward(self, F, X, axis=-1):
-            X1 = self.convs(X)
+        def hybrid_forward(self, F, X, axis=1):
+            X1 = self.den(X)
             if self.use_take:
                 X2 = F.take(X1, nd.array([0]), axis=axis)
             else:
@@ -9514,25 +9513,23 @@ def hybrid_forward(self, F, X, axis=1):
 
     N = 30
     T = 20
-    C = 20
-    conv_units = 5
-    atrous_rates = [1]
+    C = 10
 
     X = np.random.normal(size=(N, T, C))
-    Y = np.random.normal(size=(N, conv_units))
+    Y = np.random.normal(size=(N, 1))
     X, Y = nd.array(X), nd.array(Y)
     seed = np.random.randint(1000)
 
     # Using F.take
     mx.random.seed(seed)
-    model = Model(conv_units, atrous_rates, use_take=True)
+    model = Model(use_take=True)
     model.initialize()
     loss = L2Loss()
     grads1 = run_model(model, loss, X, Y)
 
     # Using F.slice_axis
     mx.random.seed(seed)
-    model2 = Model(conv_units, atrous_rates, use_take=False)
+    model2 = Model(use_take=False)
     model2.initialize()
     grads2 = run_model(model2, loss, X, Y)
 
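
For context, the failure mode this series addresses (apache/incubator-mxnet#19817) can also be sketched outside the test suite: before PATCH 1, TakeOpBackward skipped zeroing grad_in under a kWriteTo gradient request whenever the take axis was non-zero, so each backward pass accumulated into the stale gradient buffer instead of overwriting it. The snippet below is a hypothetical minimal repro, not part of the patches, assuming the MXNet 1.x ndarray/autograd API; the shapes and tolerance are illustrative only.

    # Hypothetical repro of the bug fixed in PATCH 1 (assumes MXNet 1.x).
    import mxnet as mx
    from mxnet import autograd, nd

    x = nd.random.normal(shape=(4, 5))
    x.attach_grad()  # default grad_req='write', i.e. kWriteTo

    grads = []
    for _ in range(2):
        with autograd.record():
            # A non-zero take axis exercises the code path the fix zeroes.
            y = nd.take(x, nd.array([0]), axis=1)
        y.backward()
        grads.append(x.grad.copy())

    # With the bug, the second backward added on top of the stale buffer,
    # so the two gradients diverged; after the fix they agree exactly.
    assert (grads[0] - grads[1]).abs().sum().asscalar() < 1e-6

Running the loop more than twice mirrors the committed test's run_model(num_iters=5), which compares gradient norms from F.take against the F.slice_axis reference across repeated backward passes.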