This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[BUGFIX] fix issue in elemwise_add #20380

Closed
wants to merge 11 commits
2 changes: 1 addition & 1 deletion python/mxnet/executor.py
@@ -121,7 +121,7 @@ def __init__(self, sym, ctx, args, args_grad, grad_req, aux_states):
                 with self._ctx:
                     self._args[i].attach_grad(req, stype=g.stype)
                     self._args[i].grad[:] = g
-        self._cached_op = ndarray.CachedOp(sym)
+        self._cached_op = ndarray.CachedOp(sym, flags=[("static_alloc", True)])
 
     def get_optimized_symbol(self):
         """Get an optimized version of the symbol from the executor.
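
For context, `static_alloc` tells the cached graph executor to plan and allocate its memory once up front instead of on every invocation. A minimal sketch of how the modified executor builds and calls the cached op (assuming `mx.ndarray.CachedOp` accepts the `flags` list of name/value pairs shown in the diff and is callable with positional NDArray inputs; the names below are illustrative):

import mxnet as mx

# Illustrative graph: out = a + b
a = mx.sym.Variable("a")
b = mx.sym.Variable("b")
out = mx.sym.elemwise_add(a, b)

# Mirrors the executor.py change above: enable static memory allocation.
cached = mx.ndarray.CachedOp(out, flags=[("static_alloc", True)])

# Run the cached graph on concrete inputs; a single-output graph returns one NDArray.
result = cached(mx.nd.array([0.4]), mx.nd.array([0.5]))
print(result)  # expected: [0.9]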
11 changes: 10 additions & 1 deletion src/operator/tensor/elemwise_binary_op_basic.cc
@@ -108,7 +108,16 @@ The storage type of ``elemwise_add`` output depends on storage types of inputs
 - otherwise, ``elemwise_add`` generates output with default storage
 
 )code")
-.set_attr<nnvm::FGradient>("FGradient", CloneGradient{"_backward_add"});
+.set_attr<nnvm::FGradient>("FGradient",
+                           [](const nnvm::ObjectPtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
+                             std::vector<nnvm::NodeEntry> ret;
+                             const size_t input_count = n->inputs.size();
+                             ret.reserve(input_count);
+                             for (size_t i = 0; i < input_count; ++i) {
+                               ret.emplace_back(MakeNode("ones_like", n->attrs.name + "_grad_ones",
+                                                         {n->inputs[i]}, nullptr, &n));
+                             }
+                             return ret;
+                           });
 
 // specialized gradient add function to do add to optimization
 // this must differ from elemwise_add to prevent add to optimization in forward pass.
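
For reference, the contract this gradient registration has to honour can be checked from Python with autograd: each input of `elemwise_add` receives the incoming head gradient unchanged. A small sketch (values chosen to match the new unit test below; `mx.nd.elemwise_add` and `NDArray.backward(out_grad=...)` are standard MXNet APIs):

import mxnet as mx

x = mx.nd.array([0.4])
y = mx.nd.array([0.5])
x.attach_grad()
y.attach_grad()

with mx.autograd.record():
    z = mx.nd.elemwise_add(x, y)

# Propagate an explicit head gradient; each input should receive it unchanged.
z.backward(out_grad=mx.nd.array([2021.0]))
print(x.grad, y.grad)  # expected: [2021.] [2021.]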
3 changes: 1 addition & 2 deletions tests/python/unittest/test_autograd.py
@@ -517,5 +517,4 @@ def test_gradient():
     dx, = mx.autograd.grad(z, [x], create_graph=True)
     assert abs(dx.asscalar() - 3.71828175) < 1e-7
     dx.backward()
-    assert abs(x.grad.asscalar() - 2.71828175) < 1e-7
-
+    assert abs(x.grad.asscalar() - 2.71828175) < 1e-7
46 changes: 46 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -9570,3 +9570,49 @@ def forward(self, X, axis=1):

    for i in range(len(grads1)):
        assert_almost_equal(grads1[i], grads2[i])


def test_elemwise_add():
    json = """
    {
      "nodes": [
        {
          \"op\":\"null\",
          \"name\":\".Inputs.Input1\",
          \"inputs\":[]
        },
        {
          \"op\":\"null\",
          \"name\":\".Inputs.Input2\",
          \"inputs\":[]
        },
        {
          \"op\":\"elemwise_add\",
          \"name\":\".$0\",
          \"inputs\":[[0,0,0],[1,0,0]]
        },
        {
          \"op\":\"_copy\",
          \"name\":\".Outputs.Output\",
          \"inputs\":[[2,0,0]]
        }
      ],
      \"arg_nodes\":[0,1],
      \"heads\":[[3,0,0]]
    }
    """
    sym = mx.symbol.fromjson(json)

    ex = sym._bind(
        mx.cpu(),
        {'.Inputs.Input1': mx.nd.array([0.4]), '.Inputs.Input2': mx.nd.array([0.5])},
        args_grad={
            '.Inputs.Input1': mx.ndarray.zeros((1)),
            '.Inputs.Input2': mx.ndarray.zeros((1))
        },
        grad_req={'.Inputs.Input1': 'null', '.Inputs.Input2': 'write'}
    )

    ex.forward(is_train=True)
    ex.backward(out_grads=mx.nd.array([2021]))
    assert ex.grad_arrays[1] == 2021
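
The new test can be run in isolation with, for example, `pytest tests/python/unittest/test_operator.py::test_elemwise_add` from the repository root (assuming the usual MXNet Python test environment).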