[MXNET-978] Higher Order Gradient Support reciprocal, abs. #15413
Changes from all commits: 0a08a3b, 32c9346, ced9e30, 8878d7d
@@ -717,7 +717,38 @@ Example::
MXNET_OPERATOR_REGISTER_BINARY(_backward_reciprocal)
.set_attr<FCompute>("FCompute<cpu>",
                    ElemwiseBinaryOp::Compute<cpu, unary_bwd<mshadow_op::reciprocal_grad> >)
.set_attr<nnvm::FGradient>("FGradient",
  [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
    // ograds[0]: dL/dxgrad
    // inputs[0]: dL/dy
    // inputs[1]: x
    // f(x) = y = 1/x
    // f'(x) = -1/x^2
    // f''(x) = 2/x^3 = -2 * (f'(x) * f(x))

    const std::unordered_map<std::string, std::string> args = {{"scalar", "-2.0"}};

    auto dydx_mul_dldy = nnvm::NodeEntry{n};  // f'(x) * head_grads
    auto dydx = MakeNode("elemwise_div", n->attrs.name + "_dydx",
                         {dydx_mul_dldy, n->inputs[0]}, nullptr, &n);
    auto fx = MakeNode("reciprocal", n->attrs.name + "_fx",
                       {n->inputs[1]}, nullptr, &n);

Review comment: Small thing: could we get fx from the first backward (node->inputs) if we used ElemwiseGradUseInOut? I guess we would avoid the additional divisions if so.
Reply: I don't think we can use it, as that would work as our …

    auto d2ydx2_mid = MakeNode("elemwise_mul", n->attrs.name + "_d2ydx2_mid",
                               {dydx_mul_dldy, nnvm::NodeEntry{fx}}, nullptr, &n);

    auto d2ydx2 = MakeNode("_mul_scalar", n->attrs.name + "_d2ydx2",
                           {nnvm::NodeEntry{d2ydx2_mid}}, &args, &n);

    std::vector<nnvm::NodeEntry> ret;

    ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
                              {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n));

Review comment: Maybe a comment would help here. This one is the output corresponding to dL/dy from the first backward, right? I'm still unclear since the previous PRs on what …
Reply: Even I am not sure of its significance in the literature. But if you look at …
Reply: This term will be useful when you calculate the third-order (and above) gradient.

    ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_inp",
                              {ograds[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n));

Review comment: This seems ok.

    return ret;
  });
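To make the node arithmetic above concrete: the _backward_reciprocal node itself outputs f'(x) * dL/dy, so the lambda divides by dL/dy (inputs[0]) to recover f'(x) for the first output, and scales f'(x) * dL/dy * f(x) by -2 to obtain f''(x) * dL/dy for the second. A small NumPy sketch, not part of the diff; names follow the C++ variables and the values are purely illustrative:

import numpy as np

x = np.array([0.5, 1.0, 2.0])        # forward input (inputs[1])
dL_dy = np.array([1.0, 2.0, 3.0])    # head gradient of the first backward (inputs[0])
ograd = np.array([0.1, 0.2, 0.3])    # ograds[0]: dL/dxgrad

# Output of the _backward_reciprocal node itself: f'(x) * dL/dy
dydx_mul_dldy = (-1.0 / x**2) * dL_dy

# Nodes built by the FGradient lambda
dydx = dydx_mul_dldy / dL_dy          # elemwise_div -> f'(x)
fx = 1.0 / x                          # reciprocal   -> f(x)
d2ydx2_mid = dydx_mul_dldy * fx       # elemwise_mul -> f'(x) * f(x) * dL/dy
d2ydx2 = -2.0 * d2ydx2_mid            # _mul_scalar  -> f''(x) * dL/dy, since f''(x) = -2 * f'(x) * f(x)

# The two returned entries
out_wrt_dldy = ograd * dydx           # "_backward_grad_grad": gradient w.r.t. inputs[0] (dL/dy)
out_wrt_x = ograd * d2ydx2            # "_backward_grad_grad_inp": gradient w.r.t. inputs[1] (x)

# Closed-form cross-check
assert np.allclose(out_wrt_dldy, ograd * (-1.0 / x**2))
assert np.allclose(out_wrt_x, ograd * dL_dy * (2.0 / x**3))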
// abs
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(abs, cpu, mshadow_op::abs)

@@ -736,7 +767,26 @@ The storage type of ``abs`` output depends upon the input storage type:

)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_abs"});

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_abs, unary_bwd<mshadow_op::sign>)
.set_attr<nnvm::FGradient>("FGradient",
  [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
    // ograds[0]: dL/dxgrad
    // inputs[0]: dL/dy
    // inputs[1]: x
    // f(x) -> abs(x)
    // f'(x) = 1 if x > 0 else -1
    // f''(x) = 0
    auto dydx = MakeNode("elemwise_div", n->attrs.name + "_dydx",
                         {nnvm::NodeEntry{n}, n->inputs[0]}, nullptr, &n);

    std::vector<nnvm::NodeEntry> ret;
    ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
                              {ograds[0], nnvm::NodeEntry(dydx)}, nullptr, &n));

Review comment: Same question as above.

    ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in",
                              {n->inputs[1]}, nullptr, &n));

Review comment: Ok.

    return ret;
  });
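With this FGradient registered, the second-order gradient of abs becomes reachable from Python. Below is a minimal sketch using MXNet's imperative autograd (the same create_graph/retain_graph pattern the new tests rely on); it is an illustration rather than part of the change, and the inputs are chosen away from zero, where abs is differentiable:

from mxnet import nd, autograd

x = nd.array([-2.0, -0.5, 1.0, 3.0])   # stay away from zero, where abs is not differentiable
x.attach_grad()
with autograd.record():
    y = nd.abs(x)
    # First-order gradient, kept in the recorded graph so it can be differentiated again.
    y_grad = autograd.grad(heads=y, variables=x, head_grads=nd.ones_like(y),
                           create_graph=True, retain_graph=True)[0]
y_grad.backward()
print(x.grad)   # expected: all zeros, since f''(x) = 0 away from the origin

The same pattern with nd.reciprocal should instead recover 2/x**3 at positive inputs.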
// sign
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(sign, cpu, mshadow_op::sign)
@@ -107,6 +107,33 @@ def grad_grad_op(x):

@with_seed()
def test_reciprocal():
    def reciprocal(x):
        return nd.reciprocal(x)

    def grad_grad_op(x):
        return 2 / x**3

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, reciprocal, grad_grad_op)


@with_seed()
def test_abs():
    def abs(x):
        return nd.abs(x)

    def grad_grad_op(x):
        return nd.zeros_like(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, abs, grad_grad_op)
Review comment: nit: please remove the extra line.
Reply: Two lines between functions, as per PEP 8: https://stackoverflow.com/questions/2953250/python-pep8-blank-lines-convention
Reply: It is fixed actually. I guess I removed the lower line, so it is not showing up here.
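As a side check of the expected second derivatives encoded in grad_grad_op above (not part of the PR), a central-difference approximation agrees with 2/x**3 for reciprocal and with 0 for abs at points away from the origin. A minimal NumPy sketch; the helper name second_derivative_fd is illustrative:

import numpy as np

def second_derivative_fd(f, x, eps=1e-4):
    # Central difference: f''(x) ~= (f(x + eps) - 2*f(x) + f(x - eps)) / eps**2
    return (f(x + eps) - 2 * f(x) + f(x - eps)) / eps**2

x = np.array([0.5, 1.0, 2.0, -1.5])   # points away from zero
assert np.allclose(second_derivative_fd(np.reciprocal, x), 2 / x**3, rtol=1e-3)
assert np.allclose(second_derivative_fd(np.abs, x), 0, atol=1e-6)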
def test_sigmoid():
    def sigmoid(x):
        return nd.sigmoid(x)
Review comment (on the dydx elemwise_div node): Do we need to divide this explicitly here? I think the final _backward_grad_grad_input will also carry the term head_grads in the output, so we may not need this extra node.
Reply: Now I see that you need this node for the first output "_backward_grad_grad".