[MXNET-978] n-th order gradient test support. #15611
Changes from 2 commits
@@ -30,10 +30,16 @@ def sin(x):
     def grad_grad_op(x):
         return -nd.sin(x)
 
+    def grad_grad_grad_op(x):
+        return -nd.cos(x)
+
     for dim in range(1, 5):
         shape = rand_shape_nd(dim)
         array = random_arrays(shape)
         check_second_order_unary(array, sin, grad_grad_op)
+        # TODO(kshitij12345): Remove
+        check_nth_order_unary(array, sin,
+                              [grad_grad_op, grad_grad_grad_op], [2, 3])
 
 
 @with_seed()
@@ -44,10 +50,16 @@ def cos(x):
     def grad_grad_op(x):
         return -nd.cos(x)
 
+    def grad_grad_grad_op(x):
+        return nd.sin(x)
+
     for dim in range(1, 5):
         shape = rand_shape_nd(dim)
         array = random_arrays(shape)
         check_second_order_unary(array, cos, grad_grad_op)
+        # TODO(kshitij12345): Remove
+        check_nth_order_unary(array, cos,
+                              [grad_grad_op, grad_grad_grad_op], [2, 3])
 
 
 @with_seed()
@@ -69,13 +81,18 @@ def test_log():
     def log(x):
         return nd.log(x)
 
+    def grad_op(x):
+        return 1/x
+
     def grad_grad_op(x):
         return -1/(x**2)
 
     for dim in range(1, 5):
         shape = rand_shape_nd(dim)
         array = random_arrays(shape)
         check_second_order_unary(array, log, grad_grad_op)
+        # TODO(kshitij12345): Remove
+        check_nth_order_unary(array, log, [grad_op, grad_grad_op], [1, 2])
 
 
 @with_seed()
@@ -148,30 +165,48 @@ def grad_grad_op(x):
         shape = rand_shape_nd(dim)
         array = random_arrays(shape)
         check_second_order_unary(array, sigmoid, grad_grad_op)
+        # TODO(kshitij12345): Remove
+        check_nth_order_unary(array, sigmoid, [grad_op, grad_grad_op], [1, 2])
+        check_nth_order_unary(array, sigmoid, grad_grad_op, 2)
 
 
 def check_second_order_unary(x, op, grad_grad_op):
+    check_nth_order_unary(x, op, grad_grad_op, 2)
+
+
+def check_nth_order_unary(x, op, grad_ops, orders):
+    if isinstance(orders, int):
+        orders = [orders]
+        grad_ops = [grad_ops]
+
     x = nd.array(x)
-    grad_grad_x = grad_grad_op(x)
     x.attach_grad()
 
-    # Manual head_grads.
-    y_grad = nd.random.normal(shape=x.shape)
-    head_grad_grads = nd.random.normal(shape=x.shape)
+    order = max(orders)
Review comment: If orders is monotonically increasing, should this just be orders[-1]?
Reply: Yes. That will work as well. But I felt …
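As a side illustration (not part of the PR): the two choices agree whenever orders is sorted in increasing order, while max() also tolerates an unsorted list.

```python
orders = [1, 2, 3]
assert max(orders) == orders[-1]  # identical when orders is increasing

orders = [3, 1]                   # unsorted input
assert max(orders) == 3           # max() still picks the highest requested order
assert orders[-1] == 1            # orders[-1] would truncate the gradient loop
```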
+    expected_grads = [grad_op(x) for grad_op in grad_ops]
+    computed_grads = []
+    head_grads = []
 
     # Perform compute.
     with autograd.record():
         y = op(x)
-        x_grad = autograd.grad(heads=y, variables=x, head_grads=y_grad,
-                               create_graph=True, retain_graph=True)[0]
-    x_grad.backward(head_grad_grads)
-
-    # Compute expected values.
-    expected_grad_grad = grad_grad_x.asnumpy() * head_grad_grads.asnumpy() * \
-        y_grad.asnumpy()
-
-    # Validate the gradients.
-    assert_almost_equal(expected_grad_grad, x.grad.asnumpy())
+        for current_order in range(1, order+1):
+            head_grad = nd.random.normal(shape=x.shape)
+            y = autograd.grad(heads=y, variables=x, head_grads=head_grad,
Review comment: Better name instead of mutating y?
Reply: One option I can think of is, at the start of the for loop we'll have …
+                              create_graph=True, retain_graph=True)[0]
+            if current_order in orders:
Review comment: If current_order is not in orders we might have a problem zipping? Is there a case where you would want 1st and 3rd order but not second?
Reply: I don't think there would be an issue in that case (will confirm it later though). The main thing is that the elements in …
Follow-up reply: Have confirmed that the following works.
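The confirming snippet itself is not preserved in this view. A minimal sketch of the kind of check it presumably ran, assuming check_nth_order_unary from this file is in scope and using sin with first- and third-derivative helpers (hypothetical names), could look like the following; the second-order gradient is still computed while chaining but is simply never collected, so the zip stays aligned.

```python
from mxnet import nd
from mxnet.test_utils import random_arrays, rand_shape_nd


def sin(x):
    return nd.sin(x)


def grad_op(x):             # d/dx sin(x) = cos(x)
    return nd.cos(x)


def grad_grad_grad_op(x):   # d^3/dx^3 sin(x) = -cos(x)
    return -nd.cos(x)


# Request only the 1st and 3rd order gradients; order 2 is skipped.
array = random_arrays(rand_shape_nd(2))
check_nth_order_unary(array, sin, [grad_op, grad_grad_grad_op], [1, 3])
```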
+                computed_grads.append(y)
+            head_grads.append(head_grad)
+
+    # Validate all the gradients.
+    for order, grad, computed_grad in \
+            zip(orders, expected_grads, computed_grads):
+        # Compute expected values.
+        expected_grad = grad.asnumpy()
+        for head_grad in head_grads[:order]:
+            expected_grad *= head_grad.asnumpy()
+
+        assert_almost_equal(expected_grad, computed_grad.asnumpy())
 
 
 if __name__ == '__main__':
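For readers outside the test file, here is a self-contained sketch of the mechanism the helper relies on: chaining mxnet.autograd.grad with create_graph=True so that each gradient can itself be differentiated again. It uses a separate current_head name instead of rebinding y, along the lines of the naming concern raised above. This is an illustrative sketch (hypothetical function name, unit head gradients), not the PR's implementation, and it assumes an MXNet build where higher-order gradients of sin are supported.

```python
import numpy as np
import mxnet.autograd as autograd
from mxnet import nd
from mxnet.test_utils import assert_almost_equal


def nth_order_grads_of_sin(x, order):
    """Return the 1st..order-th gradients of sin at x via chained autograd.grad calls."""
    x = nd.array(x)
    x.attach_grad()
    grads = []
    with autograd.record():
        current_head = nd.sin(x)  # 0th order: the function itself
        for _ in range(order):
            # Differentiate the previous result again; create_graph=True keeps
            # the new gradient differentiable so the chain can continue.
            current_head = autograd.grad(heads=current_head, variables=x,
                                         head_grads=nd.ones_like(x),
                                         create_graph=True, retain_graph=True)[0]
            grads.append(current_head)
    return grads


x = np.random.normal(size=(2, 3)).astype(np.float32)
first, second, third = nth_order_grads_of_sin(x, 3)
assert_almost_equal(first.asnumpy(), np.cos(x), rtol=1e-5, atol=1e-6)    # d/dx sin = cos
assert_almost_equal(second.asnumpy(), -np.sin(x), rtol=1e-5, atol=1e-6)  # d2/dx2 sin = -sin
assert_almost_equal(third.asnumpy(), -np.cos(x), rtol=1e-5, atol=1e-6)   # d3/dx3 sin = -cos
```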
Review comment: Add a docstring?
Reply: Sure thing.
Follow-up reply: I have added the docstring. Could you please take a look and see if it needs more polishing? Thank you.
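The docstring that was added is not visible in this commit range. A hypothetical sketch of what such a docstring could cover for check_nth_order_unary, based only on the behaviour visible in the diff above, might be:

```python
def check_nth_order_unary(x, op, grad_ops, orders):
    """Verify that autograd's n-th order gradients of a unary operator match
    analytically expected values.

    Parameters
    ----------
    x : array-like
        Input values at which the gradients are evaluated.
    op : callable
        Unary operator under test, e.g. lambda x: nd.sin(x).
    grad_ops : callable or list of callable
        Function(s) computing the expected gradient for each requested order.
    orders : int or list of int
        Gradient order(s) to validate; aligned element-wise with grad_ops and
        expected in increasing order.
    """
```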