
Elemwise binary op backward fix (apache#19106)
* fix backward

* add tests
Zha0q1 authored Sep 13, 2020
1 parent f1acda7 commit 6cbdfa5
Showing 2 changed files with 99 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/operator/tensor/elemwise_binary_op.h
@@ -115,7 +115,7 @@ class ElemwiseBinaryOp : public OpBase {
const std::vector<TBlob> &outputs) {
MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
using namespace mxnet_op;
- const int size = static_cast<int>((outputs[0].Size() + DataType<DType>::kLanes - 1)
+ const size_t size = static_cast<size_t>((outputs[0].Size() + DataType<DType>::kLanes - 1)
/ DataType<DType>::kLanes);
const DType *ograd_dptr = inputs[0].dptr<DType>();
if (std::is_same<LOP, mshadow_op::identity>::value && req[0] == kWriteInplace) {
@@ -150,7 +150,7 @@ class ElemwiseBinaryOp : public OpBase {
const DType *lhs_dptr = inputs[1].dptr<DType>();
const DType *rhs_dptr = inputs[2].dptr<DType>();
MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
- const int size = static_cast<int>(
+ const size_t size = static_cast<size_t>(
(outputs[0].Size() + mxnet_op::DataType<DType>::kLanes - 1)
/ mxnet_op::DataType<DType>::kLanes);
DType * lgrad_dptr = outputs[0].dptr<DType>();
@@ -159,7 +159,7 @@ class ElemwiseBinaryOp : public OpBase {
s, size, lgrad_dptr, ograd_dptr, lhs_dptr, rhs_dptr);
});
MXNET_ASSIGN_REQ_SWITCH(req[1], Req, {
- const int size = static_cast<int>(
+ const size_t size = static_cast<size_t>(
(outputs[1].Size() + mxnet_op::DataType<DType>::kLanes - 1)
/ mxnet_op::DataType<DType>::kLanes);
DType * rgrad_dptr = outputs[1].dptr<DType>();
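The change above swaps a 32-bit int for size_t when computing the per-kernel element count in the elementwise backward passes. outputs[i].Size() is the total number of elements, and for the large tensors exercised by the new tests (2**32 elements for a shape like (INT_OVERFLOW, 2), assuming INT_OVERFLOW is 2**31 as in the nightly test file), that count no longer fits in a signed 32-bit integer, so the old static_cast<int> truncated it and the backward kernel covered the wrong number of elements. A minimal sketch of the wraparound, using plain NumPy and not part of the commit:

import numpy as np

INT_OVERFLOW = 2**31                 # assumed value of the test constant
num_elements = INT_OVERFLOW * 2      # 4294967296 elements for shape (INT_OVERFLOW, 2)

# What a 32-bit count would effectively hold in practice: the value wraps,
# here all the way down to 0, so the kernel would iterate over 0 elements.
wrapped = np.int64(num_elements).astype(np.int32)

print(num_elements)   # 4294967296
print(wrapped)        # 0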
96 changes: 96 additions & 0 deletions tests/nightly/test_np_large_array.py
@@ -568,6 +568,102 @@ def test_slice_assign():
    B[-1] = 2
    assert B[-1, 0] == 2 and B[-1, 1] == 2

@use_np
def test_add():
    A = np.ones((INT_OVERFLOW, 2))
    B = np.ones((INT_OVERFLOW, 2))
    A[-1, -1] = 2
    A.attach_grad()
    with mx.autograd.record():
        C = np.add(A, B)
    C.backward()
    assert C.shape == (INT_OVERFLOW, 2)
    assert C[-1, -1] == 3
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[-1, -1] == 1

@use_np
def test_hypot():
    A = np.ones((INT_OVERFLOW, 2))
    B = np.ones((INT_OVERFLOW, 2))
    A[-1, -1], B[-1, -1] = 3, 4
    A.attach_grad()
    with mx.autograd.record():
        C = np.hypot(A, B)
    C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 5
    assert A.grad.shape == A.shape
    assert_almost_equal(A.grad[-1, -1], np.array([0.6]), rtol=1e-5, atol=1e-5)

@use_np
def test_power():
    A = np.full((2, INT_OVERFLOW), 2)
    B = np.ones((2, INT_OVERFLOW))
    B[-1, -1] = 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.power(A, B)
    C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 8
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == 12
    assert B.grad.shape == B.shape
    assert_almost_equal(B.grad[-1, -1], 2**3 * np.log(2), rtol=1e-5, atol=1e-5)

@use_np
def test_ldexp():
    A = np.ones((2, INT_OVERFLOW))
    B = np.ones((2, INT_OVERFLOW))
    A[-1, -1], B[-1, -1] = 5, 2
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.ldexp(A, B)
    C.backward()
    assert C.shape == A.shape
    assert C[-1, -1] == 20
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == 4
    assert B.grad.shape == B.shape
    assert_almost_equal(B.grad[-1, -1], A[-1, -1] * 2**B[-1, -1] * np.log(2), \
        rtol=1e-5, atol=1e-5)

@use_np
def test_multiply():
    A = np.ones((2, INT_OVERFLOW))
    B = np.ones((2, INT_OVERFLOW))
    A[-1, -1], B[-1, -1] = 2, 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.multiply(A, B)
    C.backward()
    assert C.shape == A.shape
    assert C[0, 0] == 1 and C[-1, -1] == 6
    assert A.grad.shape == A.shape
    assert A.grad[-1, -1] == B[-1, -1]
    assert B.grad.shape == B.shape
    assert B.grad[-1, -1] == A[-1, -1]

@use_np
def test_subtract():
    A = np.zeros((INT_OVERFLOW, 2))
    B = np.ones((INT_OVERFLOW, 2))
    A[-1, -1] = 3
    A.attach_grad()
    B.attach_grad()
    with mx.autograd.record():
        C = np.subtract(A, B)
    C.backward()
    assert C.shape == (INT_OVERFLOW, 2)
    assert C[0, 0] == -1 and C[-1][-1] == 2
    assert A.grad.shape == (INT_OVERFLOW, 2)
    assert A.grad[0][0] == 1
    assert B.grad.shape == (INT_OVERFLOW, 2)
    assert B.grad[0][0] == -1

'''
_ _
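The expected gradients hard-coded in the new tests follow directly from the element-wise derivative formulas. As a quick sanity check, not part of the commit, the same numbers can be reproduced with plain NumPy on scalars (standing in here for mxnet.np on the full-size tensors):

import numpy as np

# hypot: d/dA sqrt(A^2 + B^2) = A / hypot(A, B); with A=3, B=4 -> 3/5 = 0.6
a, b = 3.0, 4.0
assert np.isclose(a / np.hypot(a, b), 0.6)

# power: d/dA A**B = B * A**(B-1); with A=2, B=3 -> 3 * 4 = 12
#        d/dB A**B = A**B * ln(A)  -> 8 * ln(2)
a, b = 2.0, 3.0
assert np.isclose(b * a ** (b - 1), 12.0)
assert np.isclose(a ** b * np.log(a), 2 ** 3 * np.log(2))

# ldexp: C = A * 2**B; with A=5, B=2 -> d/dA = 2**B = 4, d/dB = A * 2**B * ln(2)
a, b = 5.0, 2.0
assert np.isclose(2.0 ** b, 4.0)
assert np.isclose(a * 2.0 ** b * np.log(2.0), 20.0 * np.log(2.0))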
