Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
* Fix einsum gradient (#18482)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanke580 committed Jun 5, 2020
1 parent 4a830db commit b1bb307
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/operator/numpy/np_einsum_op-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,9 @@ inline void NumpyEinsumProcess(const std::vector<TBlob>& inputs,
int j = 0;
for (idim = 0; idim < ndim_iter; ++idim) {
if (op_axes_arrays[i][idim] == -1 ||
opshape[i][op_axes_arrays[i][idim]] == 1) {
(iop != nop && opshape[i][op_axes_arrays[i][idim]] == 1 &&
op_axes_arrays[iop][idim] != -1 &&
opshape[iop][op_axes_arrays[iop][idim]] != 1)) {
remainstride[iop][j++] = iterstride[iop][idim];
} else {
opstride[iop][op_axes_arrays[i][idim]] = iterstride[iop][idim];
Expand Down
20 changes: 20 additions & 0 deletions tests/python/unittest/test_numpy_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -6785,6 +6785,26 @@ def dbg(name, data):
# broadcast bug
('ij, ij -> i', [(1, 4), (2, 4)], lambda *args: (_np.sum(args[1], axis=0)[None, :],
_np.tile(args[0], [2, 1]))),
# one dimension bug
('...ij, ...jk -> ...ik', [(1, 4), (4, 2)], lambda *args: (args[1].sum(axis=1)[None, :],
_np.tile(args[0].sum(axis=0)[: ,None], [1, 2]))),
('...ij, ...jk -> ...ik', [(2, 4), (4, 2)], lambda *args: (_np.tile(args[1].sum(axis=1)[None, :], [2, 1]),
_np.tile(args[0].sum(axis=0)[: ,None], [1, 2]))),
('...ij, ...jk -> ...ik', [(3, 2, 1, 4), (3, 2, 4, 2)], lambda *args: (
args[1].sum(axis=3)[:, :, None, :],
_np.tile(args[0].sum(axis=2)[:, :, :, None], [1, 1, 1, 2]))),
('...ij, ...ik -> ...jk', [(1, 1, 1, 4), (1, 1, 1, 3)], lambda *args: (
_np.tile(args[1].sum(axis=3)[:, :, :, None], [1, 1, 1, 4]),
_np.tile(args[0].sum(axis=3)[:, :, : ,None], [1, 1, 1, 3]))),
('...ij, ...jc -> ...ic', [(1, 1, 5, 3), (1, 1, 3, 2)], lambda *args: (
_np.tile(args[1].sum(axis=3)[:, :, None, :], [1, 1, 5, 1]),
_np.tile(args[0].sum(axis=2)[:, :, : ,None], [1, 1, 1, 2]))),
('...ij, ...jc -> ...ic', [(1, 2, 5, 4), (1, 2, 4, 2)], lambda *args: (
_np.tile(args[1].sum(axis=3)[:, :, None, :], [1, 1, 5, 1]),
_np.tile(args[0].sum(axis=2)[:, :, : ,None], [1, 1, 1, 2]))),
('...ij, ...jc -> ...ic', [(2, 1, 5, 4), (2, 1, 4, 2)], lambda *args: (
_np.tile(args[1].sum(axis=3)[:, :, None, :], [1, 1, 5, 1]),
_np.tile(args[0].sum(axis=2)[:, :, : ,None], [1, 1, 1, 2]))),
# issue #16576
# commented due to long running time
# ('abiz,abjz->abij', [(64, 8, 128, 512), (64, 8, 128, 512)], lambda *args: (_np.matmul(_np.ones((64, 8, 128, 128)), args[1]),
Expand Down

0 comments on commit b1bb307

Please sign in to comment.