Add index_copy() operator (apache#12810)
* add index_copy operator

* add index_copy op

* update index_copy op

* add unittest for index_copy()

* update index_copy

* update index_copy

* use mxnet_op::copy

* update index_copy

* update index_copy

* update index_copy

* update index_copy test

* update index_copy test
aksnzhy authored and azai91 committed Dec 1, 2018
1 parent c408a44 commit 56ff657
Showing 4 changed files with 305 additions and 0 deletions.
173 changes: 173 additions & 0 deletions src/operator/contrib/index_copy-inl.h
@@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file index_copy-inl.h
* \brief implementation of index_copy tensor operation
*/

#ifndef MXNET_OPERATOR_CONTRIB_INDEX_COPY_INL_H_
#define MXNET_OPERATOR_CONTRIB_INDEX_COPY_INL_H_

#include <mxnet/operator_util.h>
#include <vector>
#include <limits>
#include <algorithm>
#include "../elemwise_op_common.h"
#include "../mshadow_op.h"
#include "../mxnet_op.h"

namespace mxnet {
namespace op {

template<int req>
struct index_copy_forward {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i,
                                  int dim,
                                  IType* index,
                                  DType* new_tensor,
                                  DType* out_tensor) {
    // Copy row i of new_tensor into row index[i] of out_tensor.
    DType* out_ptr = out_tensor + static_cast<int>(index[i]) * dim;
    DType* new_ptr = new_tensor + i * dim;
    for (int idx = 0; idx < dim; ++idx) {
      KERNEL_ASSIGN(out_ptr[idx], req, new_ptr[idx]);
    }
  }
};

template<typename xpu>
void IndexCopyForward(const nnvm::NodeAttrs& attrs,
                      const OpContext& ctx,
                      const std::vector<TBlob>& inputs,
                      const std::vector<OpReqType>& req,
                      const std::vector<TBlob>& outputs) {
  CHECK_EQ(inputs.size(), 3U);
  CHECK_EQ(outputs.size(), 1U);
  CHECK_EQ(req.size(), 1U);
  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
  const TBlob& out = outputs[0];
  const TBlob& original_tensor = inputs[0];
  const TBlob& idx_vector = inputs[1];
  const TBlob& copied_tensor = inputs[2];
  int dim = inputs[2].Size() / inputs[1].Size();
  // copy original tensor to output
  mxnet_op::copy(s, out, original_tensor);
  // index copy
  MSHADOW_TYPE_SWITCH(out.type_flag_, DType, {
    MSHADOW_TYPE_SWITCH(idx_vector.type_flag_, IType, {
      MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
        mxnet_op::Kernel<index_copy_forward<req_type>, xpu>::Launch(s,
            idx_vector.Size(), dim,
            idx_vector.dptr<IType>(),
            copied_tensor.dptr<DType>(),
            out.dptr<DType>());
      });
    });
  });
}

template<int req>
struct index_copy_backward {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i,
                                  int dim,
                                  int index_size,
                                  DType* out_grad,
                                  IType* index,
                                  DType* in_grad_1,
                                  DType* in_grad_2) {
    // If output element i lies in an indexed row, its gradient flows to
    // the matching element of in_grad_2 (gradient of the new tensor).
    for (int p = 0; p < index_size; ++p) {
      int idx = static_cast<int>(index[p]);
      if (i >= idx*dim && i < (idx+1)*dim) {
        int offset = i - idx*dim;
        KERNEL_ASSIGN(in_grad_2[p*dim+offset], req, out_grad[i]);
        return;
      }
    }
    // Otherwise it flows to in_grad_1 (gradient of the old tensor).
    KERNEL_ASSIGN(in_grad_1[i], req, out_grad[i]);
  }
};

template<typename xpu>
void IndexCopyBackward(const nnvm::NodeAttrs& attrs,
                       const OpContext& ctx,
                       const std::vector<TBlob>& inputs,
                       const std::vector<OpReqType>& req,
                       const std::vector<TBlob>& outputs) {
  CHECK_EQ(inputs.size(), 4U);
  CHECK_EQ(outputs.size(), 3U);
  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
  const TBlob& out_grad = inputs[0];
  const TBlob& index = inputs[2];
  const TBlob& in_grad_1 = outputs[0];
  const TBlob& in_grad_2 = outputs[2];
  int dim = inputs[3].Size() / inputs[2].Size();
  int index_size = inputs[2].Size();
  // index_copy_backward
  MSHADOW_TYPE_SWITCH(out_grad.type_flag_, DType, {
    MSHADOW_TYPE_SWITCH(index.type_flag_, IType, {
      MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
        mxnet_op::Kernel<index_copy_backward<req_type>, xpu>::Launch(s,
            out_grad.Size(),
            dim, index_size,
            out_grad.dptr<DType>(),
            index.dptr<IType>(),
            in_grad_1.dptr<DType>(),
            in_grad_2.dptr<DType>());
      });
    });
  });
}

inline bool IndexCopyShape(const nnvm::NodeAttrs& attrs,
                           std::vector<TShape> *in_attrs,
                           std::vector<TShape> *out_attrs) {
  // inputs[0]: original tensor
  // inputs[1]: index vector
  // inputs[2]: copied tensor
  CHECK_EQ(in_attrs->size(), 3U);
  // outputs[0]: a new tensor
  CHECK_EQ(out_attrs->size(), 1U);
  // inputs[1] must be a vector
  CHECK_EQ(in_attrs->at(1).ndim(), 1);
  // Shape matching
  CHECK_EQ(in_attrs->at(0).ndim(), in_attrs->at(2).ndim());
  for (size_t i = 0; i < in_attrs->at(0).ndim(); ++i) {
    if (i == 0) {
      CHECK_GE(in_attrs->at(0)[i], in_attrs->at(2)[i]);
    } else {
      CHECK_EQ(in_attrs->at(0)[i], in_attrs->at(2)[i]);
    }
  }
  // The length of the first dim of the copied tensor
  // must equal the size of the index vector
  CHECK_EQ(in_attrs->at(1)[0], in_attrs->at(2)[0]);
  SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
  SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
  return out_attrs->at(0).ndim() != 0U &&
         out_attrs->at(0).Size() != 0U;
}

} // namespace op
} // namespace mxnet

#endif // MXNET_OPERATOR_CONTRIB_INDEX_COPY_INL_H_
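
For orientation, here is a minimal NumPy sketch of what the forward kernel computes; it is an illustrative reference only (the helper name `index_copy_ref` and the sample values are not part of the commit):

import numpy as np

def index_copy_ref(old, index, new):
    # Mirrors IndexCopyForward: start from a copy of the old tensor,
    out = old.copy()
    # then row i of `new` overwrites row index[i] of the output.
    for i, j in enumerate(index):
        out[int(j)] = new[i]
    return out

x = np.zeros((5, 3))
t = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
idx = np.array([0, 4, 2])
print(index_copy_ref(x, idx, t))
# Rows 0, 4 and 2 of the output are rows 0, 1 and 2 of t; all other rows stay zero.
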
73 changes: 73 additions & 0 deletions src/operator/contrib/index_copy.cc
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
 * \file index_copy.cc
 * \brief CPU registration of the index_copy operator
 */
#include "./index_copy-inl.h"

namespace mxnet {
namespace op {

NNVM_REGISTER_OP(_contrib_index_copy)
.describe(R"code(Copies the elements of a `new_tensor` into the `old_tensor` by
selecting the indices in the order given in `index`. The output will be a new tensor
contains the rest elements of old tensor and the copied elements of new tensor.
For example, if `index[i] == j`, then the `i`th row of `new_tensor` is copied to the
`j`th row of output.
The `index` must be a vector and it must have the same size with the `0`th dimimention of
`new_tensor`. Also, the `0`th dimimention of old_tensor must `>=` the `0`th dimimention of
`new_tensor`, or an error will be raised.
Examples::
x = mx.nd.zeros((5,3))
t = mx.nd.array([[1,2,3],[4,5,6],[7,8,9]])
index = mx.nd.array([0,4,2])
mx.nd.contrib.index_copy(x, index, t)
[[1. 2. 3.]
[0. 0. 0.]
[7. 8. 9.]
[0. 0. 0.]
[4. 5. 6.]]
<NDArray 5x3 @cpu(0)>
)code" ADD_FILELINE)
.set_num_inputs(3)
.set_num_outputs(1)
.set_attr<nnvm::FInferShape>("FInferShape", IndexCopyShape)
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<3, 1>)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_contrib_backward_index_copy"})
.set_attr<FCompute>("FCompute<cpu>", IndexCopyForward<cpu>)
.add_argument("old_tensor", "NDArray-or-Symbol", "Old tensor")
.add_argument("index_vector", "NDArray-or-Symbol", "Index vector")
.add_argument("new_tensor", "NDArray-or-Symbol", "New tensor to be copied");

NNVM_REGISTER_OP(_contrib_backward_index_copy)
.set_num_inputs(4)
.set_num_outputs(3)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", IndexCopyBackward<cpu>);

} // namespace op
} // namespace mxnet
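
The backward pass registered above splits `out_grad` between the two differentiable inputs. A hedged NumPy sketch of the same rule (the helper name `index_copy_grad_ref` is illustrative, not part of the commit):

import numpy as np

def index_copy_grad_ref(out_grad, index):
    idx = index.astype(int)
    # Indexed rows of the old tensor were overwritten in the output,
    # so no gradient flows back to them.
    grad_old = out_grad.copy()
    grad_old[idx] = 0
    # Row i of the new tensor produced row index[i] of the output,
    # so it receives that row's gradient.
    grad_new = out_grad[idx]
    return grad_old, grad_new

g_old, g_new = index_copy_grad_ref(np.ones((5, 3)), np.array([0, 4, 2]))
# g_old has zero rows at indices 0, 4 and 2; g_new is all ones,
# matching x_grad and t_grad in the unit test below.
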
36 changes: 36 additions & 0 deletions src/operator/contrib/index_copy.cu
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
 * \file index_copy.cu
 * \brief GPU registration of the index_copy operator
 */
#include "./index_copy-inl.h"

namespace mxnet {
namespace op {

NNVM_REGISTER_OP(_contrib_index_copy)
.set_attr<FCompute>("FCompute<gpu>", IndexCopyForward<gpu>);

NNVM_REGISTER_OP(_contrib_backward_index_copy)
.set_attr<FCompute>("FCompute<gpu>", IndexCopyBackward<gpu>);

} // namespace op
} // namespace mxnet
23 changes: 23 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -4762,6 +4762,29 @@ def test_quantization_op():
    assert same(qa.asnumpy(), qa_real.asnumpy())
    assert same(a_.asnumpy(), a_real.asnumpy())

@with_seed()
def test_index_copy():
    x = mx.nd.zeros((5,3))
    t = mx.nd.array([[1,2,3],[4,5,6],[7,8,9]])
    index = mx.nd.array([0,4,2])

    x.attach_grad()
    t.attach_grad()
    index.attach_grad()

    with mx.autograd.record():
        out = mx.nd.contrib.index_copy(x, index, t)
    out.backward()

    tensor = mx.nd.array([[1,2,3],[0,0,0],[7,8,9],[0,0,0],[4,5,6]])
    x_grad = mx.nd.array([[0,0,0],[1,1,1],[0,0,0],[1,1,1],[0,0,0]])
    t_grad = mx.nd.array([[1,1,1],[1,1,1],[1,1,1]])
    index_grad = mx.nd.array([0,0,0])

    assert same(out.asnumpy(), tensor.asnumpy())
    assert same(x.grad.asnumpy(), x_grad.asnumpy())
    assert same(t.grad.asnumpy(), t_grad.asnumpy())
    assert same(index.grad.asnumpy(), index_grad.asnumpy())
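
The shape checks in index_copy-inl.h also constrain the inputs: `index` must be 1-D with length equal to the first dimension of the new tensor, and the old tensor's first dimension must be at least as large. A hedged sketch of a call that should trip those checks, assuming the failed CHECK surfaces as an MXNetError:

x = mx.nd.zeros((2,3))                      # first dim (2) smaller than rows of t (3)
t = mx.nd.array([[1,2,3],[4,5,6],[7,8,9]])
index = mx.nd.array([0,1,2])
try:
    mx.nd.contrib.index_copy(x, index, t)   # CHECK_GE on the first dimension fails
except mx.base.MXNetError:
    pass
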

@with_seed()
def test_div_sqrt_dim():
