sparse_retain op #66

Merged · 4 commits · Jun 6, 2017
22 changes: 11 additions & 11 deletions include/mxnet/ndarray.h
@@ -115,44 +115,44 @@ class NDArray {
   }
   /*! \brief constructor for NDArray with storage type
    */
-  NDArray(const NDArrayStorageType storage_type, const TShape &shape, Context ctx,
+  NDArray(const NDArrayStorageType stype, const TShape &shape, Context ctx,
           bool delay_alloc = true, int dtype = mshadow::default_type_flag,
           std::vector<int> aux_types = {}, std::vector<TShape> aux_shapes = {},
           TShape storage_shape = TShape(mshadow::Shape1(0)))
       : shape_(shape), offset_(0), dtype_(dtype), entry_({nullptr, 0, 0}) {
     // Assign default aux types if not given
     if (aux_types.size() == 0) {
-      if (storage_type == kRowSparseStorage) {
+      if (stype == kRowSparseStorage) {
         aux_types = {ROW_SPARSE_IDX_TYPE};
-      } else if (storage_type == kCSRStorage) {
+      } else if (stype == kCSRStorage) {
         aux_types = {CSR_IND_PTR_TYPE, CSR_IDX_DTYPE};
       } else {
-        LOG(FATAL) << "Unknown storage type" << storage_type;
+        LOG(FATAL) << "Unknown storage type " << stype;
       }
     }
     // Assign default shapes if not given
     // unknown shapes are initialized as {0} such that Size() would return 0
     if (aux_shapes.size() == 0) {
-      if (storage_type == kRowSparseStorage) {
+      if (stype == kRowSparseStorage) {
         aux_shapes = {TShape(mshadow::Shape1(0))};
-      } else if (storage_type == kCSRStorage) {
+      } else if (stype == kCSRStorage) {
         // aux shapes for indptr and indices
         aux_shapes = {TShape(mshadow::Shape1(0)), TShape(mshadow::Shape1(0))};
       } else {
-        LOG(FATAL) << "Unknown storage type" << storage_type;
+        LOG(FATAL) << "Unknown storage type " << stype;
       }
     }
     if (storage_shape.Size() == 0) {
-      if (storage_type == kRowSparseStorage) {
+      if (stype == kRowSparseStorage) {
         storage_shape = shape;
         storage_shape[0] = aux_shapes[rowsparse::kIdx][0];
-      } else if (storage_type == kCSRStorage) {
+      } else if (stype == kCSRStorage) {
         storage_shape = aux_shapes[csr::kIdx];
       } else {
-        LOG(FATAL) << "Unknown storage type" << storage_type;
+        LOG(FATAL) << "Unknown storage type " << stype;
       }
     }
-    ptr_ = std::make_shared<Chunk>(storage_type, storage_shape, ctx, delay_alloc,
+    ptr_ = std::make_shared<Chunk>(stype, storage_shape, ctx, delay_alloc,
                                    dtype, aux_types, aux_shapes);
 #if MKL_EXPERIMENTAL == 1
     Mkl_mem_ = std::make_shared<MKLMemHolder>();
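For readers less familiar with MXNet's sparse formats, a minimal NumPy sketch (illustrative only, not MXNet API; dtypes are placeholders) of the auxiliary arrays whose types and shapes this constructor defaults:

    import numpy as np

    # row_sparse: a values array of shape (num_nonzero_rows, num_cols)
    # plus one aux array of row ids (ROW_SPARSE_IDX_TYPE above).
    rsp_values = np.array([[1, 2], [5, 6]])
    rsp_row_idx = np.array([0, 3], dtype=np.int64)

    # csr: a flat values array plus two aux arrays, indptr and column
    # indices (CSR_IND_PTR_TYPE and CSR_IDX_DTYPE above).
    csr_values = np.array([1, 2, 3])
    csr_indptr = np.array([0, 1, 1, 3], dtype=np.int64)
    csr_col_idx = np.array([0, 1, 0], dtype=np.int64)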
25 changes: 21 additions & 4 deletions python/mxnet/test_utils.py
Expand Up @@ -16,7 +16,7 @@
import numpy.random as rnd
import mxnet as mx
from .context import Context
-from .ndarray import array
+from .ndarray import array, _STORAGE_TYPE_STR_TO_ID
from .symbol import Symbol
try:
import requests
@@ -67,6 +67,15 @@ def random_arrays(*shapes):
return arrays[0]
return arrays


def random_sample(population, k):
"""Return a k length list of the elements chosen from the population sequence."""
assert 0 <= k <= len(population)
population_copy = population[:]
np.random.shuffle(population_copy)
return population_copy[0:k]
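A quick usage sketch of this helper (the sampled values are illustrative):

    population = [0, 1, 2, 3, 4]
    picked = random_sample(population, 3)   # e.g. [4, 0, 2]
    assert len(picked) == 3
    assert population == [0, 1, 2, 3, 4]    # the input is not mutated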


# TODO(haibin) also include types in arguments
def rand_sparse_ndarray(shape, storage_type, density=None):
"""Generate a random sparse ndarray. Returns the ndarray, value(np) and indices(np) """
@@ -457,7 +466,8 @@ def numeric_grad(executor, location, aux_states=None, eps=1e-4, use_forward_trai


 def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rtol=1e-2,
-                           atol=None, grad_nodes=None, use_forward_train=True, ctx=None):
+                           atol=None, grad_nodes=None, use_forward_train=True, ctx=None,
+                           grad_stype_dict=None):
"""Verify an operation by checking backward pass via finite difference method.

Based on Theano's `theano.gradient.verify_grad` [1]
@@ -474,7 +484,7 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
- if type is dict of str -> numpy.ndarray
maps the name of arguments to the corresponding numpy.ndarray.
*In either case, value of all the arguments must be provided.*
-    aux_states : ist or tuple or dict, optional
+    aux_states : list or tuple or dict, optional
The auxiliary states required when generating the executor for the symbol.
numeric_eps : float, optional
Delta for the finite difference method that approximates the gradient.
@@ -486,6 +496,8 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
Whether to use is_train=True when computing the finite-difference.
ctx : Context, optional
Check the gradient computation on the specified device.
grad_stype_dict : dict of str->str, optional
Storage type dictionary for gradient ndarrays.
References
---------
..[1] https://github.com/Theano/Theano/blob/master/theano/gradient.py
@@ -509,7 +521,7 @@ def random_projection(shape):
location_npy = {k:v.asnumpy() for k, v in location.items()}
aux_states = _parse_aux_states(sym=sym, aux_states=aux_states, ctx=ctx)
if aux_states is not None:
-        aux_states_npy = {k:v.asnumpy() for k, v in aux_states.items()}
+        aux_states_npy = {k: v.asnumpy() for k, v in aux_states.items()}
else:
aux_states_npy = None
if grad_nodes is None:
Expand All @@ -536,6 +548,11 @@ def random_projection(shape):
+ [("__random_proj", _rng.normal(0, 0.01, size=out_shape[0]))])

args_grad = {k: mx.nd.array(v, ctx=ctx) for k, v in args_grad_npy.items()}
if grad_stype_dict is not None:
assert isinstance(grad_stype_dict, dict), "grad_stype_dict must be a dict"
for k, v in grad_stype_dict.items():
if k in args_grad and v in _STORAGE_TYPE_STR_TO_ID and v != 'default':
args_grad[k] = mx.nd.cast_storage(args_grad[k], storage_type=v)

executor = out.bind(ctx, grad_req=grad_req,
args=location, args_grad=args_grad, aux_states=aux_states)
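A hedged sketch of how the new grad_stype_dict argument could be exercised from a test; the symbol, shapes, and values below are made up for illustration and are not part of this PR:

    import mxnet as mx
    from mxnet.test_utils import check_numeric_gradient

    data = mx.sym.Variable('data')
    sym = mx.sym.sum(mx.sym.abs(data))       # any differentiable symbol
    location = {'data': mx.nd.array([[1, 2], [3, 4]])}
    # Force the gradient ndarray of 'data' to row_sparse storage before
    # binding, so the backward pass is checked on the sparse code path.
    check_numeric_gradient(sym, location,
                           grad_stype_dict={'data': 'row_sparse'})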
41 changes: 41 additions & 0 deletions src/operator/tensor/indexing_op.cc
@@ -264,5 +264,46 @@ Examples::
.add_argument("indices", "NDArray-or-Symbol", "array of locations where to set on_value")
.add_arguments(OneHotParam::__FIELDS__());

NNVM_REGISTER_OP(sparse_retain)
.describe(R"code(pick rows specified by user input index array from a row sparse matrix
[Inline review, Owner] Add an example here? Similar to the one in https://www.tensorflow.org/api_docs/python/tf/sparse_retain
[Reply, Collaborator/Author] Done.
and save them in the output sparse matrix.

Example::

data = [[1, 2], [3, 4], [5, 6]]
indices = [0, 1, 3]
shape = (4, 2)
rsp_in = row_sparse(data, indices)
to_retain = [0, 3]
rsp_out = sparse_retain(rsp_in, to_retain)
rsp_out.values = [[1, 2], [5, 6]]
rsp_out.indices = [0, 3]

)code" ADD_FILELINE)
.set_num_inputs(2)
.set_num_outputs(1)
.set_attr<nnvm::FListInputNames>("FListInputNames",
[](const NodeAttrs& attrs) {
return std::vector<std::string>{"data", "indices"};
})
.set_attr<nnvm::FInferShape>("FInferShape", SparseRetainOpShape)
.set_attr<nnvm::FInferType>("FInferType", SparseRetainOpType)
.set_attr<nnvm::FInferStorageType>("FInferStorageType", SparseRetainForwardInferStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", SparseRetainOpForwardEx<cpu>)
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
return MakeNonlossGradNode("_backward_sparse_retain", n, ograds,
{n->inputs[sr::kIdx]}, n->attrs.dict);
})
.add_argument("data", "NDArray-or-Symbol", "The input array for sparse_retain operator.")
.add_argument("indices", "NDArray-or-Symbol", "The index array of rows ids that will be retained.");

NNVM_REGISTER_OP(_backward_sparse_retain)
.set_num_inputs(2)
.set_num_outputs(2)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<nnvm::FInferStorageType>("FInferStorageType", SparseRetainBackwardInferStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", SparseRetainOpBackwardEx<cpu>);

} // namespace op
} // namespace mxnet
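For reference, a hedged Python sketch mirroring the docstring example above; the frontend helpers for building a row_sparse NDArray have shifted across MXNet versions, so the constructor names here are assumptions:

    import mxnet as mx

    data = mx.nd.array([[1, 2], [3, 4], [5, 6]])
    indices = mx.nd.array([0, 1, 3])
    # Assumed helper: build a row_sparse NDArray from (values, row ids).
    rsp_in = mx.nd.row_sparse(data, indices, shape=(4, 2))
    to_retain = mx.nd.array([0, 3])
    rsp_out = mx.nd.sparse_retain(rsp_in, to_retain)
    # rsp_out now holds values [[1, 2], [5, 6]] with row ids [0, 3].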
6 changes: 6 additions & 0 deletions src/operator/tensor/indexing_op.cu
@@ -26,6 +26,12 @@ NNVM_REGISTER_OP(batch_take)
NNVM_REGISTER_OP(one_hot)
.set_attr<FCompute>("FCompute<gpu>", OneHotOpForward<gpu>);

NNVM_REGISTER_OP(sparse_retain)
.set_attr<FComputeEx>("FComputeEx<gpu>", SparseRetainOpForwardEx<gpu>);

NNVM_REGISTER_OP(_backward_sparse_retain)
.set_attr<FComputeEx>("FComputeEx<gpu>", SparseRetainOpBackwardEx<gpu>);

} // namespace op
} // namespace mxnet

193 changes: 193 additions & 0 deletions src/operator/tensor/indexing_op.h
@@ -795,6 +795,199 @@ void OneHotOpForward(const nnvm::NodeAttrs& attrs,
});
}

/*!
* \brief sparse retain namespace
*/
namespace sr {
enum SparseRetainOpInputs {kArr, kIdx};
enum SparseRetainOpOutputs {kOut};
} // namespace sr

inline bool SparseRetainOpShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
CHECK_EQ(in_attrs->size(), 2U)
<< "sparse_retain operator takes 2 arguments (" << in_attrs->size() << " given)";
CHECK_EQ(out_attrs->size(), 1U);

TShape tshape((*in_attrs)[sr::kArr]);
shape_assign(&tshape, (*out_attrs)[sr::kOut]);
SHAPE_ASSIGN_CHECK(*in_attrs, sr::kArr, tshape);
SHAPE_ASSIGN_CHECK(*out_attrs, sr::kOut, tshape);
return true;
}

inline bool SparseRetainOpType(const nnvm::NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(in_attrs->size(), 2U);
CHECK_EQ(out_attrs->size(), 1U);
CHECK_NE((*in_attrs)[sr::kIdx], -1) << "Index type must be set for sparse_retain operator";

TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[sr::kArr]);
TYPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[sr::kOut]);
return (*in_attrs)[0] != -1;
}

inline bool SparseRetainForwardInferStorageType(const nnvm::NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(in_attrs->size(), 2U);
CHECK_EQ(out_attrs->size(), 1U);
if (kRowSparseStorage == in_attrs->at(sr::kArr)) {
out_attrs->at(sr::kOut) = kRowSparseStorage;
}
return true;
}

inline bool SparseRetainBackwardInferStorageType(const nnvm::NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(in_attrs->size(), 2U);
CHECK_EQ(out_attrs->size(), 2U);
out_attrs->at(sr::kArr) = kRowSparseStorage;
out_attrs->at(sr::kIdx) = kDefaultStorage;
return true;
}

struct SparseRetainRspForward {
template<typename DType, typename RType, typename IType>
MSHADOW_XINLINE static void Map(int i, DType* out_data, RType* out_idx,
const DType* in_data, const RType* in_idx,
const IType* idx, const size_t nnr,
const size_t num_cols) {
const RType irow = idx[i];
int j = -1, left = 0, right = nnr - 1;
while (left <= right) {
int m = left + (right - left) / 2;
const auto in_idx_m = in_idx[m];
if (in_idx_m == irow) {
j = m;
break;
} else if (in_idx_m < irow) {
left = m + 1;
} else {
right = m - 1;
}
}
out_idx[i] = idx[i];
if (j >= 0) {
const size_t in_offset = j * num_cols;
const size_t out_offset = i * num_cols;
for (size_t k = 0; k < num_cols; ++k) {
out_data[out_offset+k] = in_data[in_offset+k];
}
}
}
};

template<typename xpu>
void SparseRetainOpForwardEx(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
CHECK_EQ(inputs.size(), 2U);
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
CHECK_EQ(req[sr::kOut], kWriteTo) << "sparse_retain only supports req=\'write\'";

CHECK_EQ(inputs[sr::kArr].storage_type(), kRowSparseStorage)
<< "sparse_retain operator only takes row sparse NDArray as input";
CHECK_EQ(inputs[sr::kIdx].storage_type(), kDefaultStorage)
<< "sparse_retain operator only takes default NDArray as its index array";
CHECK_EQ(outputs[sr::kOut].storage_type(), kRowSparseStorage)
<< "sparse_retain operator only outputs row sparse NDArray";

const NDArray& input_nd = inputs[sr::kArr];
const TBlob idx_data = inputs[sr::kIdx].data();

if (req[sr::kOut] == kNullOp
|| !input_nd.storage_initialized()
|| idx_data.Size() == 0U) return;

const TBlob input_data = input_nd.data();
if (input_data.shape_[0] == 0) return;
const TBlob input_idx = input_nd.aux_data(rowsparse::kIdx);

NDArray output_nd = outputs[sr::kOut];
output_nd.CheckAndAlloc({mshadow::Shape1(idx_data.Size())});
TBlob output_data = output_nd.data();
TBlob output_idx = output_nd.aux_data(rowsparse::kIdx);

using namespace mxnet_op;
Stream<xpu> *s = ctx.get_stream<xpu>();
MSHADOW_TYPE_SWITCH(output_data.type_flag_, DType, { // output data type
MSHADOW_INT_TYPE_SWITCH(output_idx.type_flag_, RType, { // row index data type
MSHADOW_TYPE_SWITCH(idx_data.type_flag_, IType, { // index array data type
Kernel<set_zero, xpu>::Launch(s, output_data.Size(), output_data.dptr<DType>());
Kernel<SparseRetainRspForward, xpu>::Launch(s, idx_data.Size(), output_data.dptr<DType>(),
output_idx.dptr<RType>(), input_data.dptr<DType>(), input_idx.dptr<RType>(),
idx_data.dptr<IType>(), input_data.shape_[0], input_data.shape_[1]);
});
});
});
}
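To make the kernel above easier to follow, here is a NumPy reference of the same forward logic; a sketch for exposition, not the shipped implementation:

    import numpy as np

    def sparse_retain_forward_ref(in_data, in_idx, idx):
        """Reference for SparseRetainRspForward: for each requested row id,
        binary-search the stored row ids and copy the row if present."""
        num_cols = in_data.shape[1]
        out_data = np.zeros((len(idx), num_cols), dtype=in_data.dtype)
        out_idx = np.array(idx, copy=True)      # out_idx[i] = idx[i] always
        for i, irow in enumerate(idx):
            j = np.searchsorted(in_idx, irow)   # the kernel's binary search
            if j < len(in_idx) and in_idx[j] == irow:
                out_data[i] = in_data[j]        # retained row; else stays zero
        return out_data, out_idx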

template<int req>
struct SparseRetainRspBackward {
template<typename DType, typename RType, typename IType>
MSHADOW_XINLINE static void Map(int i, DType* in_grad, RType* in_grad_idx,
const DType* out_grad, const IType* idx,
const size_t num_cols) {
const RType irow = idx[i];
in_grad_idx[i] = irow;
const size_t out_offset = irow * num_cols;
const size_t in_offset = i * num_cols;
for (size_t j = 0; j < num_cols; ++j) {
KERNEL_ASSIGN(in_grad[in_offset+j], req, out_grad[out_offset+j]);
}
}
};

template<typename xpu>
void SparseRetainOpBackwardEx(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
CHECK_EQ(inputs.size(), 2U);
CHECK_EQ(outputs.size(), 2U);
CHECK_EQ(req.size(), 2U);
CHECK_NE(req[sr::kArr], kWriteInplace);
CHECK_EQ(req[sr::kIdx], kNullOp)
<< "sparse_retain does not support calculating gradients of indices";

CHECK_EQ(inputs[sr::kOut].storage_type(), kDefaultStorage)
<< "sparse_retain backward only takes default NDArray as ograd";
CHECK_EQ(inputs[sr::kIdx].storage_type(), kDefaultStorage)
<< "sparse_retain backward only takes default NDArray as its index array";
CHECK_EQ(outputs[sr::kArr].storage_type(), kRowSparseStorage)
<< "sparse_retain backward only outputs row sparse NDArray as grad of input";

const TBlob out_grad_data = inputs[sr::kOut].data();
const TBlob idx_data = inputs[sr::kIdx].data();

NDArray in_grad_nd = outputs[sr::kArr];
in_grad_nd.CheckAndAlloc({mshadow::Shape1(idx_data.Size())});
TBlob in_grad_data = in_grad_nd.data();
TBlob in_grad_idx = in_grad_nd.aux_data(rowsparse::kIdx);

using namespace mxnet_op;
Stream<xpu> *s = ctx.get_stream<xpu>();
MSHADOW_TYPE_SWITCH(out_grad_data.type_flag_, DType, { // output data type
MSHADOW_INT_TYPE_SWITCH(in_grad_idx.type_flag_, RType, { // row index data type
MSHADOW_TYPE_SWITCH(idx_data.type_flag_, IType, { // index array data type
MXNET_ASSIGN_REQ_SWITCH(req[sr::kArr], req_type, {
Kernel<SparseRetainRspBackward<req_type>, xpu>::Launch(
s, in_grad_idx.Size(), in_grad_data.dptr<DType>(), in_grad_idx.dptr<RType>(),
out_grad_data.dptr<DType>(), idx_data.dptr<IType>(), out_grad_data.shape_[1]);
});
});
});
});
}
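And the matching NumPy reference for the backward kernel: the dense ograd is gathered at the retain indices into a row_sparse input gradient whose row ids are exactly those indices (again a sketch for exposition only, ignoring the req dispatch):

    import numpy as np

    def sparse_retain_backward_ref(out_grad, idx):
        """Reference for SparseRetainRspBackward (kWriteTo case)."""
        num_cols = out_grad.shape[1]
        in_grad = np.zeros((len(idx), num_cols), dtype=out_grad.dtype)
        in_grad_idx = np.array(idx, copy=True)
        for i, irow in enumerate(idx):
            in_grad[i] = out_grad[irow]   # gather row irow of the dense ograd
        return in_grad, in_grad_idx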

} // namespace op
} // namespace mxnet
#ifdef __CUDACC__