Adding integer index support to Sequence* family of operators.
Adding the ability to use int32 arrays, or any castable-to-int type, as
the sequence_length array to SequenceMask, SequenceLast, and
SequenceReverse. Previously these operators all required sequence_length
to be the same data type as the input array.

See MXNet Jira ticket here:
  https://issues.apache.org/jira/browse/MXNET-1295

See also GitHub issues here:
   apache#12649
   dmlc/gluon-nlp#346
stephenrawls committed Jan 15, 2019
1 parent 22e2ef8 commit 63c5b1e
Showing 9 changed files with 86 additions and 61 deletions.
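
The change itself is mechanical: the kernels read one sequence_length entry
per batch item and immediately cast it to int, which is why any
castable-to-int element type works. A minimal standalone sketch of that
pattern (simplified from the kernels below, not the actual MXNet code):

#include <cstdint>
#include <cstdio>

// Simplified stand-in for SequenceLastKernel::Map: DType is the data type,
// IType the sequence_length type; the only use of idx is a cast to int.
template <typename DType, typename IType>
void last_step(int batch, DType *out, const DType *in, const IType *idx,
               int batch_stride) {
  const int seqpos = static_cast<int>(idx[batch]) - 1;  // same cast the kernels use
  out[batch] = in[seqpos * batch_stride + batch];
}

int main() {
  // data laid out (seq_len, batch) = (3, 2): in[s * 2 + b]
  float in[6] = {1, 2, 3, 4, 5, 6}, out[2];
  int32_t len[2] = {2, 3};  // int32 lengths, previously forced to match the data dtype
  for (int b = 0; b < 2; ++b) last_step(b, out, in, len, 2);
  std::printf("%g %g\n", out[0], out[1]);  // prints: 3 6
}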
30 changes: 14 additions & 16 deletions src/operator/sequence_last-inl.h
@@ -65,9 +65,9 @@ struct SequenceLastParam : public dmlc::Parameter<SequenceLastParam> {

template <int req>
struct SequenceLastKernel {
-template <typename DType>
+template <typename DType, typename IType>
MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in,
-const DType *idx, int offset1, int offset2,
+const IType *idx, int offset1, int offset2,
mshadow::Shape<2> oshape) {
const auto opos = mxnet_op::unravel(i, oshape);
const int seqpos = static_cast<int>(idx[opos[0]]) - 1;
@@ -77,9 +77,9 @@ struct SequenceLastKernel {
};

struct SequenceLastGradKernel {
-template <typename DType>
+template <typename DType, typename IType>
MSHADOW_XINLINE static void Map(int i, DType *in_grad, const DType *out_grad,
-const DType *idx, int offset1, int offset2,
+const IType *idx, int offset1, int offset2,
mshadow::Shape<2> oshape) {
const auto opos = mxnet_op::unravel(i, oshape);
const int seqpos = static_cast<int>(idx[opos[0]]) - 1;
@@ -88,14 +88,14 @@ struct SequenceLastGradKernel {
}
};

-template <typename xpu, typename DType>
+template <typename xpu, typename DType, typename IType>
class SequenceLastOp : public Operator {
public:
explicit SequenceLastOp(SequenceLastParam p) { this->param_ = p; }

void sequence_last(const mshadow::Tensor<xpu, 3, DType> &data,
const mshadow::Tensor<xpu, 2, DType> &out,
-const mshadow::Tensor<xpu, 1, DType> &indices,
+const mshadow::Tensor<xpu, 1, IType> &indices,
const OpReqType req, mshadow::Stream<xpu> *const s) {
using namespace mshadow;
using namespace mshadow::expr;
@@ -115,7 +115,7 @@ class SequenceLastOp : public Operator {

void sequence_last_grad(const mshadow::Tensor<xpu, 3, DType> &in_grad,
const mshadow::Tensor<xpu, 2, DType> &out_grad,
-const mshadow::Tensor<xpu, 1, DType> &indices,
+const mshadow::Tensor<xpu, 1, IType> &indices,
mshadow::Stream<xpu> *const s) {
using namespace mshadow;
using namespace mshadow::expr;
@@ -163,11 +163,11 @@ class SequenceLastOp : public Operator {
Tensor<xpu, 2, DType> out =
out_data[seq_last::kOut].get_with_shape<xpu, 2, DType>(
Shape2(batch, rest_size), s);
-Tensor<xpu, 1, DType> indices =
+Tensor<xpu, 1, IType> indices =
param_.use_sequence_length
-? in_data[seq_last::kSequenceLength].get<xpu, 1, DType>(s)
+? in_data[seq_last::kSequenceLength].get<xpu, 1, IType>(s)
: ctx.requested[seq_last::kTempSpace]
-.get_space_typed<xpu, 1, DType>(Shape1(batch), s);
+.get_space_typed<xpu, 1, IType>(Shape1(batch), s);
if (!param_.use_sequence_length) indices = max_seq_len;

sequence_last(data, out, indices, req[seq_last::kOut], s);
@@ -206,11 +206,11 @@ class SequenceLastOp : public Operator {
Tensor<xpu, 2, DType> output_grad =
out_grad[seq_last::kOut].get_with_shape<xpu, 2, DType>(
Shape2(batch, rest_size), s);
-Tensor<xpu, 1, DType> indices =
+Tensor<xpu, 1, IType> indices =
param_.use_sequence_length
-? in_data[seq_last::kSequenceLength].get<xpu, 1, DType>(s)
+? in_data[seq_last::kSequenceLength].get<xpu, 1, IType>(s)
: ctx.requested[seq_last::kTempSpace]
-.get_space_typed<xpu, 1, DType>(Shape1(batch), s);
+.get_space_typed<xpu, 1, IType>(Shape1(batch), s);

if (req[seq_last::kData] == kWriteTo) data_grad = 0.0f;
sequence_last_grad(data_grad, output_grad, indices, s);
@@ -221,7 +221,7 @@
}; // class SequenceLastOp

template <typename xpu>
-Operator *CreateOp(SequenceLastParam param, int dtype);
+Operator *CreateOp(SequenceLastParam param, int dtype, int itype);

#if DMLC_USE_CXX11
class SequenceLastProp : public OperatorProperty {
@@ -281,8 +281,6 @@ class SequenceLastProp : public OperatorProperty {
for (size_t i = 0; i < in_type->size(); ++i) {
if ((*in_type)[i] == -1) {
(*in_type)[i] = dtype;
-} else {
-UNIFORM_TYPE_CHECK((*in_type)[i], dtype, ListArguments()[i]);
}
}
out_type->clear();
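
Note the InferType hunk at the end of this file: removing UNIFORM_TYPE_CHECK
is what actually admits a second dtype, since the old check forced every
input (including sequence_length) to match the data's dtype. The surviving
loop only fills in unknown types. A minimal sketch of the relaxed rule
(simplified, not the full InferType):

#include <cstddef>
#include <vector>

// Inputs left unspecified (-1) inherit the data dtype; inputs with a known
// dtype (e.g. an int32 sequence_length) are now left alone instead of
// being checked against the data dtype.
void relax_infer_type(std::vector<int> *in_type) {
  const int dtype = (*in_type)[0];
  for (std::size_t i = 0; i < in_type->size(); ++i)
    if ((*in_type)[i] == -1) (*in_type)[i] = dtype;
}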
15 changes: 11 additions & 4 deletions src/operator/sequence_last.cc
@@ -28,18 +28,25 @@
namespace mxnet {
namespace op {
template <>
-Operator *CreateOp<cpu>(SequenceLastParam param, int dtype) {
+Operator *CreateOp<cpu>(SequenceLastParam param, int dtype, int itype) {
Operator *op = nullptr;
-MSHADOW_TYPE_SWITCH(dtype, DType,
-                    { op = new SequenceLastOp<cpu, DType>(param); })
+MSHADOW_TYPE_SWITCH(dtype, DType, {
+  MSHADOW_TYPE_SWITCH(itype, IType, {
+    op = new SequenceLastOp<cpu, DType, IType>(param);
+  });
+});
return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
Operator *SequenceLastProp::CreateOperatorEx(Context ctx,
std::vector<TShape> *in_shape,
std::vector<int> *in_type) const {
-DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
+if (in_type->size() >= 2 && (*in_type)[1] != -1)
+  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[1]);
+
+// sequence_length not passed in, so fall back to using input array dtype for second input argument
+DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(SequenceLastParam);
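
One subtlety in CreateOperatorEx above: DO_BIND_DISPATCH (from
operator_common.h) expands to a return statement, so the guarded call
handles the case where sequence_length's dtype is known, and the second
call is only reached as the fallback. A plain-C++ sketch of that control
flow (hypothetical names, not the macro itself):

#include <cstdio>
#include <utility>
#include <vector>

// Stand-in for the dispatch: returns the (dtype, itype) pair that would be
// bound. The fallback reuses the data dtype, so models that never pass a
// typed sequence_length keep working unchanged.
std::pair<int, int> pick_types(const std::vector<int> &in_type) {
  if (in_type.size() >= 2 && in_type[1] != -1)
    return {in_type[0], in_type[1]};  // sequence_length dtype known
  return {in_type[0], in_type[0]};    // fall back to the data dtype
}

int main() {
  auto a = pick_types({0, 4});   // e.g. float32 data, int32 lengths -> (0, 4)
  auto b = pick_types({0, -1});  // length dtype unknown            -> (0, 0)
  std::printf("(%d,%d) (%d,%d)\n", a.first, a.second, b.first, b.second);
}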
9 changes: 6 additions & 3 deletions src/operator/sequence_last.cu
@@ -28,10 +28,13 @@

namespace mxnet {
namespace op {
-template <> Operator *CreateOp<gpu>(SequenceLastParam param, int dtype) {
+template <> Operator *CreateOp<gpu>(SequenceLastParam param, int dtype, int itype) {
Operator *op = NULL;
-MSHADOW_TYPE_SWITCH(dtype, DType,
-                    { op = new SequenceLastOp<gpu, DType>(param); })
+MSHADOW_TYPE_SWITCH(dtype, DType, {
+  MSHADOW_TYPE_SWITCH(itype, IType, {
+    op = new SequenceLastOp<gpu, DType, IType>(param);
+  });
+});
return op;
}

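
The nested MSHADOW_TYPE_SWITCH in the CPU and GPU CreateOp turns two runtime
type ids into compile-time template parameters, so the operator is
instantiated for every (DType, IType) combination; the cost is quadratic
growth in generated code. A self-contained sketch of the same double
dispatch (type ids and names are illustrative, not mshadow's):

#include <cstdint>
#include <stdexcept>

struct OpBase { virtual ~OpBase() = default; };
template <typename DType, typename IType> struct SeqOp : OpBase {};

// Inner switch: runtime itype -> compile-time IType, with DType fixed.
template <typename DType>
OpBase *dispatch_itype(int itype) {
  switch (itype) {
    case 0: return new SeqOp<DType, float>();
    case 4: return new SeqOp<DType, std::int32_t>();
    default: throw std::invalid_argument("unsupported itype");
  }
}

// Outer switch: runtime dtype -> compile-time DType, then dispatch itype.
OpBase *create_op(int dtype, int itype) {
  switch (dtype) {
    case 0: return dispatch_itype<float>(itype);
    case 4: return dispatch_itype<std::int32_t>(itype);
    default: throw std::invalid_argument("unsupported dtype");
  }
}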
24 changes: 11 additions & 13 deletions src/operator/sequence_mask-inl.h
@@ -68,8 +68,8 @@ struct SequenceMaskParam : public dmlc::Parameter<SequenceMaskParam> {
// (seqlen, batch, rest) case
template <int req>
struct SequenceMask0Kernel {
-template <typename DType>
-MSHADOW_XINLINE static void Map(int b, DType *in, const DType *idx,
+template <typename DType, typename IType>
+MSHADOW_XINLINE static void Map(int b, DType *in, const IType *idx,
index_t max_s_len, index_t batch_size,
index_t restsize, DType value) {
const index_t seqpos = static_cast<int>(idx[b]);
@@ -86,8 +86,8 @@
// (batch, seqlen, rest) case
template <int req>
struct SequenceMask1Kernel {
-template <typename DType>
-MSHADOW_XINLINE static void Map(int b, DType *in, const DType *idx,
+template <typename DType, typename IType>
+MSHADOW_XINLINE static void Map(int b, DType *in, const IType *idx,
index_t max_s_len, index_t batch_size,
index_t restsize, DType value) {
const index_t seqpos = static_cast<int>(idx[b]);
@@ -101,13 +101,13 @@
}
};

-template <typename xpu, typename DType>
+template <typename xpu, typename DType, typename IType>
class SequenceMaskOp : public Operator {
public:
explicit SequenceMaskOp(SequenceMaskParam p) { this->param_ = p; }

void sequence_mask(const mshadow::Tensor<xpu, 3, DType> &data,
-const mshadow::Tensor<xpu, 1, DType> &indices,
+const mshadow::Tensor<xpu, 1, IType> &indices,
const OpReqType req, mshadow::Stream<xpu> *const s,
DType val) {
using namespace mshadow;
@@ -153,8 +153,8 @@
// Actual implementation of masking
Assign(out, req[seq_mask::kOut], F<mshadow_op::identity>(data));
if (param_.use_sequence_length) {
-Tensor<xpu, 1, DType> indices =
-    in_data[seq_mask::kSequenceLength].get<xpu, 1, DType>(s);
+Tensor<xpu, 1, IType> indices =
+    in_data[seq_mask::kSequenceLength].get<xpu, 1, IType>(s);
sequence_mask(out, indices, req[seq_mask::kOut], s,
static_cast<DType>(param_.value));
}
@@ -190,8 +190,8 @@
if (!param_.use_sequence_length) {
Assign(data_g, req[seq_mask::kData], F<mshadow_op::identity>(out_g));
} else {
-Tensor<xpu, 1, DType> indices =
-    in_data[seq_mask::kSequenceLength].get<xpu, 1, DType>(s);
+Tensor<xpu, 1, IType> indices =
+    in_data[seq_mask::kSequenceLength].get<xpu, 1, IType>(s);
if (req[seq_mask::kData] == kAddTo) {
Tensor<xpu, 3, DType> out_g_temp =
ctx.requested[seq_mask::kTempSpace].get_space_typed<xpu, 3, DType>(
@@ -212,7 +212,7 @@
}; // class SequenceMaskOp

template <typename xpu>
-Operator *CreateOp(SequenceMaskParam param, int dtype);
+Operator *CreateOp(SequenceMaskParam param, int dtype, int itype);

#if DMLC_USE_CXX11
class SequenceMaskProp : public OperatorProperty {
@@ -270,8 +270,6 @@ class SequenceMaskProp : public OperatorProperty {
for (size_t i = 0; i < in_type->size(); ++i) {
if ((*in_type)[i] == -1) {
(*in_type)[i] = dtype;
-} else {
-UNIFORM_TYPE_CHECK((*in_type)[i], dtype, ListArguments()[i]);
}
}
out_type->clear();
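
For reference, the rule the two masking kernels implement: within each batch
item, every time step at or beyond that item's sequence length is
overwritten with value. A simplified CPU sketch for the (seqlen, batch,
rest) layout of SequenceMask0Kernel (illustrative, not the kernel itself):

#include <cstdint>

// data is laid out as data[(s * batch_size + b) * restsize + r]; positions
// with s >= seq_len[b] are masked. IType may be int32 after this commit.
template <typename DType, typename IType>
void mask_seq0(DType *data, const IType *seq_len, int max_s_len,
               int batch_size, int restsize, DType value) {
  for (int b = 0; b < batch_size; ++b) {
    const int len = static_cast<int>(seq_len[b]);
    for (int s = len; s < max_s_len; ++s)
      for (int r = 0; r < restsize; ++r)
        data[(s * batch_size + b) * restsize + r] = value;
  }
}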
16 changes: 12 additions & 4 deletions src/operator/sequence_mask.cc
@@ -28,18 +28,26 @@
namespace mxnet {
namespace op {
template <>
-Operator *CreateOp<cpu>(SequenceMaskParam param, int dtype) {
+Operator *CreateOp<cpu>(SequenceMaskParam param, int dtype, int itype) {
Operator *op = nullptr;
-MSHADOW_TYPE_SWITCH(dtype, DType,
-                    { op = new SequenceMaskOp<cpu, DType>(param); })
+MSHADOW_TYPE_SWITCH(dtype, DType, {
+  MSHADOW_TYPE_SWITCH(itype, IType, {
+    op = new SequenceMaskOp<cpu, DType, IType>(param);
+  });
+});
return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
Operator *SequenceMaskProp::CreateOperatorEx(Context ctx,
std::vector<TShape> *in_shape,
std::vector<int> *in_type) const {
-DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
+if (in_type->size() >= 2 && (*in_type)[1] != -1) {
+  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[1]);
+}
+
+// sequence_length not passed in, so fall back to using input array dtype for second input argument
+DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(SequenceMaskParam);
9 changes: 6 additions & 3 deletions src/operator/sequence_mask.cu
@@ -29,10 +29,13 @@
namespace mxnet {
namespace op {

-template <> Operator *CreateOp<gpu>(SequenceMaskParam param, int dtype) {
+template <> Operator *CreateOp<gpu>(SequenceMaskParam param, int dtype, int itype) {
Operator *op = NULL;
-MSHADOW_TYPE_SWITCH(dtype, DType,
-                    { op = new SequenceMaskOp<gpu, DType>(param); })
+MSHADOW_TYPE_SWITCH(dtype, DType, {
+  MSHADOW_TYPE_SWITCH(itype, IType, {
+    op = new SequenceMaskOp<gpu, DType, IType>(param);
+  });
+});
return op;
}

20 changes: 9 additions & 11 deletions src/operator/sequence_reverse-inl.h
@@ -65,14 +65,14 @@ struct SequenceReverseParam : public dmlc::Parameter<SequenceReverseParam> {
};

struct ReverseKernel {
-template <typename DType>
+template <typename DType, typename IType>
MSHADOW_XINLINE static void Map(const int i, DType *const out_data,
const DType *const in_data,
const OpReqType req,
const index_t max_seq_len,
const index_t batch_size,
const index_t other_dim, const index_t numel,
-const DType *const indices) {
+const IType *const indices) {
for (index_t batch = 0; batch < batch_size; ++batch) {
const index_t num_seq =
indices ? static_cast<index_t>(indices[batch]) : max_seq_len;
@@ -102,13 +102,13 @@
}
};

-template <typename xpu, typename DType>
+template <typename xpu, typename DType, typename IType>
class SequenceReverseOp : public Operator {
public:
explicit SequenceReverseOp(SequenceReverseParam p) { this->param_ = p; }
void sequence_reverse(const mshadow::Tensor<xpu, 3, DType> &data,
const mshadow::Tensor<xpu, 3, DType> &out,
-const OpReqType req, const DType *const indices,
+const OpReqType req, const IType *const indices,
mshadow::Stream<xpu> *const s) {
using namespace mshadow;
using namespace mshadow::expr;
@@ -145,9 +145,9 @@
Tensor<xpu, 3, DType> out =
out_data[seq_reverse::kOut].get_with_shape<xpu, 3, DType>(s3, s);

-const DType *const indices =
+const IType *const indices =
param_.use_sequence_length
-? in_data[seq_reverse::kSequenceLength].dptr<DType>()
+? in_data[seq_reverse::kSequenceLength].dptr<IType>()
: nullptr;

sequence_reverse(data, out, req[seq_reverse::kOut], indices, s);
@@ -179,9 +179,9 @@
Tensor<xpu, 3, DType> output_grad =
out_grad[seq_reverse::kOut].get_with_shape<xpu, 3, DType>(s3, s);

-const DType *const indices =
+const IType *const indices =
param_.use_sequence_length
-? in_data[seq_reverse::kSequenceLength].dptr<DType>()
+? in_data[seq_reverse::kSequenceLength].dptr<IType>()
: nullptr;

sequence_reverse(output_grad, data_grad, req[seq_reverse::kData], indices,
@@ -193,7 +193,7 @@
}; // class SequenceReverseOp

template <typename xpu>
-Operator *CreateOp(SequenceReverseParam param, int dtype);
+Operator *CreateOp(SequenceReverseParam param, int dtype, int itype);

#if DMLC_USE_CXX11
class SequenceReverseProp : public OperatorProperty {
@@ -249,8 +249,6 @@
for (size_t i = 0; i < in_type->size(); ++i) {
if ((*in_type)[i] == -1) {
(*in_type)[i] = dtype;
-} else {
-UNIFORM_TYPE_CHECK((*in_type)[i], dtype, ListArguments()[i]);
}
}
out_type->clear();
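
ReverseKernel's contract, for reference: within each batch item the first
num_seq time steps are emitted in reverse order and any steps past num_seq
are copied through unchanged, where num_seq is the (possibly integer)
per-item length, or max_seq_len when indices is null. A simplified CPU
sketch (illustrative only, not the kernel itself):

#include <cstdint>

// out and in are laid out as x[(s * batch_size + b) * other_dim + o].
template <typename DType, typename IType>
void reverse_seq(DType *out, const DType *in, const IType *seq_len,
                 int max_seq_len, int batch_size, int other_dim) {
  for (int b = 0; b < batch_size; ++b) {
    const int num_seq =
        seq_len ? static_cast<int>(seq_len[b]) : max_seq_len;  // null => full reverse
    for (int s = 0; s < max_seq_len; ++s) {
      const int src = (s < num_seq) ? (num_seq - 1 - s) : s;   // reverse prefix, copy tail
      for (int o = 0; o < other_dim; ++o)
        out[(s * batch_size + b) * other_dim + o] =
            in[(src * batch_size + b) * other_dim + o];
    }
  }
}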
16 changes: 12 additions & 4 deletions src/operator/sequence_reverse.cc
@@ -28,18 +28,26 @@
namespace mxnet {
namespace op {
template <>
-Operator *CreateOp<cpu>(SequenceReverseParam param, int dtype) {
+Operator *CreateOp<cpu>(SequenceReverseParam param, int dtype, int itype) {
Operator *op = nullptr;
-MSHADOW_TYPE_SWITCH(dtype, DType,
-                    { op = new SequenceReverseOp<cpu, DType>(param); })
+MSHADOW_TYPE_SWITCH(dtype, DType, {
+  MSHADOW_TYPE_SWITCH(itype, IType, {
+    op = new SequenceReverseOp<cpu, DType, IType>(param);
+  });
+});
return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
Operator *SequenceReverseProp::CreateOperatorEx(
Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const {
-DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
+
+if (in_type->size() >= 2 && (*in_type)[1] != -1)
+  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[1]);
+
+// sequence_length not passed in, so fall back to using input array dtype for second input argument
+DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(SequenceReverseParam);
8 changes: 5 additions & 3 deletions src/operator/sequence_reverse.cu
@@ -28,11 +28,13 @@

namespace mxnet {
namespace op {
-template <> Operator *CreateOp<gpu>(SequenceReverseParam param, int dtype) {
+template <> Operator *CreateOp<gpu>(SequenceReverseParam param, int dtype, int itype) {
Operator *op = nullptr;
MSHADOW_TYPE_SWITCH(dtype, DType, {
-  op = new SequenceReverseOp<gpu, DType>(param);
-})
+  MSHADOW_TYPE_SWITCH(itype, IType, {
+    op = new SequenceReverseOp<gpu, DType, IType>(param);
+  });
+});
return op;
}

