
Commit

Merge pull request apache#2 from Seanlinx/master
modify smooth_l1 and softmax_output
winstywang committed May 24, 2016
2 parents 4f15ffb + ba52895 commit c638511
Showing 2 changed files with 52 additions and 16 deletions.
47 changes: 34 additions & 13 deletions src/operator/smooth_l1_unary-inl.h
@@ -71,12 +71,22 @@ void SmoothL1Forward_(const TBlob& src,
   CHECK_EQ(ret->type_flag_, src.type_flag_)
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
-  MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
-    mshadow::Tensor<xpu, 2, DType> out = ret->get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> in = src.get<xpu, 2, DType>(s);
-    ASSIGN_DISPATCH(out, req,
-                    F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
-  });
+  const int ndim = ret[0].shape_.ndim();
+  if (ndim == 4) {
+    MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
+      mshadow::Tensor<xpu, 4, DType> out = ret->get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> in = src.get<xpu, 4, DType>(s);
+      ASSIGN_DISPATCH(out, req,
+                      F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
+    });
+  } else if (ndim == 2) {
+    MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
+      mshadow::Tensor<xpu, 2, DType> out = ret->get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> in = src.get<xpu, 2, DType>(s);
+      ASSIGN_DISPATCH(out, req,
+                      F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
+    });
+  }
 }
 
 template<typename xpu>
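
Note: the forward change above only adds a dispatch on tensor rank (4-D vs. 2-D); the elementwise kernel is untouched. For reference, a minimal self-contained sketch of the loss that mshadow_op::smooth_l1_loss is assumed to compute (the usual Fast R-CNN form, with sigma2 = env.scalar * env.scalar):

#include <cmath>
#include <cstdio>

// Hedged sketch: elementwise smooth L1 loss, quadratic near zero and linear
// for large residuals. Assumed to mirror mshadow_op::smooth_l1_loss.
float smooth_l1_loss(float x, float sigma2) {
  if (std::fabs(x) < 1.0f / sigma2) {
    return 0.5f * sigma2 * x * x;       // quadratic region near zero
  }
  return std::fabs(x) - 0.5f / sigma2;  // linear region for large residuals
}

int main() {
  const float sigma2 = 9.0f;  // e.g. sigma = 3
  const float xs[] = {-2.0f, -0.1f, 0.0f, 0.05f, 2.0f};
  for (float x : xs) {
    std::printf("x=%+.2f  loss=%.4f\n", x, smooth_l1_loss(x, sigma2));
  }
  return 0;
}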
@@ -94,13 +104,24 @@ void SmoothL1BackwardUseIn_(const OutputGrad& out_grad,
   CHECK_EQ(in_grad->type_flag_, in_data0.data.type_flag_)
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
-  MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
-    mshadow::Tensor<xpu, 2, DType> src = in_data0.data.get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> ograd = out_grad.data.get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> igrad = in_grad->get<xpu, 2, DType>(s);
-    ASSIGN_DISPATCH(igrad, req,
-                    ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
-  });
+  const int ndim = in_grad[0].shape_.ndim();
+  if (ndim == 4) {
+    MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
+      mshadow::Tensor<xpu, 4, DType> src = in_data0.data.get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> ograd = out_grad.data.get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> igrad = in_grad->get<xpu, 4, DType>(s);
+      ASSIGN_DISPATCH(igrad, req,
+                      ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
+    });
+  } else if (ndim == 2) {
+    MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
+      mshadow::Tensor<xpu, 2, DType> src = in_data0.data.get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> ograd = out_grad.data.get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> igrad = in_grad->get<xpu, 2, DType>(s);
+      ASSIGN_DISPATCH(igrad, req,
+                      ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
+    });
+  }
 }
 
 MXNET_REGISTER_SIMPLE_OP(smooth_l1, XPU)
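
Note: the backward change mirrors the forward one; only the rank dispatch is new, and the chain rule is still applied by the ograd * F<...> expression. A hedged companion sketch of the piecewise derivative that mshadow_op::smooth_l1_gradient is assumed to compute:

#include <cmath>
#include <cstdio>

// Hedged sketch: derivative of the smooth L1 loss above, sigma2 * x in the
// quadratic region and sign(x) once the loss becomes linear. Assumed to
// match mshadow_op::smooth_l1_gradient.
float smooth_l1_gradient(float x, float sigma2) {
  if (std::fabs(x) < 1.0f / sigma2) {
    return sigma2 * x;                                 // d/dx of 0.5*sigma2*x^2
  }
  return x > 0.0f ? 1.0f : (x < 0.0f ? -1.0f : 0.0f);  // sign(x)
}

int main() {
  const float sigma2 = 9.0f;
  const float xs[] = {-2.0f, -0.1f, 0.0f, 0.05f, 2.0f};
  for (float x : xs) {
    std::printf("x=%+.2f  dloss/dx=%.4f\n", x, smooth_l1_gradient(x, sigma2));
  }
  return 0;
}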
21 changes: 18 additions & 3 deletions src/operator/softmax_output-inl.h
@@ -30,6 +30,7 @@ struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
   float ignore_label;
   bool multi_output;
   bool use_ignore;
+  bool is_hidden_layer;
   DMLC_DECLARE_PARAMETER(SoftmaxOutputParam) {
     DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f)
     .describe("Scale the gradient by a float factor");
@@ -43,6 +44,8 @@ struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
     DMLC_DECLARE_FIELD(use_ignore).set_default(false)
     .describe("If set to true, the ignore_label value will not contribute "
               "to the backward gradient");
+    DMLC_DECLARE_FIELD(is_hidden_layer).set_default(false)
+    .describe("If set to true, out_grad is needed in backward");
   };
 };

@@ -105,7 +108,15 @@ class SoftmaxOutputOp : public Operator {
       } else {
         SoftmaxGrad(grad, out, label);
       }
-      grad *= DType(param_.grad_scale/s3[2]);
+      if (!param_.is_hidden_layer) {
+        grad *= DType(param_.grad_scale/s3[2]);
+      }
+      else {
+        Tensor<xpu, 3, DType> o_grad =
+          out_grad[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
+        grad *= DType(param_.grad_scale);
+        grad *= o_grad;
+      }
     } else {
       const TShape& label_shape = in_data[softmaxout_enum::kLabel].shape_;
       Tensor<xpu, 1, DType> label = in_data[softmaxout_enum::kLabel].get_with_shape<xpu, 1, DType>(
@@ -199,9 +210,13 @@ class SoftmaxOutputProp : public OperatorProperty {
     const std::vector<int> &out_grad,
     const std::vector<int> &in_data,
     const std::vector<int> &out_data) const override {
-    return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    if (param_.is_hidden_layer) {
+      return {out_grad[softmaxout_enum::kOut], in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    }
+    else {
+      return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    }
   }
 
   std::vector<std::pair<int, void*> > BackwardInplaceOption(
     const std::vector<int> &out_grad,
     const std::vector<int> &in_data,
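
Note: taken together, the softmax_output changes let the layer act either as a terminal loss (the original behaviour) or as a hidden softmax whose downstream gradient is chained in elementwise; listing out_grad in DeclareBackwardDependency is what keeps that gradient alive for Backward. A rough standalone sketch of the two scaling paths on flat buffers (function and variable names here are illustrative, not the operator's real API):

#include <cstdio>

// Hedged sketch of the two gradient-scaling modes. `grad` stands in for the
// buffer SoftmaxGrad fills; `o_grad` stands in for out_grad[kOut].
void scale_as_loss_layer(float* grad, int size, float grad_scale, int num_positions) {
  // Terminal loss layer: average over positions, as in
  // grad *= DType(param_.grad_scale / s3[2]).
  for (int i = 0; i < size; ++i) grad[i] *= grad_scale / num_positions;
}

void scale_as_hidden_layer(float* grad, const float* o_grad, int size, float grad_scale) {
  // Hidden layer: elementwise product with the incoming gradient, as in
  // grad *= DType(param_.grad_scale); grad *= o_grad.
  for (int i = 0; i < size; ++i) grad[i] *= grad_scale * o_grad[i];
}

int main() {
  float grad_a[4] = {0.2f, -0.8f, 0.3f, 0.3f};
  float grad_b[4] = {0.2f, -0.8f, 0.3f, 0.3f};
  const float o_grad[4] = {1.0f, 0.5f, 0.0f, 2.0f};
  scale_as_loss_layer(grad_a, 4, 1.0f, 4);
  scale_as_hidden_layer(grad_b, o_grad, 4, 1.0f);
  for (int i = 0; i < 4; ++i) {
    std::printf("loss-layer %.3f   hidden-layer %.3f\n", grad_a[i], grad_b[i]);
  }
  return 0;
}

In the hidden-layer path the grad_scale/s3[2] averaging is skipped, presumably because the incoming gradient already carries whatever normalization the downstream loss applied.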
