From 88e8861ef198432b811d9eac1e2ceccf4842ec39 Mon Sep 17 00:00:00 2001
From: Seanlinx <515364970@qq.com>
Date: Sat, 21 May 2016 21:26:56 +0800
Subject: [PATCH 1/2] modify smooth_l1 and softmax_output

---
 src/operator/smooth_l1_unary-inl.h | 10 +++++-----
 src/operator/softmax_output-inl.h  |  8 ++++++--
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/operator/smooth_l1_unary-inl.h b/src/operator/smooth_l1_unary-inl.h
index 2b81c765f62e..8acdf026d0eb 100644
--- a/src/operator/smooth_l1_unary-inl.h
+++ b/src/operator/smooth_l1_unary-inl.h
@@ -72,8 +72,8 @@ void SmoothL1Forward_(const TBlob& src,
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
   MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
-    mshadow::Tensor<xpu, 2, DType> out = ret->get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> in = src.get<xpu, 2, DType>(s);
+    mshadow::Tensor<xpu, 4, DType> out = ret->get<xpu, 4, DType>(s);
+    mshadow::Tensor<xpu, 4, DType> in = src.get<xpu, 4, DType>(s);
     ASSIGN_DISPATCH(out, req,
                     F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
   });
@@ -95,9 +95,9 @@ void SmoothL1BackwardUseIn_(const OutputGrad& out_grad,
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
   MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
-    mshadow::Tensor<xpu, 2, DType> src = in_data0.data.get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> ograd = out_grad.data.get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> igrad = in_grad->get<xpu, 2, DType>(s);
+    mshadow::Tensor<xpu, 4, DType> src = in_data0.data.get<xpu, 4, DType>(s);
+    mshadow::Tensor<xpu, 4, DType> ograd = out_grad.data.get<xpu, 4, DType>(s);
+    mshadow::Tensor<xpu, 4, DType> igrad = in_grad->get<xpu, 4, DType>(s);
     ASSIGN_DISPATCH(igrad, req,
                     ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
   });
diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index 2d1d8f6d12b6..141b54e35659 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -98,6 +98,8 @@ class SoftmaxOutputOp : public Operator {
       Tensor<xpu, 2, DType> label = in_data[softmaxout_enum::kLabel].FlatTo2D<xpu, DType>(s);
       Tensor<xpu, 3, DType> out =
           out_data[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
+      Tensor<xpu, 3, DType> o_grad =
+          out_grad[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
       Tensor<xpu, 3, DType> grad =
           in_grad[softmaxout_enum::kData].get_with_shape<xpu, 3, DType>(s3, s);
       if (param_.use_ignore) {
@@ -105,7 +107,9 @@ class SoftmaxOutputOp : public Operator {
       } else {
         SoftmaxGrad(grad, out, label);
       }
-      grad *= DType(param_.grad_scale/s3[2]);
+//      grad *= DType(param_.grad_scale/s3[2]);
+      grad *= DType(param_.grad_scale);
+      grad *= o_grad;
     } else {
       const TShape& label_shape = in_data[softmaxout_enum::kLabel].shape_;
       Tensor<xpu, 1, DType> label = in_data[softmaxout_enum::kLabel].get_with_shape<xpu, 1, DType>(
@@ -199,7 +203,7 @@ class SoftmaxOutputProp : public OperatorProperty {
     const std::vector<int> &out_grad,
     const std::vector<int> &in_data,
     const std::vector<int> &out_data) const override {
-    return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    return {out_grad[softmaxout_enum::kOut], in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
   }
 
   std::vector<std::pair<int, void*> > BackwardInplaceOption(

From ba52895a38525baaf5393931add2a2fe09edbbe9 Mon Sep 17 00:00:00 2001
From: Seanlinx <515364970@qq.com>
Date: Sun, 22 May 2016 00:06:05 +0800
Subject: [PATCH 2/2] modify softmax_output and smooth_l1 operator

---
 src/operator/smooth_l1_unary-inl.h | 47 +++++++++++++++++++++---------
 src/operator/softmax_output-inl.h  | 25 +++++++++++-----
 2 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/src/operator/smooth_l1_unary-inl.h b/src/operator/smooth_l1_unary-inl.h
index 8acdf026d0eb..0094abba0013 100644
--- a/src/operator/smooth_l1_unary-inl.h
+++ b/src/operator/smooth_l1_unary-inl.h
@@ -71,12 +71,22 @@ void SmoothL1Forward_(const TBlob& src,
   CHECK_EQ(ret->type_flag_, src.type_flag_)
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
-  MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
-    mshadow::Tensor<xpu, 4, DType> out = ret->get<xpu, 4, DType>(s);
-    mshadow::Tensor<xpu, 4, DType> in = src.get<xpu, 4, DType>(s);
-    ASSIGN_DISPATCH(out, req,
-                    F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
-  });
+  const int ndim = ret[0].shape_.ndim();
+  if (ndim == 4) {
+    MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
+      mshadow::Tensor<xpu, 4, DType> out = ret->get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> in = src.get<xpu, 4, DType>(s);
+      ASSIGN_DISPATCH(out, req,
+                      F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
+    });
+  } else if (ndim == 2) {
+    MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
+      mshadow::Tensor<xpu, 2, DType> out = ret->get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> in = src.get<xpu, 2, DType>(s);
+      ASSIGN_DISPATCH(out, req,
+                      F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
+    });
+  }
 }
 
 template<typename xpu>
@@ -94,13 +104,24 @@ void SmoothL1BackwardUseIn_(const OutputGrad& out_grad,
   CHECK_EQ(in_grad->type_flag_, in_data0.data.type_flag_)
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
-  MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
-    mshadow::Tensor<xpu, 4, DType> src = in_data0.data.get<xpu, 4, DType>(s);
-    mshadow::Tensor<xpu, 4, DType> ograd = out_grad.data.get<xpu, 4, DType>(s);
-    mshadow::Tensor<xpu, 4, DType> igrad = in_grad->get<xpu, 4, DType>(s);
-    ASSIGN_DISPATCH(igrad, req,
-                    ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
-  });
+  const int ndim = in_grad[0].shape_.ndim();
+  if (ndim == 4) {
+    MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
+      mshadow::Tensor<xpu, 4, DType> src = in_data0.data.get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> ograd = out_grad.data.get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> igrad = in_grad->get<xpu, 4, DType>(s);
+      ASSIGN_DISPATCH(igrad, req,
+                      ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
+    });
+  } else if (ndim == 2) {
+    MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
+      mshadow::Tensor<xpu, 2, DType> src = in_data0.data.get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> ograd = out_grad.data.get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> igrad = in_grad->get<xpu, 2, DType>(s);
+      ASSIGN_DISPATCH(igrad, req,
+                      ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
+    });
+  }
 }
 
 MXNET_REGISTER_SIMPLE_OP(smooth_l1, XPU)
diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index 141b54e35659..6bac3049b2a9 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -30,6 +30,7 @@ struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
   float ignore_label;
   bool multi_output;
   bool use_ignore;
+  bool is_hidden_layer;
   DMLC_DECLARE_PARAMETER(SoftmaxOutputParam) {
     DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f)
     .describe("Scale the gradient by a float factor");
@@ -43,6 +44,8 @@ struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
     DMLC_DECLARE_FIELD(use_ignore).set_default(false)
     .describe("If set to true, the ignore_label value will not contribute "
               "to the backward gradient");
+    DMLC_DECLARE_FIELD(is_hidden_layer).set_default(false)
+    .describe("If set to true, out_grad is needed in backward");
   };
 };
 
@@ -98,8 +101,6 @@ class SoftmaxOutputOp : public Operator {
       Tensor<xpu, 2, DType> label = in_data[softmaxout_enum::kLabel].FlatTo2D<xpu, DType>(s);
       Tensor<xpu, 3, DType> out =
           out_data[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
-      Tensor<xpu, 3, DType> o_grad =
-          out_grad[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
       Tensor<xpu, 3, DType> grad =
           in_grad[softmaxout_enum::kData].get_with_shape<xpu, 3, DType>(s3, s);
       if (param_.use_ignore) {
@@ -107,9 +108,15 @@ class SoftmaxOutputOp : public Operator {
       } else {
         SoftmaxGrad(grad, out, label);
       }
-//      grad *= DType(param_.grad_scale/s3[2]);
-      grad *= DType(param_.grad_scale);
-      grad *= o_grad;
+      if (!param_.is_hidden_layer) {
+        grad *= DType(param_.grad_scale/s3[2]);
+      }
+      else {
+        Tensor<xpu, 3, DType> o_grad =
+            out_grad[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
+        grad *= DType(param_.grad_scale);
+        grad *= o_grad;
+      }
     } else {
       const TShape& label_shape = in_data[softmaxout_enum::kLabel].shape_;
       Tensor<xpu, 1, DType> label = in_data[softmaxout_enum::kLabel].get_with_shape<xpu, 1, DType>(
@@ -203,9 +210,13 @@ class SoftmaxOutputProp : public OperatorProperty {
     const std::vector<int> &out_grad,
     const std::vector<int> &in_data,
     const std::vector<int> &out_data) const override {
-    return {out_grad[softmaxout_enum::kOut], in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    if (param_.is_hidden_layer) {
+      return {out_grad[softmaxout_enum::kOut], in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    }
+    else {
+      return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    }
   }
-
   std::vector<std::pair<int, void*> > BackwardInplaceOption(
     const std::vector<int> &out_grad,
     const std::vector<int> &in_data,
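For reference, a minimal Python sketch of how the patched operators might be wired together from a build that includes the two patches above (e.g. an RPN-style head). The symbol names, shapes, and the `scalar`/`grad_scale` values are illustrative assumptions, not taken from the patches; only `is_hidden_layer` and the 2-D/4-D handling of `smooth_l1` come from the changes shown here.

```python
import mxnet as mx

# Hypothetical inputs; shapes are only indicative (A = anchors per location).
cls_score   = mx.symbol.Variable('cls_score')    # (batch, 2*A, H, W)
bbox_pred   = mx.symbol.Variable('bbox_pred')    # (batch, 4*A, H, W)
label       = mx.symbol.Variable('label')
bbox_target = mx.symbol.Variable('bbox_target')

# With the patch, is_hidden_layer=True makes backward request out_grad and
# compute grad * grad_scale * out_grad instead of grad * grad_scale / s3[2],
# so the softmax output can feed further layers rather than act as a pure loss.
cls_prob = mx.symbol.SoftmaxOutput(data=cls_score, label=label,
                                   multi_output=True, use_ignore=True,
                                   ignore_label=-1, is_hidden_layer=True,
                                   name='cls_prob')

# smooth_l1 is the simple op registered by MXNET_REGISTER_SIMPLE_OP; after
# PATCH 2/2 it dispatches on ndim, so the 4-D regression tensor can be fed
# directly (scalar is sigma of the smooth L1 transition point).
bbox_loss = mx.symbol.smooth_l1(data=(bbox_pred - bbox_target),
                                scalar=3.0, name='bbox_loss_')
bbox_loss = mx.symbol.MakeLoss(data=bbox_loss, grad_scale=1.0,
                               name='bbox_loss')

group = mx.symbol.Group([cls_prob, bbox_loss])
```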