diff --git a/src/operator/smooth_l1_unary-inl.h b/src/operator/smooth_l1_unary-inl.h
index 2b81c765f62e..0094abba0013 100644
--- a/src/operator/smooth_l1_unary-inl.h
+++ b/src/operator/smooth_l1_unary-inl.h
@@ -71,12 +71,22 @@ void SmoothL1Forward_(const TBlob& src,
   CHECK_EQ(ret->type_flag_, src.type_flag_)
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
-  MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
-    mshadow::Tensor<xpu, 2, DType> out = ret->get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> in = src.get<xpu, 2, DType>(s);
-    ASSIGN_DISPATCH(out, req,
-                    F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
-  });
+  const int ndim = ret[0].shape_.ndim();
+  if (ndim == 4) {
+    MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
+      mshadow::Tensor<xpu, 4, DType> out = ret->get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> in = src.get<xpu, 4, DType>(s);
+      ASSIGN_DISPATCH(out, req,
+                      F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
+    });
+  } else if (ndim == 2) {
+    MSHADOW_TYPE_SWITCH(ret->type_flag_, DType, {
+      mshadow::Tensor<xpu, 2, DType> out = ret->get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> in = src.get<xpu, 2, DType>(s);
+      ASSIGN_DISPATCH(out, req,
+                      F<mshadow_op::smooth_l1_loss>(in, ScalarExp<DType>(sigma2)));
+    });
+  }
 }
 
 template<typename xpu>
@@ -94,13 +104,24 @@ void SmoothL1BackwardUseIn_(const OutputGrad& out_grad,
   CHECK_EQ(in_grad->type_flag_, in_data0.data.type_flag_)
     << "Unary function only support input/output with the same type";
   real_t sigma2 = env.scalar * env.scalar;
-  MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
-    mshadow::Tensor<xpu, 2, DType> src = in_data0.data.get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> ograd = out_grad.data.get<xpu, 2, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> igrad = in_grad->get<xpu, 2, DType>(s);
-    ASSIGN_DISPATCH(igrad, req,
-                    ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
-  });
+  const int ndim = in_grad[0].shape_.ndim();
+  if (ndim == 4) {
+    MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
+      mshadow::Tensor<xpu, 4, DType> src = in_data0.data.get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> ograd = out_grad.data.get<xpu, 4, DType>(s);
+      mshadow::Tensor<xpu, 4, DType> igrad = in_grad->get<xpu, 4, DType>(s);
+      ASSIGN_DISPATCH(igrad, req,
+                      ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
+    });
+  } else if (ndim == 2) {
+    MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
+      mshadow::Tensor<xpu, 2, DType> src = in_data0.data.get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> ograd = out_grad.data.get<xpu, 2, DType>(s);
+      mshadow::Tensor<xpu, 2, DType> igrad = in_grad->get<xpu, 2, DType>(s);
+      ASSIGN_DISPATCH(igrad, req,
+                      ograd * F<mshadow_op::smooth_l1_gradient>(src, ScalarExp<DType>(sigma2)));
+    });
+  }
 }
 
 MXNET_REGISTER_SIMPLE_OP(smooth_l1, XPU)
diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index 2d1d8f6d12b6..6bac3049b2a9 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -30,6 +30,7 @@ struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
   float ignore_label;
   bool multi_output;
   bool use_ignore;
+  bool is_hidden_layer;
   DMLC_DECLARE_PARAMETER(SoftmaxOutputParam) {
     DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f)
     .describe("Scale the gradient by a float factor");
@@ -43,6 +44,8 @@ struct SoftmaxOutputParam : public dmlc::Parameter<SoftmaxOutputParam> {
     DMLC_DECLARE_FIELD(use_ignore).set_default(false)
     .describe("If set to true, the ignore_label value will not contribute "
               "to the backward gradient");
+    DMLC_DECLARE_FIELD(is_hidden_layer).set_default(false)
+    .describe("If set to true, out_grad is needed in backward");
   };
 };
 
@@ -105,7 +108,15 @@ class SoftmaxOutputOp : public Operator {
       } else {
         SoftmaxGrad(grad, out, label);
       }
-      grad *= DType(param_.grad_scale/s3[2]);
+      if (!param_.is_hidden_layer) {
+        grad *= DType(param_.grad_scale/s3[2]);
+      }
+      else {
+        Tensor<xpu, 3, DType> o_grad =
+          out_grad[softmaxout_enum::kOut].get_with_shape<xpu, 3, DType>(s3, s);
+        grad *= DType(param_.grad_scale);
+        grad *= o_grad;
+      }
     } else {
       const TShape& label_shape = in_data[softmaxout_enum::kLabel].shape_;
       Tensor<xpu, 1, DType> label = in_data[softmaxout_enum::kLabel].get_with_shape<xpu, 1, DType>(
@@ -199,9 +210,13 @@ class SoftmaxOutputProp : public OperatorProperty {
                                            const std::vector<int> &out_grad,
                                            const std::vector<int> &in_data,
                                            const std::vector<int> &out_data) const override {
-    return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    if (param_.is_hidden_layer) {
+      return {out_grad[softmaxout_enum::kOut], in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    }
+    else {
+      return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]};
+    }
   }
-
   std::vector<std::pair<int, void*> > BackwardInplaceOption(
     const std::vector<int> &out_grad,
     const std::vector<int> &in_data,
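
Note on the SoftmaxOutput change above: with is_hidden_layer=false the backward pass keeps the existing behaviour (the SoftmaxGrad result is scaled by grad_scale/s3[2]); with is_hidden_layer=true it instead scales the result by grad_scale and multiplies it elementwise by the incoming out_grad, which DeclareBackwardDependency now requests. A minimal standalone sketch of that arithmetic for one sample follows (plain C++, not MXNet code; the array values are illustrative assumptions):

// sketch_is_hidden_layer_backward.cc -- illustrates only the new backward branch.
#include <cstdio>
#include <vector>

int main() {
  const float grad_scale = 1.0f;
  // Assumed values: softmax output p, one-hot label y, incoming gradient o_grad.
  std::vector<float> p      = {0.7f, 0.2f, 0.1f};
  std::vector<float> y      = {1.0f, 0.0f, 0.0f};
  std::vector<float> o_grad = {0.5f, -0.3f, 0.1f};

  // SoftmaxGrad(grad, out, label) leaves grad = p - one_hot(label).
  std::vector<float> grad(p.size());
  for (size_t i = 0; i < p.size(); ++i) grad[i] = p[i] - y[i];

  // is_hidden_layer == true branch: grad *= grad_scale; grad *= o_grad (elementwise).
  for (size_t i = 0; i < grad.size(); ++i) grad[i] = grad[i] * grad_scale * o_grad[i];

  for (float g : grad) std::printf("%f\n", g);
  return 0;
}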