
Add rrelu, dropout #85

Merged
merged 1 commit on Sep 16, 2015
36 changes: 17 additions & 19 deletions src/operator/cudnn_convolution-inl.h
@@ -55,7 +55,7 @@ class CuDNNConvolutionOp : public Operator {
Init(s, in_data, out_data);
}
Tensor<gpu, 1> workspace = ctx.requested[kTempSpace].get_space<gpu>(
- mshadow::Shape1(workspace_), s);
+ mshadow::Shape1(forward_workspace_), s);
CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_,
&alpha,
in_desc_,
@@ -65,7 +65,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
algo_,
workspace.dptr_,
- workspace_size_,
+ forward_workspace_byte_,
&beta,
out_desc_,
out.dptr_), CUDNN_STATUS_SUCCESS);
@@ -106,7 +106,7 @@ class CuDNNConvolutionOp : public Operator {
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> gdata = in_grad[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 1> workspace = ctx.requested[kTempSpace].get_space<gpu>(
- mshadow::Shape1(workspace_), s);
+ mshadow::Shape1(backward_workspace_), s);
if (!param_.no_bias) {
Tensor<gpu, 1> gbias = in_grad[kBias].get<gpu, 1, real_t>(s);
CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_,
@@ -126,7 +126,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
back_algo_w_,
workspace.dptr_,
- workspace_size_,
+ backward_workspace_byte_,
&beta,
filter_desc_,
gwmat.dptr_), CUDNN_STATUS_SUCCESS);
@@ -139,7 +139,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
back_algo_,
workspace.dptr_,
- workspace_size_,
+ backward_workspace_byte_,
&beta,
in_desc_,
gdata.dptr_), CUDNN_STATUS_SUCCESS);
@@ -155,7 +155,7 @@ class CuDNNConvolutionOp : public Operator {
CHECK_EQ(out_data.size(), 1);
if (!init_cudnn_) {
init_cudnn_ = true;
- size_t workspace = static_cast<size_t>(param_.workspace * sizeof(real_t));
+ size_t workspace_byte = static_cast<size_t>(param_.workspace * sizeof(real_t));
size_t back_size = 0;
size_t back_size_w = 0;
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
@@ -210,23 +210,23 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
out_desc_,
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
- workspace,
+ workspace_byte,
&algo_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnGetConvolutionBackwardFilterAlgorithm(s->dnn_handle_,
in_desc_,
out_desc_,
conv_desc_,
filter_desc_,
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
- workspace,
+ workspace_byte,
&back_algo_w_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnGetConvolutionBackwardDataAlgorithm(s->dnn_handle_,
filter_desc_,
out_desc_,
conv_desc_,
in_desc_,
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
- workspace,
+ workspace_byte,
&back_algo_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnGetConvolutionBackwardDataWorkspaceSize(s->dnn_handle_,
filter_desc_,
@@ -242,26 +242,24 @@ class CuDNNConvolutionOp : public Operator {
filter_desc_,
back_algo_w_,
&back_size_w), CUDNN_STATUS_SUCCESS);
- back_size = std::max(back_size, back_size_w);
+ backward_workspace_byte_ = std::max(back_size, back_size_w);
CHECK_EQ(cudnnGetConvolutionForwardWorkspaceSize(s->dnn_handle_,
in_desc_,
filter_desc_,
conv_desc_,
out_desc_,
algo_,
- &workspace), CUDNN_STATUS_SUCCESS);
- workspace = std::max(workspace, back_size);
- CHECK_GE(param_.workspace * sizeof(real_t), workspace + sizeof(real_t))
- << "\nMinimum workspace: " << workspace << "\n"
- << "Given: " << param_.workspace * sizeof(real_t);
- workspace_ = workspace / sizeof(real_t) + 1;
- workspace_size_ = workspace_ * sizeof(real_t);
+ &forward_workspace_byte_), CUDNN_STATUS_SUCCESS);
+ forward_workspace_ = forward_workspace_byte_ / sizeof(real_t) + 1;
+ backward_workspace_ = backward_workspace_byte_ / sizeof(real_t) + 1;
}
}

bool init_cudnn_;
- size_t workspace_;
- size_t workspace_size_;
+ size_t forward_workspace_;
+ size_t backward_workspace_;
+ size_t forward_workspace_byte_;
+ size_t backward_workspace_byte_;
cudnnDataType_t dtype_;
cudnnTensorDescriptor_t in_desc_;
cudnnTensorDescriptor_t out_desc_;
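In outline, the change above replaces the single workspace_ / workspace_size_ pair with separate forward and backward sizes taken directly from cuDNN's workspace queries, each rounded up from a byte count to a whole number of real_t elements before the temp-space request. The snippet below is a minimal, standalone sketch of that bookkeeping only; the byte counts are made-up placeholders standing in for what cudnnGetConvolution*WorkspaceSize would report, and it is not the operator code itself.

#include <algorithm>
#include <cstddef>
#include <iostream>

int main() {
  using real_t = float;
  // Placeholder byte counts standing in for the cuDNN workspace queries.
  std::size_t forward_workspace_byte = 123457;
  std::size_t back_size = 65536;      // backward-data workspace, in bytes
  std::size_t back_size_w = 98304;    // backward-filter workspace, in bytes
  std::size_t backward_workspace_byte = std::max(back_size, back_size_w);

  // Bytes -> real_t element counts; the +1 keeps the buffer at least as
  // large as the byte requirement after the truncating division.
  std::size_t forward_workspace = forward_workspace_byte / sizeof(real_t) + 1;
  std::size_t backward_workspace = backward_workspace_byte / sizeof(real_t) + 1;

  std::cout << "forward elements: " << forward_workspace
            << ", backward elements: " << backward_workspace << std::endl;
  return 0;
}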
192 changes: 192 additions & 0 deletions src/operator/dropout-inl.h
@@ -0,0 +1,192 @@
/*!
* Copyright (c) 2015 by Contributors
* \file dropout-inl.h
* \brief
* \author Bing Xu
*/

#ifndef MXNET_OPERATOR_DROPOUT_INL_H_
#define MXNET_OPERATOR_DROPOUT_INL_H_
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <map>
#include <vector>
#include <string>
#include <utility>
#include "./operator_common.h"
#include "./mshadow_op.h"

enum DropoutOpInputs {kData};
enum DropoutOpOutputs {kOut, kMask};
enum DropoutOpForwardResource {kRandom};

namespace mxnet {
namespace op {

struct DropoutParam : public dmlc::Parameter<DropoutParam> {
float p;
DMLC_DECLARE_PARAMETER(DropoutParam) {
DMLC_DECLARE_FIELD(p).set_default(0.5)
.set_range(0, 1)
.describe("Fraction of the input that gets dropped out at training time");
}
}; // struct DropoutParam

template<typename xpu>
class DropoutOp : public Operator {
public:
explicit DropoutOp(DropoutParam param) {
this->pkeep_ = 1.0f - param.p;
}

virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_states) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(in_data.size(), 1);
if (ctx.is_train) {
CHECK_EQ(out_data.size(), 2);
}
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4> data, out, mask;
if (in_data[kData].ndim() == 2) {
uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1};
TShape dshape(ds, ds + 4);
data = in_data[kData].get_with_shape<xpu, 4, real_t>(dshape, s);
out = out_data[kOut].get_with_shape<xpu, 4, real_t>(dshape, s);
if (ctx.is_train) {
mask = out_data[kMask].get_with_shape<xpu, 4, real_t>(dshape, s);
}
} else {
data = in_data[kData].get<xpu, 4, real_t>(s);
out = out_data[kOut].get<xpu, 4, real_t>(s);
if (ctx.is_train) {
mask = out_data[kMask].get<xpu, 4, real_t>(s);
}
}
if (ctx.is_train) {
Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu>(s);
mask = F<mshadow_op::threshold>(prnd->uniform(mask.shape_), pkeep_) * (1.0f / pkeep_);
Assign(out, req[kOut], data * mask);
} else {
Assign(out, req[kOut], data + 0.0f);
}
}

virtual void Backward(const OpContext &ctx,
const std::vector<TBlob> &out_grad,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &in_grad,
const std::vector<TBlob> &aux_states) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(out_grad.size(), 1);
CHECK_EQ(in_grad.size(), 1);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4> grad, gdata, mask;
if (out_grad[kOut].ndim() == 2) {
uint32_t ds[] = {out_grad[kOut].shape_[0], out_grad[kOut].shape_[1], 1, 1};
TShape dshape(ds, ds + 4);
gdata = in_grad[kData].get_with_shape<xpu, 4, real_t>(dshape, s);
grad = out_grad[kOut].get_with_shape<xpu, 4, real_t>(dshape, s);
mask = out_data[kMask].get_with_shape<xpu, 4, real_t>(dshape, s);
} else {
grad = out_grad[kOut].get<xpu, 4, real_t>(s);
gdata = in_grad[kData].get<xpu, 4, real_t>(s);
mask = out_data[kMask].get<xpu, 4, real_t>(s);
}
Assign(gdata, req[kData], grad * mask);
}

private:
real_t pkeep_;
}; // class DropoutOp


template<typename xpu>
Operator *CreateOp(DropoutParam param);

#if DMLC_USE_CXX11
class DropoutProp : public OperatorProperty {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
param_.Init(kwargs);
}

bool InferShape(std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape,
std::vector<TShape> *aux_shape) const override {
using namespace mshadow;
CHECK_EQ(in_shape->size(), 1);
const TShape &dshape = in_shape->at(0);
if (dshape.ndim() == 0) return false;
out_shape->clear();
out_shape->push_back(dshape);
out_shape->push_back(dshape);
return true;
}

OperatorProperty* Copy() const override {
auto ptr = new DropoutProp();
ptr->param_ = param_;
return ptr;
}

std::string TypeString() const override {
return "Dropout";
}

std::vector<int> DeclareBackwardDependency(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data) const override {
return {out_grad[kOut], out_data[kMask]};
}

std::vector<std::pair<int, void*> > BackwardInplaceOption(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data,
const std::vector<void*> &in_grad) const override {
return {{out_grad[kOut], in_grad[kData]}};
}

std::vector<std::pair<int, void*> > ForwardInplaceOption(
const std::vector<int> &in_data,
const std::vector<void*> &out_data) const override {
return {{in_data[kData], out_data[kOut]}};
}

std::vector<ResourceRequest> ForwardResource(
const std::vector<TShape> &in_shape) const override {
return {ResourceRequest::kRandom};
}

int NumVisibleOutputs() const override {
return 1;
}

int NumOutputs() const override {
return 2;
}

std::vector<std::string> ListOutputs() const override {
return {"output", "mask"};
}

Operator* CreateOperator(Context ctx) const;

private:
DropoutParam param_;
}; // class DropoutProp
#endif // DMLC_USE_CXX11
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_DROPOUT_INL_H_
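For reference, Forward above implements inverted dropout: at training time each element is kept with probability pkeep_ = 1 - p and the surviving elements are scaled by 1/pkeep_, so the expected output matches the plain pass-through used at inference. The sketch below restates that math with plain std:: containers instead of mshadow tensors; the function and variable names are illustrative only, and it assumes mshadow_op::threshold(a, b) evaluates to 1 when a < b and 0 otherwise.

#include <cstddef>
#include <cstdio>
#include <random>
#include <vector>

// Illustrative stand-alone version of the inverted-dropout forward pass.
void dropout_forward(const std::vector<float>& data, float pkeep,
                     std::vector<float>* out, std::vector<float>* mask,
                     std::mt19937* rng) {
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f);
  out->resize(data.size());
  mask->resize(data.size());
  for (std::size_t i = 0; i < data.size(); ++i) {
    // Keep with probability pkeep, then scale by 1/pkeep so that
    // the expected value of out[i] equals data[i] during training.
    float keep = uniform(*rng) < pkeep ? 1.0f : 0.0f;
    (*mask)[i] = keep / pkeep;
    (*out)[i] = data[i] * (*mask)[i];
  }
}

int main() {
  std::mt19937 rng(42);
  std::vector<float> x(8, 1.0f), y, m;
  dropout_forward(x, 0.5f, &y, &m, &rng);
  for (float v : y) std::printf("%.1f ", v);  // each element is 0.0 or 2.0
  std::printf("\n");
  return 0;
}

The corresponding backward step is simply gdata[i] = grad[i] * mask[i], matching the Assign(gdata, req[kData], grad * mask) call above.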

32 changes: 32 additions & 0 deletions src/operator/dropout.cc
@@ -0,0 +1,32 @@
/*!
* Copyright (c) 2015 by Contributors
* \file dropout.cc
* \brief
* \author Bing Xu
*/

#include "./dropout-inl.h"

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<cpu>(DropoutParam param) {
return new DropoutOp<cpu>(param);
}

// DO_BIND_DISPATCH comes from operator_common.h
Operator *DropoutProp::CreateOperator(Context ctx) const {
DO_BIND_DISPATCH(CreateOp, param_);
}

DMLC_REGISTER_PARAMETER(DropoutParam);

MXNET_REGISTER_OP_PROPERTY(Dropout, DropoutProp)
.describe("Apply dropout to input")
.add_argument("data", "Symbol", "Input data to dropout.")
.add_arguments(DropoutParam::__FIELDS__());

} // namespace op
} // namespace mxnet


19 changes: 19 additions & 0 deletions src/operator/dropout.cu
@@ -0,0 +1,19 @@
/*!
* Copyright (c) 2015 by Contributors
* \file dropout.cu
* \brief
* \author Bing Xu
*/

#include "./dropout-inl.h"

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<gpu>(DropoutParam param) {
return new DropoutOp<gpu>(param);
}
} // namespace op
} // namespace mxnet
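Taken together, dropout.cc and dropout.cu illustrate the per-device registration pattern: CreateOp<xpu> is declared once in the header, explicitly specialized for cpu in the .cc file and for gpu in the .cu file, and DropoutProp::CreateOperator (via DO_BIND_DISPATCH) selects the specialization that matches the execution context. Below is a minimal, self-contained sketch of that dispatch idea; the cpu, gpu, Operator, and DeviceType names here are simplified stand-ins for illustration, not mxnet's actual classes.

#include <iostream>

// Simplified stand-ins for mxnet's device tags and Operator base class.
struct cpu {};
struct gpu {};
struct Operator {
  virtual ~Operator() {}
  virtual const char* device() const = 0;
};

// One declaration, one explicit specialization per device tag.
template <typename xpu>
Operator* CreateOp();

template <>
Operator* CreateOp<cpu>() {
  struct CpuDropoutOp : Operator {
    const char* device() const override { return "cpu"; }
  };
  return new CpuDropoutOp();
}

template <>
Operator* CreateOp<gpu>() {
  struct GpuDropoutOp : Operator {
    const char* device() const override { return "gpu"; }
  };
  return new GpuDropoutOp();
}

enum class DeviceType { kCPU, kGPU };

// Roughly the role DO_BIND_DISPATCH plays: route to the specialization
// that matches the execution context's device type.
Operator* CreateOperator(DeviceType dev) {
  return dev == DeviceType::kCPU ? CreateOp<cpu>() : CreateOp<gpu>();
}

int main() {
  Operator* op = CreateOperator(DeviceType::kGPU);
  std::cout << "created operator on: " << op->device() << std::endl;
  delete op;
  return 0;
}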

