diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index c803cd72449..fc178e4c904 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -16,6 +16,101 @@ namespace caffe {
+/**
+ * @brief Abstract base class that factors out the BLAS code common to
+ *        ConvolutionLayer and DeconvolutionLayer.
+ */
+template <typename Dtype>
+class BaseConvolutionLayer : public Layer<Dtype> {
+ public:
+  explicit BaseConvolutionLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline int MinBottomBlobs() const { return 1; }
+  virtual inline int MinTopBlobs() const { return 1; }
+  virtual inline bool EqualNumBottomTopBlobs() const { return true; }
+
+ protected:
+  // Helper functions that abstract away the column buffer and gemm arguments.
+  // The last argument in forward_cpu_gemm is so that we can skip the im2col if
+  // we just called weight_cpu_gemm with the same input.
+  void forward_cpu_gemm(const Dtype* input, const Dtype* weights,
+      Dtype* output, bool skip_im2col = false);
+  void forward_cpu_bias(Dtype* output, const Dtype* bias);
+  void backward_cpu_gemm(const Dtype* input, const Dtype* weights,
+      Dtype* output);
+  void weight_cpu_gemm(const Dtype* input, const Dtype* output, Dtype*
+      weights);
+  void backward_cpu_bias(Dtype* bias, const Dtype* input);
+
+#ifndef CPU_ONLY
+  void forward_gpu_gemm(const Dtype* col_input, const Dtype* weights,
+      Dtype* output, bool skip_im2col = false);
+  void forward_gpu_bias(Dtype* output, const Dtype* bias);
+  void backward_gpu_gemm(const Dtype* input, const Dtype* weights,
+      Dtype* col_output);
+  void weight_gpu_gemm(const Dtype* col_input, const Dtype* output, Dtype*
+      weights);
+  void backward_gpu_bias(Dtype* bias, const Dtype* input);
+#endif
+
+  // reverse_dimensions should return true iff we are implementing deconv, so
+  // that conv helpers know which dimensions are which.
+  virtual bool reverse_dimensions() = 0;
+  // Compute height_out_ and width_out_ from other parameters.
+  virtual void compute_output_shape() = 0;
+
+  int kernel_h_, kernel_w_;
+  int stride_h_, stride_w_;
+  int num_;
+  int channels_;
+  int pad_h_, pad_w_;
+  int height_, width_;
+  int group_;
+  int num_output_;
+  int height_out_, width_out_;
+  bool bias_term_;
+  bool is_1x1_;
+
+ private:
+  // wrap im2col/col2im so we don't have to remember the (long) argument lists
+  inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) {
+    im2col_cpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
+        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
+  }
+  inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
+    col2im_cpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
+        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
+  }
+#ifndef CPU_ONLY
+  inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
+    im2col_gpu(data, conv_in_channels_, conv_in_height_, conv_in_width_,
+        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, col_buff);
+  }
+  inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
+    col2im_gpu(col_buff, conv_in_channels_, conv_in_height_, conv_in_width_,
+        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_, data);
+  }
+#endif
+
+  int conv_out_channels_;
+  int conv_in_channels_;
+  int conv_out_spatial_dim_;
+  int conv_in_height_;
+  int conv_in_width_;
+  int kernel_dim_;
+  int weight_offset_;
+  int col_offset_;
+  int output_offset_;
+
+  Blob<Dtype> col_buffer_;
+  Blob<Dtype> bias_multiplier_;
+};
+
 /**
  * @brief Convolves the input image with a bank of learned filters,
  *        and (optionally) adds biases.
@@ -33,7 +128,7 @@ namespace caffe {
  *    the output channel N' columns of the output matrix.
  */
 template <typename Dtype>
-class ConvolutionLayer : public Layer<Dtype> {
+class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
  public:
   /**
    * @param param provides ConvolutionParameter convolution_param,
@@ -64,18 +159,10 @@ class ConvolutionLayer : public Layer<Dtype> {
    *    kernels + stream parallelism) engines.
    */
   explicit ConvolutionLayer(const LayerParameter& param)
-      : Layer<Dtype>(param) {}
-  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-      const vector<Blob<Dtype>*>& top);
-  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
-      const vector<Blob<Dtype>*>& top);
-
+      : BaseConvolutionLayer<Dtype>(param) {}
   virtual inline LayerParameter_LayerType type() const {
     return LayerParameter_LayerType_CONVOLUTION;
   }
-  virtual inline int MinBottomBlobs() const { return 1; }
-  virtual inline int MinTopBlobs() const { return 1; }
-  virtual inline bool EqualNumBottomTopBlobs() const { return true; }
 
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
@@ -86,30 +173,44 @@ class ConvolutionLayer : public Layer<Dtype> {
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual inline bool reverse_dimensions() { return false; }
+  virtual void compute_output_shape();
+};
 
-  int kernel_h_, kernel_w_;
-  int stride_h_, stride_w_;
-  int num_;
-  int channels_;
-  int pad_h_, pad_w_;
-  int height_, width_;
-  int group_;
-  int num_output_;
-  int height_out_, width_out_;
-  bool bias_term_;
-  bool is_1x1_;
+/**
+ * @brief Convolve the input with a bank of learned filters, and (optionally)
+ *        add biases, treating filters and convolution parameters in the
+ *        opposite sense from ConvolutionLayer.
+ *
+ *   ConvolutionLayer computes each output value by dotting an input window
+ *   with a filter; DeconvolutionLayer multiplies each input value by a
+ *   filter elementwise, and sums over the resulting output windows. In other
+ *   words, DeconvolutionLayer is ConvolutionLayer with the forward and
+ *   backward passes reversed. DeconvolutionLayer reuses ConvolutionParameter
+ *   for its parameters, but they take the opposite sense from
+ *   ConvolutionLayer (so padding is removed from the output rather than
+ *   added to the input, and stride results in upsampling rather than
+ *   downsampling).
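+ *
+ * As a worked shape example (matching compute_output_shape in
+ * deconv_layer.cpp and the shapes exercised by the tests): a 6x4 bottom
+ * with kernel_size 3, stride 2, and pad 0 yields a 13x9 top, since
+ * height_out = stride * (height - 1) + kernel_size - 2 * pad = 2 * 5 + 3.
+ *
+ * A hypothetical prototxt sketch (layer and blob names are illustrative
+ * only):
+ *
+ *   layers {
+ *     name: "upsample"
+ *     type: DECONVOLUTION
+ *     bottom: "score"
+ *     top: "upscore"
+ *     convolution_param { num_output: 4 kernel_size: 3 stride: 2 }
+ *   }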
+ */
+template <typename Dtype>
+class DeconvolutionLayer : public BaseConvolutionLayer<Dtype> {
+ public:
+  explicit DeconvolutionLayer(const LayerParameter& param)
+      : BaseConvolutionLayer<Dtype>(param) {}
+  virtual inline LayerParameter_LayerType type() const {
+    return LayerParameter_LayerType_DECONVOLUTION;
+  }
 
-  /// M_ is the channel dimension of the output for a single group, which is the
-  /// leading dimension of the filter matrix.
-  int M_;
-  /// K_ is the dimension of an unrolled input for a single group, which is the
-  /// leading dimension of the data matrix.
-  int K_;
-  /// N_ is the spatial dimension of the output, the H x W, which are the last
-  /// dimensions of the data and filter matrices.
-  int N_;
-  Blob<Dtype> col_buffer_;
-  Blob<Dtype> bias_multiplier_;
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual inline bool reverse_dimensions() { return true; }
+  virtual void compute_output_shape();
 };
 
 #ifdef USE_CUDNN
diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp
new file mode 100644
index 00000000000..dccd5170c11
--- /dev/null
+++ b/src/caffe/layers/base_conv_layer.cpp
@@ -0,0 +1,293 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/util/im2col.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  // Configure the kernel size, padding, stride, and inputs.
+  ConvolutionParameter conv_param = this->layer_param_.convolution_param();
+  CHECK(!conv_param.has_kernel_size() !=
+      !(conv_param.has_kernel_h() && conv_param.has_kernel_w()))
+      << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
+  CHECK(conv_param.has_kernel_size() ||
+      (conv_param.has_kernel_h() && conv_param.has_kernel_w()))
+      << "For non-square filters both kernel_h and kernel_w are required.";
+  CHECK((!conv_param.has_pad() && conv_param.has_pad_h()
+      && conv_param.has_pad_w())
+      || (!conv_param.has_pad_h() && !conv_param.has_pad_w()))
+      << "pad is pad OR pad_h and pad_w are required.";
+  CHECK((!conv_param.has_stride() && conv_param.has_stride_h()
+      && conv_param.has_stride_w())
+      || (!conv_param.has_stride_h() && !conv_param.has_stride_w()))
+      << "Stride is stride OR stride_h and stride_w are required.";
+  if (conv_param.has_kernel_size()) {
+    kernel_h_ = kernel_w_ = conv_param.kernel_size();
+  } else {
+    kernel_h_ = conv_param.kernel_h();
+    kernel_w_ = conv_param.kernel_w();
+  }
+  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
+  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
+  if (!conv_param.has_pad_h()) {
+    pad_h_ = pad_w_ = conv_param.pad();
+  } else {
+    pad_h_ = conv_param.pad_h();
+    pad_w_ = conv_param.pad_w();
+  }
+  if (!conv_param.has_stride_h()) {
+    stride_h_ = stride_w_ = conv_param.stride();
+  } else {
+    stride_h_ = conv_param.stride_h();
+    stride_w_ = conv_param.stride_w();
+  }
+  // Special case: im2col is the identity for 1x1 convolution with stride 1
+  // and no padding, so flag for skipping the buffer and transformation.
+  is_1x1_ = kernel_w_ == 1 && kernel_h_ == 1
+      && stride_h_ == 1 && stride_w_ == 1 && pad_h_ == 0 && pad_w_ == 0;
+  // Configure output channels and groups.
+  channels_ = bottom[0]->channels();
+  num_output_ = this->layer_param_.convolution_param().num_output();
+  CHECK_GT(num_output_, 0);
+  group_ = this->layer_param_.convolution_param().group();
+  CHECK_EQ(channels_ % group_, 0);
+  CHECK_EQ(num_output_ % group_, 0)
+      << "Number of output should be multiples of group.";
+  if (reverse_dimensions()) {
+    conv_out_channels_ = channels_;
+    conv_in_channels_ = num_output_;
+  } else {
+    conv_out_channels_ = num_output_;
+    conv_in_channels_ = channels_;
+  }
+  // Handle the parameters: weights and biases.
+  // - blobs_[0] holds the filter weights
+  // - blobs_[1] holds the biases (optional)
+  bias_term_ = this->layer_param_.convolution_param().bias_term();
+  if (this->blobs_.size() > 0) {
+    LOG(INFO) << "Skipping parameter initialization";
+  } else {
+    if (bias_term_) {
+      this->blobs_.resize(2);
+    } else {
+      this->blobs_.resize(1);
+    }
+    // Initialize and fill the weights:
+    // output channels x input channels per-group x kernel height x kernel width
+    this->blobs_[0].reset(new Blob<Dtype>(
+        conv_out_channels_, conv_in_channels_ / group_, kernel_h_, kernel_w_));
+    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
+        this->layer_param_.convolution_param().weight_filler()));
+    weight_filler->Fill(this->blobs_[0].get());
+    // If necessary, initialize and fill the biases:
+    // 1 x 1 x 1 x output channels
+    if (bias_term_) {
+      this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, num_output_));
+      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
+          this->layer_param_.convolution_param().bias_filler()));
+      bias_filler->Fill(this->blobs_[1].get());
+    }
+  }
+  // Propagate gradients to the parameters (as directed by backward pass).
+  this->param_propagate_down_.resize(this->blobs_.size(), true);
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  num_ = bottom[0]->num();
+  height_ = bottom[0]->height();
+  width_ = bottom[0]->width();
+  CHECK_EQ(bottom[0]->channels(), channels_) << "Input size incompatible with"
+    " convolution kernel.";
+  // TODO: generalize to handle inputs of different shapes.
+  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
+    CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num.";
+    CHECK_EQ(channels_, bottom[bottom_id]->channels())
+        << "Inputs must have same channels.";
+    CHECK_EQ(height_, bottom[bottom_id]->height())
+        << "Inputs must have same height.";
+    CHECK_EQ(width_, bottom[bottom_id]->width())
+        << "Inputs must have same width.";
+  }
+  // Shape the tops.
+  compute_output_shape();
+  for (int top_id = 0; top_id < top.size(); ++top_id) {
+    top[top_id]->Reshape(num_, num_output_, height_out_, width_out_);
+  }
+  if (reverse_dimensions()) {
+    conv_in_height_ = height_out_;
+    conv_in_width_ = width_out_;
+    conv_out_spatial_dim_ = height_ * width_;
+  } else {
+    conv_in_height_ = height_;
+    conv_in_width_ = width_;
+    conv_out_spatial_dim_ = height_out_ * width_out_;
+  }
+  kernel_dim_ = conv_in_channels_ * kernel_h_ * kernel_w_;
+  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_;
+  col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_;
+  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
+  // The im2col result buffer will only hold one image at a time to avoid
+  // overly large memory usage. In the special case of 1x1 convolution
+  // it goes lazily unused to save memory.
+  if (reverse_dimensions()) {
+    col_buffer_.Reshape(1, kernel_dim_, height_, width_);
+  } else {
+    col_buffer_.Reshape(1, kernel_dim_, height_out_, width_out_);
+  }
+  // Set up the all ones "bias multiplier" for adding biases by BLAS
+  if (bias_term_) {
+    bias_multiplier_.Reshape(1, 1, 1, height_out_ * width_out_);
+    caffe_set(bias_multiplier_.count(), Dtype(1),
+        bias_multiplier_.mutable_cpu_data());
+  }
+}
+
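+// The helpers below cast convolution as im2col followed by one GEMM per
+// group (a dimension sketch, using the quantities set up in Reshape above):
+// each group's weights form a (conv_out_channels_ / group_) x
+// (kernel_dim_ / group_) matrix, the column buffer a (kernel_dim_ / group_)
+// x conv_out_spatial_dim_ matrix, and their product is that group's output.
+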
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,
+    const Dtype* weights, Dtype* output, bool skip_im2col) {
+  const Dtype* col_buff = input;
+  if (!is_1x1_) {
+    if (!skip_im2col) {
+      conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
+    }
+    col_buff = col_buffer_.cpu_data();
+  }
+  for (int g = 0; g < group_; ++g) {
+    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
+        group_, conv_out_spatial_dim_, kernel_dim_ / group_,
+        (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
+        (Dtype)0., output + output_offset_ * g);
+  }
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::forward_cpu_bias(Dtype* output,
+    const Dtype* bias) {
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+      height_out_ * width_out_, 1, (Dtype)1., bias, bias_multiplier_.cpu_data(),
+      (Dtype)1., output);
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::backward_cpu_gemm(const Dtype* output,
+    const Dtype* weights, Dtype* input) {
+  Dtype* col_buff = col_buffer_.mutable_cpu_data();
+  if (is_1x1_) {
+    col_buff = input;
+  }
+  for (int g = 0; g < group_; ++g) {
+    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, kernel_dim_ / group_,
+        conv_out_spatial_dim_, conv_out_channels_ / group_,
+        (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g,
+        (Dtype)0., col_buff + col_offset_ * g);
+  }
+  if (!is_1x1_) {
+    conv_col2im_cpu(col_buff, input);
+  }
+}
+
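+// Note that weight_cpu_gemm below accumulates into the weight diff (its
+// GEMM passes beta = 1), so callers are expected to zero the diff once per
+// backward pass, as the conv/deconv Backward implementations do.
+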
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::weight_cpu_gemm(const Dtype* input,
+    const Dtype* output, Dtype* weights) {
+  const Dtype* col_buff = input;
+  if (!is_1x1_) {
+    conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
+    col_buff = col_buffer_.cpu_data();
+  }
+  for (int g = 0; g < group_; ++g) {
+    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, conv_out_channels_ / group_,
+        kernel_dim_ / group_, conv_out_spatial_dim_,
+        (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g,
+        (Dtype)1., weights + weight_offset_ * g);
+  }
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::backward_cpu_bias(Dtype* bias,
+    const Dtype* input) {
+  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, height_out_ * width_out_, 1.,
+      input, bias_multiplier_.cpu_data(), 1., bias);
+}
+
+#ifndef CPU_ONLY
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::forward_gpu_gemm(const Dtype* input,
+    const Dtype* weights, Dtype* output, bool skip_im2col) {
+  const Dtype* col_buff = input;
+  if (!is_1x1_) {
+    if (!skip_im2col) {
+      conv_im2col_gpu(input, col_buffer_.mutable_gpu_data());
+    }
+    col_buff = col_buffer_.gpu_data();
+  }
+  for (int g = 0; g < group_; ++g) {
+    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
+        group_, conv_out_spatial_dim_, kernel_dim_ / group_,
+        (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
+        (Dtype)0., output + output_offset_ * g);
+  }
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::forward_gpu_bias(Dtype* output,
+    const Dtype* bias) {
+  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
+      height_out_ * width_out_, 1, (Dtype)1., bias, bias_multiplier_.gpu_data(),
+      (Dtype)1., output);
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
+    const Dtype* weights, Dtype* input) {
+  Dtype* col_buff = col_buffer_.mutable_gpu_data();
+  if (is_1x1_) {
+    col_buff = input;
+  }
+  for (int g = 0; g < group_; ++g) {
+    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, kernel_dim_ / group_,
+        conv_out_spatial_dim_, conv_out_channels_ / group_,
+        (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g,
+        (Dtype)0., col_buff + col_offset_ * g);
+  }
+  if (!is_1x1_) {
+    conv_col2im_gpu(col_buff, input);
+  }
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::weight_gpu_gemm(const Dtype* input,
+    const Dtype* output, Dtype* weights) {
+  const Dtype* col_buff = input;
+  if (!is_1x1_) {
+    conv_im2col_gpu(input, col_buffer_.mutable_gpu_data());
+    col_buff = col_buffer_.gpu_data();
+  }
+  for (int g = 0; g < group_; ++g) {
+    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, conv_out_channels_ / group_,
+        kernel_dim_ / group_, conv_out_spatial_dim_,
+        (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g,
+        (Dtype)1., weights + weight_offset_ * g);
+  }
+}
+
+template <typename Dtype>
+void BaseConvolutionLayer<Dtype>::backward_gpu_bias(Dtype* bias,
+    const Dtype* input) {
+  caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, height_out_ * width_out_, 1.,
+      input, bias_multiplier_.gpu_data(), 1., bias);
+}
+
+#endif  // !CPU_ONLY
+
+INSTANTIATE_CLASS(BaseConvolutionLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 0a032025bfb..9fd2fc6a15f 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -9,166 +9,26 @@ namespace caffe {
 
 template <typename Dtype>
-void ConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
-    const vector<Blob<Dtype>*>& top) {
-  // Configure the kernel size, padding, stride, and inputs.
-  ConvolutionParameter conv_param = this->layer_param_.convolution_param();
-  CHECK(!conv_param.has_kernel_size() !=
-      !(conv_param.has_kernel_h() && conv_param.has_kernel_w()))
-      << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
-  CHECK(conv_param.has_kernel_size() ||
-      (conv_param.has_kernel_h() && conv_param.has_kernel_w()))
-      << "For non-square filters both kernel_h and kernel_w are required.";
-  CHECK((!conv_param.has_pad() && conv_param.has_pad_h()
-      && conv_param.has_pad_w())
-      || (!conv_param.has_pad_h() && !conv_param.has_pad_w()))
-      << "pad is pad OR pad_h and pad_w are required.";
-  CHECK((!conv_param.has_stride() && conv_param.has_stride_h()
-      && conv_param.has_stride_w())
-      || (!conv_param.has_stride_h() && !conv_param.has_stride_w()))
-      << "Stride is stride OR stride_h and stride_w are required.";
-  if (conv_param.has_kernel_size()) {
-    kernel_h_ = kernel_w_ = conv_param.kernel_size();
-  } else {
-    kernel_h_ = conv_param.kernel_h();
-    kernel_w_ = conv_param.kernel_w();
-  }
-  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
-  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
-  if (!conv_param.has_pad_h()) {
-    pad_h_ = pad_w_ = conv_param.pad();
-  } else {
-    pad_h_ = conv_param.pad_h();
-    pad_w_ = conv_param.pad_w();
-  }
-  if (!conv_param.has_stride_h()) {
-    stride_h_ = stride_w_ = conv_param.stride();
-  } else {
-    stride_h_ = conv_param.stride_h();
-    stride_w_ = conv_param.stride_w();
-  }
-  // Special case: im2col is the identity for 1x1 convolution with stride 1
-  // and no padding, so flag for skipping the buffer and transformation.
-  is_1x1_ = kernel_w_ == 1 && kernel_h_ == 1
-      && stride_h_ == 1 && stride_w_ == 1 && pad_h_ == 0 && pad_w_ == 0;
-  // Configure output channels and groups.
-  channels_ = bottom[0]->channels();
-  num_output_ = this->layer_param_.convolution_param().num_output();
-  CHECK_GT(num_output_, 0);
-  group_ = this->layer_param_.convolution_param().group();
-  CHECK_EQ(channels_ % group_, 0);
-  CHECK_EQ(num_output_ % group_, 0)
-      << "Number of output should be multiples of group.";
-  // Handle the parameters: weights and biases.
-  // - blobs_[0] holds the filter weights
-  // - blobs_[1] holds the biases (optional)
-  bias_term_ = this->layer_param_.convolution_param().bias_term();
-  if (this->blobs_.size() > 0) {
-    LOG(INFO) << "Skipping parameter initialization";
-  } else {
-    if (bias_term_) {
-      this->blobs_.resize(2);
-    } else {
-      this->blobs_.resize(1);
-    }
-    // Initialize and fill the weights:
-    // output channels x input channels per-group x kernel height x kernel width
-    this->blobs_[0].reset(new Blob<Dtype>(
-        num_output_, channels_ / group_, kernel_h_, kernel_w_));
-    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
-        this->layer_param_.convolution_param().weight_filler()));
-    weight_filler->Fill(this->blobs_[0].get());
-    // If necessary, initialize and fill the biases:
-    // 1 x 1 x 1 x output channels
-    if (bias_term_) {
-      this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, num_output_));
-      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
-          this->layer_param_.convolution_param().bias_filler()));
-      bias_filler->Fill(this->blobs_[1].get());
-    }
-  }
-  // Propagate gradients to the parameters (as directed by backward pass).
-  this->param_propagate_down_.resize(this->blobs_.size(), true);
-}
-
-template <typename Dtype>
-void ConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
-    const vector<Blob<Dtype>*>& top) {
-  num_ = bottom[0]->num();
-  height_ = bottom[0]->height();
-  width_ = bottom[0]->width();
-  CHECK_EQ(bottom[0]->channels(), channels_) << "Input size incompatible with"
-    " convolution kernel.";
-  // TODO: generalize to handle inputs of different shapes.
-  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
-    CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num.";
-    CHECK_EQ(channels_, bottom[bottom_id]->channels())
-        << "Inputs must have same channels.";
-    CHECK_EQ(height_, bottom[bottom_id]->height())
-        << "Inputs must have same height.";
-    CHECK_EQ(width_, bottom[bottom_id]->width())
-        << "Inputs must have same width.";
-  }
-  // Shape the tops.
-  height_out_ =
-      (height_ + 2 * pad_h_ - kernel_h_) / stride_h_ + 1;
-  width_out_ = (width_ + 2 * pad_w_ - kernel_w_) / stride_w_ + 1;
-  for (int top_id = 0; top_id < top.size(); ++top_id) {
-    top[top_id]->Reshape(num_, num_output_, height_out_, width_out_);
-  }
-  // Prepare the matrix multiplication computation.
-  // Each input will be convolved as a single GEMM.
-  M_ = num_output_ / group_;
-  K_ = channels_ * kernel_h_ * kernel_w_ / group_;
-  N_ = height_out_ * width_out_;
-  // The im2col result buffer will only hold one image at a time to avoid
-  // overly large memory usage. In the special case of 1x1 convolution
-  // it goes lazily unused to save memory.
-  col_buffer_.Reshape(
-      1, channels_ * kernel_h_ * kernel_w_, height_out_, width_out_);
-  // Set up the all ones "bias multiplier" for adding biases by BLAS
-  if (bias_term_) {
-    bias_multiplier_.Reshape(1, 1, 1, N_);
-    caffe_set(N_, Dtype(1), bias_multiplier_.mutable_cpu_data());
-  }
+void ConvolutionLayer<Dtype>::compute_output_shape() {
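+  // The standard convolution output relation; DeconvolutionLayer inverts it
+  // (see compute_output_shape in deconv_layer.cpp).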
+  this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_)
+      / this->stride_h_ + 1;
+  this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_)
+      / this->stride_w_ + 1;
 }
 
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
+  const Dtype* weight = this->blobs_[0]->cpu_data();
   for (int i = 0; i < bottom.size(); ++i) {
     const Dtype* bottom_data = bottom[i]->cpu_data();
     Dtype* top_data = top[i]->mutable_cpu_data();
-    Dtype* col_buff = NULL;
-    if (!is_1x1_) {
-      col_buff = col_buffer_.mutable_cpu_data();
-    }
-    const Dtype* weight = this->blobs_[0]->cpu_data();
-    int weight_offset = M_ * K_;  // number of filter parameters in a group
-    int col_offset = K_ * N_;  // number of values in an input region / column
-    int top_offset = M_ * N_;  // number of values in an output region / column
-    for (int n = 0; n < num_; ++n) {
-      // im2col transformation: unroll input regions for filtering
-      // into column matrix for multplication.
-      if (!is_1x1_) {
-        im2col_cpu(bottom_data + bottom[i]->offset(n), channels_, height_,
-            width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
-            col_buff);
-      } else {  // special case for 1x1 convolution
-        col_buff = bottom[i]->mutable_cpu_data() + bottom[i]->offset(n);
-      }
-      // Take inner products for groups.
-      for (int g = 0; g < group_; ++g) {
-        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
-          (Dtype)1., weight + weight_offset * g, col_buff + col_offset * g,
-          (Dtype)0., top_data + top[i]->offset(n) + top_offset * g);
-      }
-      // Add bias.
-      if (bias_term_) {
-        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
-            N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
-            bias_multiplier_.cpu_data(),
-            (Dtype)1., top_data + top[i]->offset(n));
+    for (int n = 0; n < this->num_; ++n) {
+      this->forward_cpu_gemm(bottom_data + bottom[i]->offset(n), weight,
+          top_data + top[i]->offset(n));
+      if (this->bias_term_) {
+        const Dtype* bias = this->blobs_[1]->cpu_data();
+        this->forward_cpu_bias(top_data + top[i]->offset(n), bias);
       }
     }
   }
 }
@@ -177,82 +37,37 @@ void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
-  const Dtype* weight = NULL;
-  Dtype* weight_diff = NULL;
+  const Dtype* weight = this->blobs_[0]->cpu_data();
+  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
   if (this->param_propagate_down_[0]) {
-    weight = this->blobs_[0]->cpu_data();
-    weight_diff = this->blobs_[0]->mutable_cpu_diff();
     caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
   }
-  Dtype* bias_diff = NULL;
-  if (bias_term_ && this->param_propagate_down_[1]) {
-    bias_diff = this->blobs_[1]->mutable_cpu_diff();
-    caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
+  if (this->bias_term_ && this->param_propagate_down_[1]) {
+    caffe_set(this->blobs_[1]->count(), Dtype(0),
+        this->blobs_[1]->mutable_cpu_diff());
   }
-  const int weight_offset = M_ * K_;
-  const int col_offset = K_ * N_;
-  const int top_offset = M_ * N_;
   for (int i = 0; i < top.size(); ++i) {
-    const Dtype* top_diff = NULL;
+    const Dtype* top_diff = top[i]->cpu_diff();
+    const Dtype* bottom_data = bottom[i]->cpu_data();
+    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
     // Bias gradient, if necessary.
-    if (bias_term_ && this->param_propagate_down_[1]) {
-      top_diff = top[i]->cpu_diff();
-      for (int n = 0; n < num_; ++n) {
-        caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
-            1., top_diff + top[0]->offset(n),
-            bias_multiplier_.cpu_data(), 1.,
-            bias_diff);
+    if (this->bias_term_ && this->param_propagate_down_[1]) {
+      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
+      for (int n = 0; n < this->num_; ++n) {
+        this->backward_cpu_bias(bias_diff, top_diff + top[i]->offset(n));
       }
     }
     if (this->param_propagate_down_[0] || propagate_down[i]) {
-      if (!top_diff) {
-        top_diff = top[i]->cpu_diff();
-      }
-      Dtype* col_buff = NULL;
-      if (!is_1x1_) {
-        col_buff = col_buffer_.mutable_cpu_data();
-      }
-      const Dtype* bottom_data = bottom[i]->cpu_data();
-      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
-      for (int n = 0; n < num_; ++n) {
-        // Since we saved memory in the forward pass by not storing all col
-        // data, we will need to recompute them.
-        if (!is_1x1_) {
-          im2col_cpu(bottom_data + bottom[i]->offset(n), channels_, height_,
-                     width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
-                     stride_h_, stride_w_, col_buff);
-        } else {
-          col_buff = bottom[i]->mutable_cpu_data() + bottom[i]->offset(n);
-        }
+      for (int n = 0; n < this->num_; ++n) {
         // gradient w.r.t. weight. Note that we will accumulate diffs.
         if (this->param_propagate_down_[0]) {
-          for (int g = 0; g < group_; ++g) {
-            caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
-                col_buff + col_offset * g, (Dtype)1.,
-                weight_diff + weight_offset * g);
-          }
+          this->weight_cpu_gemm(bottom_data + bottom[i]->offset(n),
+              top_diff + top[i]->offset(n), weight_diff);
         }
         // gradient w.r.t. bottom data, if necessary.
         if (propagate_down[i]) {
-          if (weight == NULL) {
-            weight = this->blobs_[0]->cpu_data();
-          }
-          if (is_1x1_) {
-            col_buff = bottom[i]->mutable_cpu_diff() + bottom[i]->offset(n);
-          }
-          for (int g = 0; g < group_; ++g) {
-            caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-                (Dtype)1., weight + weight_offset * g,
-                top_diff + top[i]->offset(n) + top_offset * g,
-                (Dtype)0., col_buff + col_offset * g);
-          }
-          // col2im back to the data
-          if (!is_1x1_) {
-            col2im_cpu(col_buff, channels_, height_, width_,
-                kernel_h_, kernel_w_, pad_h_, pad_w_,
-                stride_h_, stride_w_, bottom_diff + bottom[i]->offset(n));
-          }
+          this->backward_cpu_gemm(top_diff + top[i]->offset(n), weight,
+              bottom_diff + bottom[i]->offset(n));
         }
       }
     }
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index af14facb523..3902fdf3930 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -8,135 +8,64 @@ namespace caffe {
 
-/// @brief refer to CPU forward -- the BLAS implementation is the same.
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
+  const Dtype* weight = this->blobs_[0]->gpu_data();
   for (int i = 0; i < bottom.size(); ++i) {
     const Dtype* bottom_data = bottom[i]->gpu_data();
     Dtype* top_data = top[i]->mutable_gpu_data();
-    Dtype* col_buff = NULL;
-    if (!is_1x1_) {
-      col_buff = col_buffer_.mutable_gpu_data();
-    }
-    const Dtype* weight = this->blobs_[0]->gpu_data();
-    int weight_offset = M_ * K_;
-    int col_offset = K_ * N_;
-    int top_offset = M_ * N_;
-    for (int n = 0; n < num_; ++n) {
-      // im2col transformation: unroll input regions for filtering
-      // into column matrix for multplication.
-      if (!is_1x1_) {
-        im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
-            width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
-            col_buff);
-      } else {
-        col_buff = bottom[i]->mutable_gpu_data() + bottom[i]->offset(n);
-      }
-      // Take inner products for groups.
-      for (int g = 0; g < group_; ++g) {
-        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
-          (Dtype)1., weight + weight_offset * g, col_buff + col_offset * g,
-          (Dtype)0., top_data + top[i]->offset(n) + top_offset * g);
-      }
-      // Add bias.
-      if (bias_term_) {
-        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
-            N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
-            bias_multiplier_.gpu_data(),
-            (Dtype)1., top_data + top[i]->offset(n));
+    for (int n = 0; n < this->num_; ++n) {
+      this->forward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight,
+          top_data + top[i]->offset(n));
+      if (this->bias_term_) {
+        const Dtype* bias = this->blobs_[1]->gpu_data();
+        this->forward_gpu_bias(top_data + top[i]->offset(n), bias);
       }
     }
   }
 }
 
-/// @brief refer to CPU backward -- the BLAS implementation is the same.
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
-  const Dtype* weight = NULL;
-  Dtype* weight_diff = NULL;
+  const Dtype* weight = this->blobs_[0]->gpu_data();
+  Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
   if (this->param_propagate_down_[0]) {
-    weight = this->blobs_[0]->gpu_data();
-    weight_diff = this->blobs_[0]->mutable_gpu_diff();
    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
   }
-  Dtype* bias_diff = NULL;
-  if (bias_term_ && this->param_propagate_down_[1]) {
-    bias_diff = this->blobs_[1]->mutable_gpu_diff();
-    caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
+  if (this->bias_term_ && this->param_propagate_down_[1]) {
+    caffe_gpu_set(this->blobs_[1]->count(), Dtype(0),
+        this->blobs_[1]->mutable_gpu_diff());
   }
-  const int weight_offset = M_ * K_;
-  const int col_offset = K_ * N_;
-  const int top_offset = M_ * N_;
   for (int i = 0; i < top.size(); ++i) {
-    const Dtype* top_diff = NULL;
+    const Dtype* top_diff = top[i]->gpu_diff();
     // Bias gradient, if necessary.
-    if (bias_term_ && this->param_propagate_down_[1]) {
-      top_diff = top[i]->gpu_diff();
-      for (int n = 0; n < num_; ++n) {
-        caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
-            1., top_diff + top[0]->offset(n),
-            bias_multiplier_.gpu_data(), 1.,
-            bias_diff);
+    if (this->bias_term_ && this->param_propagate_down_[1]) {
+      Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff();
+      for (int n = 0; n < this->num_; ++n) {
+        this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n));
       }
     }
     if (this->param_propagate_down_[0] || propagate_down[i]) {
-      if (!top_diff) {
-        top_diff = top[i]->gpu_diff();
-      }
-      Dtype* col_buff = NULL;
-      if (!is_1x1_) {
-        col_buff = col_buffer_.mutable_gpu_data();
-      }
       const Dtype* bottom_data = bottom[i]->gpu_data();
       Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
-      for (int n = 0; n < num_; ++n) {
-        // Since we saved memory in the forward pass by not storing all col
-        // data, we will need to recompute them.
-        if (!is_1x1_) {
-          im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
-              width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
-              stride_h_, stride_w_, col_buff);
-        } else {
-          col_buff = bottom[i]->mutable_gpu_data() + bottom[i]->offset(n);
-        }
+      for (int n = 0; n < this->num_; ++n) {
         // gradient w.r.t. weight. Note that we will accumulate diffs.
         if (this->param_propagate_down_[0]) {
-          for (int g = 0; g < group_; ++g) {
-            caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
-                col_buff + col_offset * g, (Dtype)1.,
-                weight_diff + weight_offset * g);
-          }
+          this->weight_gpu_gemm(bottom_data + bottom[i]->offset(n),
+              top_diff + top[i]->offset(n), weight_diff);
         }
-        // gradient w.r.t. bottom data, if necessary
+        // gradient w.r.t. bottom data, if necessary.
         if (propagate_down[i]) {
-          if (weight == NULL) {
-            weight = this->blobs_[0]->gpu_data();
-          }
-          if (is_1x1_) {
-            col_buff = bottom[i]->mutable_gpu_diff() + bottom[i]->offset(n);
-          }
-          for (int g = 0; g < group_; ++g) {
-            caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-                (Dtype)1., weight + weight_offset * g,
-                top_diff + top[i]->offset(n) + top_offset * g,
-                (Dtype)0., col_buff + col_offset * g);
-          }
-          // col2im back to the data
-          if (!is_1x1_) {
-            col2im_gpu(col_buff, channels_, height_, width_,
-                kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
-                bottom_diff + bottom[i]->offset(n));
-          }
+          this->backward_gpu_gemm(top_diff + top[i]->offset(n), weight,
+              bottom_diff + bottom[i]->offset(n));
         }
       }
     }
   }
 }
-
 INSTANTIATE_LAYER_GPU_FUNCS(ConvolutionLayer);
 
 }  // namespace caffe
diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp
new file mode 100644
index 00000000000..59114f017bf
--- /dev/null
+++ b/src/caffe/layers/deconv_layer.cpp
@@ -0,0 +1,85 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/util/im2col.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void DeconvolutionLayer<Dtype>::compute_output_shape() {
+  this->height_out_ = this->stride_h_ * (this->height_ - 1) + this->kernel_h_
+      - 2 * this->pad_h_;
+  this->width_out_ = this->stride_w_ * (this->width_ - 1) + this->kernel_w_
+      - 2 * this->pad_w_;
+}
+
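+// The pass swap described in vision_layers.hpp plays out below: the
+// deconvolution forward pass calls backward_cpu_gemm (GEMM then col2im),
+// and its backward pass calls forward_cpu_gemm and weight_cpu_gemm with the
+// top and bottom roles exchanged.
+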
+template <typename Dtype>
+void DeconvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const Dtype* weight = this->blobs_[0]->cpu_data();
+  for (int i = 0; i < bottom.size(); ++i) {
+    const Dtype* bottom_data = bottom[i]->cpu_data();
+    Dtype* top_data = top[i]->mutable_cpu_data();
+    for (int n = 0; n < this->num_; ++n) {
+      this->backward_cpu_gemm(bottom_data + bottom[i]->offset(n), weight,
+          top_data + top[i]->offset(n));
+      if (this->bias_term_) {
+        const Dtype* bias = this->blobs_[1]->cpu_data();
+        this->forward_cpu_bias(top_data + top[i]->offset(n), bias);
+      }
+    }
+  }
+}
+
+template <typename Dtype>
+void DeconvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  const Dtype* weight = this->blobs_[0]->cpu_data();
+  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
+  if (this->param_propagate_down_[0]) {
+    caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  }
+  if (this->bias_term_ && this->param_propagate_down_[1]) {
+    caffe_set(this->blobs_[1]->count(), Dtype(0),
+        this->blobs_[1]->mutable_cpu_diff());
+  }
+  for (int i = 0; i < top.size(); ++i) {
+    const Dtype* top_diff = top[i]->cpu_diff();
+    const Dtype* bottom_data = bottom[i]->cpu_data();
+    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
+    // Bias gradient, if necessary.
+    if (this->bias_term_ && this->param_propagate_down_[1]) {
+      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
+      for (int n = 0; n < this->num_; ++n) {
+        this->backward_cpu_bias(bias_diff, top_diff + top[i]->offset(n));
+      }
+    }
+    if (this->param_propagate_down_[0] || propagate_down[i]) {
+      for (int n = 0; n < this->num_; ++n) {
+        // Gradient w.r.t. weight. Note that we will accumulate diffs.
+        if (this->param_propagate_down_[0]) {
+          this->weight_cpu_gemm(top_diff + top[i]->offset(n),
+              bottom_data + bottom[i]->offset(n), weight_diff);
+        }
+        // Gradient w.r.t. bottom data, if necessary, reusing the column
+        // buffer we might have just computed above.
+        if (propagate_down[i]) {
+          this->forward_cpu_gemm(top_diff + top[i]->offset(n), weight,
+              bottom_diff + bottom[i]->offset(n),
+              this->param_propagate_down_[0]);
+        }
+      }
+    }
+  }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(DeconvolutionLayer);
+#endif
+
+INSTANTIATE_CLASS(DeconvolutionLayer);
+REGISTER_LAYER_CLASS(DECONVOLUTION, DeconvolutionLayer);
+}  // namespace caffe
diff --git a/src/caffe/layers/deconv_layer.cu b/src/caffe/layers/deconv_layer.cu
new file mode 100644
index 00000000000..9198dd64c72
--- /dev/null
+++ b/src/caffe/layers/deconv_layer.cu
@@ -0,0 +1,71 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/util/im2col.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void DeconvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const Dtype* weight = this->blobs_[0]->gpu_data();
+  for (int i = 0; i < bottom.size(); ++i) {
+    const Dtype* bottom_data = bottom[i]->gpu_data();
+    Dtype* top_data = top[i]->mutable_gpu_data();
+    for (int n = 0; n < this->num_; ++n) {
+      this->backward_gpu_gemm(bottom_data + bottom[i]->offset(n), weight,
+          top_data + top[i]->offset(n));
+      if (this->bias_term_) {
+        const Dtype* bias = this->blobs_[1]->gpu_data();
+        this->forward_gpu_bias(top_data + top[i]->offset(n), bias);
+      }
+    }
+  }
+}
+
+template <typename Dtype>
+void DeconvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  const Dtype* weight = this->blobs_[0]->gpu_data();
+  Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
+  if (this->param_propagate_down_[0]) {
+    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  }
+  if (this->bias_term_ && this->param_propagate_down_[1]) {
+    caffe_gpu_set(this->blobs_[1]->count(), Dtype(0),
+        this->blobs_[1]->mutable_gpu_diff());
+  }
+  for (int i = 0; i < top.size(); ++i) {
+    const Dtype* top_diff = top[i]->gpu_diff();
+    const Dtype* bottom_data = bottom[i]->gpu_data();
+    Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
+    // Bias gradient, if necessary.
+    if (this->bias_term_ && this->param_propagate_down_[1]) {
+      Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff();
+      for (int n = 0; n < this->num_; ++n) {
+        this->backward_gpu_bias(bias_diff, top_diff + top[i]->offset(n));
+      }
+    }
+    if (this->param_propagate_down_[0] || propagate_down[i]) {
+      for (int n = 0; n < this->num_; ++n) {
+        // gradient w.r.t. weight. Note that we will accumulate diffs.
+        if (this->param_propagate_down_[0]) {
+          this->weight_gpu_gemm(top_diff + top[i]->offset(n),
+              bottom_data + bottom[i]->offset(n), weight_diff);
+        }
+        // gradient w.r.t. bottom data, if necessary.
+        if (propagate_down[i]) {
+          this->forward_gpu_gemm(top_diff + top[i]->offset(n), weight,
+              bottom_diff + bottom[i]->offset(n));
+        }
+      }
+    }
+  }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(DeconvolutionLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index b11e2786f0f..dd2c1471c29 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -231,7 +231,7 @@ message LayerParameter {
   // line above the enum. Update the next available ID when you add a new
   // LayerType.
   //
-  // LayerType next available ID: 39 (last added: EXP)
+  // LayerType next available ID: 40 (last added: DECONVOLUTION)
   enum LayerType {
     // "NONE" layer type is 0th enum element so that we don't cause confusion
     // by defaulting to an existent LayerType (instead, should usually error if
@@ -245,6 +245,7 @@ message LayerParameter {
     CONTRASTIVE_LOSS = 37;
     CONVOLUTION = 4;
     DATA = 5;
+    DECONVOLUTION = 39;
     DROPOUT = 6;
     DUMMY_DATA = 32;
     EUCLIDEAN_LOSS = 7;
diff --git a/src/caffe/test/test_deconvolution_layer.cpp b/src/caffe/test/test_deconvolution_layer.cpp
new file mode 100644
index 00000000000..fc63d5efbe3
--- /dev/null
+++ b/src/caffe/test/test_deconvolution_layer.cpp
@@ -0,0 +1,158 @@
+#include <cstring>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+// Since ConvolutionLayerTest checks the shared conv/deconv code in detail,
+// we'll just do a simple forward test and a gradient check.
+template <typename TypeParam>
+class DeconvolutionLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  DeconvolutionLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 3, 6, 4)),
+        blob_bottom_2_(new Blob<Dtype>(2, 3, 6, 4)),
+        blob_top_(new Blob<Dtype>()),
+        blob_top_2_(new Blob<Dtype>()) {}
+  virtual void SetUp() {
+    // fill the values
+    FillerParameter filler_param;
+    filler_param.set_value(1.);
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    filler.Fill(this->blob_bottom_2_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+
+  virtual ~DeconvolutionLayerTest() {
+    delete blob_bottom_;
+    delete blob_bottom_2_;
+    delete blob_top_;
+    delete blob_top_2_;
+  }
+
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_bottom_2_;
+  Blob<Dtype>* const blob_top_;
+  Blob<Dtype>* const blob_top_2_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(DeconvolutionLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(DeconvolutionLayerTest, TestSetup) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  ConvolutionParameter* convolution_param =
+      layer_param.mutable_convolution_param();
+  convolution_param->set_kernel_size(3);
+  convolution_param->set_stride(2);
+  convolution_param->set_num_output(4);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
+  shared_ptr<Layer<Dtype> > layer(
+      new DeconvolutionLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  EXPECT_EQ(this->blob_top_->num(), 2);
+  EXPECT_EQ(this->blob_top_->channels(), 4);
+  EXPECT_EQ(this->blob_top_->height(), 13);
+  EXPECT_EQ(this->blob_top_->width(), 9);
+  EXPECT_EQ(this->blob_top_2_->num(), 2);
+  EXPECT_EQ(this->blob_top_2_->channels(), 4);
+  EXPECT_EQ(this->blob_top_2_->height(), 13);
+  EXPECT_EQ(this->blob_top_2_->width(), 9);
+  // setting group should not change the shape
+  convolution_param->set_num_output(3);
+  convolution_param->set_group(3);
+  layer.reset(new DeconvolutionLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  EXPECT_EQ(this->blob_top_->num(), 2);
+  EXPECT_EQ(this->blob_top_->channels(), 3);
+  EXPECT_EQ(this->blob_top_->height(), 13);
+  EXPECT_EQ(this->blob_top_->width(), 9);
+  EXPECT_EQ(this->blob_top_2_->num(), 2);
+  EXPECT_EQ(this->blob_top_2_->channels(), 3);
+  EXPECT_EQ(this->blob_top_2_->height(), 13);
+  EXPECT_EQ(this->blob_top_2_->width(), 9);
+}
+
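+// With constant weight 1, constant bottom 1, and bias 0.1, each top value is
+// 0.1 plus 3 (one per bottom channel) for every stride-2 filter window that
+// covers it: pixels where two windows overlap along one axis gain another 3,
+// and pixels where windows overlap in both dimensions gain 9.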
+TYPED_TEST(DeconvolutionLayerTest, TestSimpleDeconvolution) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
+  LayerParameter layer_param;
+  ConvolutionParameter* convolution_param =
+      layer_param.mutable_convolution_param();
+  convolution_param->set_kernel_size(3);
+  convolution_param->set_stride(2);
+  convolution_param->set_num_output(4);
+  convolution_param->mutable_weight_filler()->set_type("constant");
+  convolution_param->mutable_weight_filler()->set_value(1);
+  convolution_param->mutable_bias_filler()->set_type("constant");
+  convolution_param->mutable_bias_filler()->set_value(0.1);
+  shared_ptr<Layer<Dtype> > layer(
+      new DeconvolutionLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  // constant-fill the bottom blobs
+  FillerParameter filler_param;
+  filler_param.set_value(1.);
+  ConstantFiller<Dtype> filler(filler_param);
+  filler.Fill(this->blob_bottom_);
+  filler.Fill(this->blob_bottom_2_);
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  // simply check that accumulation works with overlapping filters
+  const Dtype* top_data = this->blob_top_->cpu_data();
+  for (int n = 0; n < this->blob_top_->num(); ++n) {
+    for (int c = 0; c < this->blob_top_->channels(); ++c) {
+      for (int h = 0; h < this->blob_top_->height(); ++h) {
+        for (int w = 0; w < this->blob_top_->width(); ++w) {
+          Dtype expected = 3.1;
+          bool h_overlap = h % 2 == 0 && h > 0
+              && h < this->blob_top_->height() - 1;
+          bool w_overlap = w % 2 == 0 && w > 0
+              && w < this->blob_top_->width() - 1;
+          if (h_overlap && w_overlap) {
+            expected += 9;
+          } else if (h_overlap || w_overlap) {
+            expected += 3;
+          }
+          EXPECT_NEAR(top_data[this->blob_top_->offset(n, c, h, w)],
+              expected, 1e-4);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(DeconvolutionLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  ConvolutionParameter* convolution_param =
+      layer_param.mutable_convolution_param();
+  this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+  this->blob_top_vec_.push_back(this->blob_top_2_);
+  convolution_param->set_kernel_size(2);
+  convolution_param->set_stride(1);
+  convolution_param->set_num_output(1);
+  convolution_param->mutable_weight_filler()->set_type("gaussian");
+  convolution_param->mutable_bias_filler()->set_type("gaussian");
+  DeconvolutionLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+}  // namespace caffe