This repository has been archived by the owner on Nov 17, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6.8k
/
im2col.h
386 lines (376 loc) · 15.2 KB
/
im2col.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
/*!
******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
*
* COPYRIGHT
*
* All contributions by the University of California:
* Copyright (c) 2014-2017 The Regents of the University of California (Regents)
* All rights reserved.
*
* All other contributions:
* Copyright (c) 2014-2017, the respective contributors
* All rights reserved.
*
* Caffe uses a shared copyright model: each contributor holds copyright over
* their contributions to Caffe. The project versioning records all such
* contribution and copyright details. If a contributor wants to further mark
* their specific copyright on a particular contribution, they should indicate
* their copyright solely in the commit message of the change when it is
* committed.
*
* LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* CONTRIBUTION AGREEMENT
*
* By contributing to the BVLC/caffe repository through pull-request, comment,
* or otherwise, the contributor releases their content to the
* license and copyright terms herein.
*
***************** END Caffe Copyright Notice and Disclaimer ********************
*
* \file im2col.h
* \brief Function definitions of converting an image to
* column matrix based on kernel, padding, and dilation.
* These functions are mainly used in convolution operators.
* The implementation of the im2col and col2im algorithms
* are copied from Caffe with minor interface modifications
* adapting to MXNet data structures.
*/
#ifndef MXNET_OPERATOR_NN_IM2COL_H_
#define MXNET_OPERATOR_NN_IM2COL_H_
#include <mxnet/base.h>
#include <mxnet/operator.h>
#include <cstring>
#include <vector>
#include "../mxnet_op.h"
namespace mxnet {
namespace op {
// Tests 0 <= a < b with a single comparison. Casting both operands to
// unsigned maps any negative `a` onto a value above INT_MAX, while `b`
// (non-negative at every call site) keeps its magnitude, so one unsigned
// compare is equivalent to the two signed checks `a >= 0 && a < b`.
inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
  const unsigned ua = static_cast<unsigned>(a);
  const unsigned ub = static_cast<unsigned>(b);
  return ua < ub;
}
/*!
 * \brief im2col 2D cpu version.
 * DO NOT call this function directly.
 * Use the wrapper function im2col() instead.
 *
 * Unrolls one (C, H, W) image into a column buffer: one buffer row per
 * (channel, kernel_row, kernel_col) triple, one entry per output position,
 * so that a convolution reduces to a matrix product. Positions that fall
 * into the padding region are written as zeros.
 *
 * \param data_im    pointer to the first element of the (C, H, W) image
 * \param channels   number of image channels C
 * \param height     image height H
 * \param width      image width W
 * \param kernel_h   filter height
 * \param kernel_w   filter width
 * \param pad_h      zero-padding along height
 * \param pad_w      zero-padding along width
 * \param stride_h   vertical step between output positions
 * \param stride_w   horizontal step between output positions
 * \param dilation_h vertical spacing between kernel taps
 * \param dilation_w horizontal spacing between kernel taps
 * \param data_col   output buffer, C * kernel_h * kernel_w * output_h * output_w entries
 */
template <typename DType>
inline void im2col_cpu(const DType* data_im,
                       const int channels,
                       const int height,
                       const int width,
                       const int kernel_h,
                       const int kernel_w,
                       const int pad_h,
                       const int pad_w,
                       const int stride_h,
                       const int stride_w,
                       const int dilation_h,
                       const int dilation_w,
                       DType* data_col) {
  const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
  const int channel_size = height * width;
  // NOTE (condensed from junwu's earlier TODO): OpenMP (with and without
  // collapse) was benchmarked on these loops and gave mixed results; the
  // convolution bottleneck likely lies in dot(), so the loops stay serial.
  DType* col = data_col;
  for (int c = 0; c < channels; ++c) {
    const DType* im = data_im + c * channel_size;
    for (int kh = 0; kh < kernel_h; ++kh) {
      for (int kw = 0; kw < kernel_w; ++kw) {
        for (int oh = 0; oh < output_h; ++oh) {
          const int row = oh * stride_h - pad_h + kh * dilation_h;
          // Unsigned compare == (row >= 0 && row < height); see
          // is_a_ge_zero_and_a_lt_b for the rationale.
          if (static_cast<unsigned>(row) >= static_cast<unsigned>(height)) {
            // The whole output row reads from padding: emit zeros.
            for (int ow = 0; ow < output_w; ++ow) {
              *col++ = static_cast<DType>(0);
            }
            continue;
          }
          for (int ow = 0; ow < output_w; ++ow) {
            const int column = ow * stride_w - pad_w + kw * dilation_w;
            *col++ = (static_cast<unsigned>(column) < static_cast<unsigned>(width))
                         ? im[row * width + column]
                         : static_cast<DType>(0);
          }
        }
      }
    }
  }
}
/*!
 * \brief core function of the n-dimensional im2col/col2im algorithm.
 * DO NOT call this function directly. Use wrapper function im2col() or
 * col2im() instead.
 * \param data_input in im2col mode: image pointer pointing at the first
 *        element of the channel dim; in col2im mode: the column buffer
 * \param im2col true selects im2col (image -> columns), false selects
 *        col2im (columns accumulated back into the image)
 * \param im_shape input image shape in dimensions (N, C, H, W, ...)
 * \param col_shape column buffer shape
 * \param kernel_shape kernel filter shape (one entry per spatial axis)
 * \param pad pad shape
 * \param stride stride shape
 * \param dilation dilation shape
 * \param data_output buffer to be filled: the column buffer in im2col
 *        mode, the image in col2im mode
 * \param req write request; kNullOp skips all work, kAddTo accumulates
 *        into data_output (col2im only), anything else overwrites
 */
template <typename DType>
inline void im2col_nd_core_cpu(const DType* data_input,
                               const bool im2col,
                               const mxnet::TShape& im_shape,
                               const mxnet::TShape& col_shape,
                               const mxnet::TShape& kernel_shape,
                               const mxnet::TShape& pad,
                               const mxnet::TShape& stride,
                               const mxnet::TShape& dilation,
                               DType* data_output,
                               OpReqType req = mxnet::kWriteTo) {
  if (mxnet::kNullOp == req)
    return;
  int num_spatial_axes = kernel_shape.ndim();
  if (!im2col) {
    // col2im scatters with +=, so the image must start zeroed unless the
    // caller asked to accumulate onto existing contents.
    index_t im_size = im_shape[1];  // skip batch dim
    for (index_t i = 0; i < num_spatial_axes; ++i) {
      im_size *= im_shape[2 + i];
    }
    if (mxnet::kAddTo != req) {
      std::fill(data_output, data_output + im_size, static_cast<DType>(0));
    }
  }
  // Total number of kernel taps; used to recover the channel index from a
  // column-row index below.
  index_t kernel_size = 1;
  for (index_t i = 0; i < num_spatial_axes; ++i) {
    kernel_size *= kernel_shape[i];
  }
  // col_shape[0] == C * kernel_size: one column row per (channel, tap).
  const index_t channels_col = col_shape[0];
  // d_offset: the per-axis kernel tap decoded from c_col.
  // d_iter: odometer-style counter over the column buffer's spatial dims.
  std::vector<index_t> d_offset(num_spatial_axes, 0);
  std::vector<index_t> d_iter(num_spatial_axes, 0);
  for (index_t c_col = 0; c_col < channels_col; ++c_col) {
    // Loop over spatial axes in reverse order to compute a per-axis offset:
    // peel mixed-radix digits of c_col with radices kernel_shape[...].
    index_t offset = c_col;
    for (int d_i = static_cast<int>(num_spatial_axes) - 1; d_i >= 0; --d_i) {
      if (d_i < static_cast<int>(num_spatial_axes) - 1) {
        offset /= kernel_shape[d_i + 1];
      }
      d_offset[d_i] = offset % kernel_shape[d_i];
    }
    for (bool incremented = true; incremented;) {
      // Loop over spatial axes in forward order to compute the indices in the
      // image and column (Horner-style row-major flattening), and whether the
      // index lies in the padding.
      index_t index_col = c_col;
      index_t index_im = c_col / kernel_size;  // channel index
      bool is_padding = false;
      for (index_t d_i = 0; d_i < num_spatial_axes; ++d_i) {
        const index_t d = d_iter[d_i];
        const int d_im = static_cast<int>(d * stride[d_i] + d_offset[d_i] * dilation[d_i]) -
                         static_cast<int>(pad[d_i]);
        is_padding |= d_im < 0 || d_im >= static_cast<int>(im_shape[d_i + 2]);
        index_col *= col_shape[d_i + 1];
        index_col += d;
        // index_im may absorb a negative d_im here, but it is only read
        // when !is_padding, so the garbage value is never dereferenced.
        index_im *= static_cast<index_t>(im_shape[d_i + 2]);
        index_im += d_im;
      }
      if (im2col) {
        if (is_padding) {
          data_output[index_col] = 0;
        } else {
          data_output[index_col] = data_input[index_im];
        }
      } else if (!is_padding) {  // col2im
        data_output[index_im] += data_input[index_col];
      }
      // Loop over spatial axes in reverse order to choose an index,
      // like counting: increment the last axis, carrying into earlier axes
      // when one wraps; when every axis wraps, this c_col row is done.
      incremented = false;
      for (int d_i = static_cast<int>(num_spatial_axes) - 1; d_i >= 0; --d_i) {
        const index_t d_max = col_shape[d_i + 1];
        CHECK_LT(d_iter[d_i], d_max);
        if (d_iter[d_i] + 1 == d_max) {
          d_iter[d_i] = 0;
        } else {  // d_iter[d_i] < d_max - 1
          ++d_iter[d_i];
          incremented = true;
          break;
        }
      }
    }  // while(incremented)
  }  // for (int c = 0; c < channels_col; ++c)
}
/*!
 * \brief cpu function of im2col algorithm.
 * Dispatches on the kernel's spatial rank: the common 2-D case goes to the
 * specialized im2col_cpu kernel, everything else to the generic n-d routine.
 * \param s device stream (unused by the cpu implementation)
 * \param data_im pointer of an image (C, H, W, ...) in the image batch
 * \param im_shape input image shape in dimensions (N, C, H, W, ...)
 * \param col_shape column buffer shape
 * \param kernel_shape kernel filter shape
 * \param pad pad shape
 * \param stride stride shape
 * \param dilation dilation shape
 * \param data_col start pointer of the column buffer to be filled
 */
template <typename DType>
inline void im2col(mshadow::Stream<cpu>* s,
                   const DType* data_im,
                   const mxnet::TShape& im_shape,
                   const mxnet::TShape& col_shape,
                   const mxnet::TShape& kernel_shape,
                   const mxnet::TShape& pad,
                   const mxnet::TShape& stride,
                   const mxnet::TShape& dilation,
                   DType* data_col) {
  if (kernel_shape.ndim() != 2) {
    // Arbitrary spatial rank: run the generic core in im2col mode.
    im2col_nd_core_cpu(
        data_im, true, im_shape, col_shape, kernel_shape, pad, stride, dilation, data_col);
    return;
  }
  // 2-D fast path; im_shape is laid out as (N, C, H, W).
  im2col_cpu(data_im,
             im_shape[1],      // channels
             im_shape[2],      // height
             im_shape[3],      // width
             kernel_shape[0],
             kernel_shape[1],
             pad[0],
             pad[1],
             stride[0],
             stride[1],
             dilation[0],
             dilation[1],
             data_col);
}
/*!
 * \brief col2im 2D cpu version.
 * DO NOT call this function directly. Use wrapper function col2im() instead.
 *
 * Inverse of im2col_cpu: scatters (accumulates) a column buffer back into
 * the (C, H, W) image layout; entries that originated from padding are
 * skipped.
 *
 * \param data_col   column buffer produced by the im2col layout
 * \param channels   number of image channels C
 * \param height     image height H
 * \param width      image width W
 * \param kernel_h   filter height
 * \param kernel_w   filter width
 * \param pad_h      zero-padding along height
 * \param pad_w      zero-padding along width
 * \param stride_h   vertical step between output positions
 * \param stride_w   horizontal step between output positions
 * \param dilation_h vertical spacing between kernel taps
 * \param dilation_w horizontal spacing between kernel taps
 * \param data_im    output image buffer of height * width * channels entries
 * \param req        write request; kNullOp skips all work, kAddTo
 *                   accumulates on top of the existing image, anything
 *                   else overwrites it
 */
template <typename DType>
inline void col2im_cpu(const DType* data_col,
                       const int channels,
                       const int height,
                       const int width,
                       const int kernel_h,
                       const int kernel_w,
                       const int pad_h,
                       const int pad_w,
                       const int stride_h,
                       const int stride_w,
                       const int dilation_h,
                       const int dilation_w,
                       DType* data_im,
                       OpReqType req) {
  if (mxnet::kNullOp == req)
    return;
  // Every pixel below is updated with +=, so start from a zeroed image
  // unless the caller asked to accumulate.
  if (mxnet::kAddTo != req) {
    std::fill(data_im, data_im + height * width * channels, static_cast<DType>(0));
  }
  const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
  const int channel_size = height * width;
  // NOTE (condensed from junwu's earlier TODO): OpenMP was benchmarked on
  // these loops and gave mixed results; the convolution bottleneck likely
  // lies in dot(), so the loops stay serial.
  const DType* col = data_col;
  for (int c = 0; c < channels; ++c) {
    DType* im = data_im + c * channel_size;
    for (int kh = 0; kh < kernel_h; ++kh) {
      for (int kw = 0; kw < kernel_w; ++kw) {
        for (int oh = 0; oh < output_h; ++oh) {
          const int row = oh * stride_h - pad_h + kh * dilation_h;
          // Unsigned compare == !(row >= 0 && row < height); see
          // is_a_ge_zero_and_a_lt_b for the rationale.
          if (static_cast<unsigned>(row) >= static_cast<unsigned>(height)) {
            // Whole output row came from padding: nothing to scatter.
            col += output_w;
            continue;
          }
          for (int ow = 0; ow < output_w; ++ow, ++col) {
            const int column = ow * stride_w - pad_w + kw * dilation_w;
            if (static_cast<unsigned>(column) < static_cast<unsigned>(width)) {
              im[row * width + column] += *col;
            }
          }
        }
      }
    }
  }
}
/*!
 * \brief cpu function of col2im algorithm.
 * Dispatches on the kernel's spatial rank: the common 2-D case goes to the
 * specialized col2im_cpu kernel, everything else to the generic n-d routine.
 * \param s device stream (unused by the cpu implementation)
 * \param data_col start pointer of the column buffer to read back
 * \param im_shape input image shape in dimensions (N, C, H, W, ...)
 * \param col_shape column buffer shape
 * \param kernel_shape kernel filter shape
 * \param pad pad shape
 * \param stride stride shape
 * \param dilation dilation shape
 * \param data_im pointer of an image (C, H, W, ...) in the image batch
 * \param req write request forwarded to the underlying kernel
 */
template <typename DType>
inline void col2im(mshadow::Stream<cpu>* s,
                   const DType* data_col,
                   const mxnet::TShape& im_shape,
                   const mxnet::TShape& col_shape,
                   const mxnet::TShape& kernel_shape,
                   const mxnet::TShape& pad,
                   const mxnet::TShape& stride,
                   const mxnet::TShape& dilation,
                   DType* data_im,
                   OpReqType req) {
  const int spatial_rank = kernel_shape.ndim();
  if (spatial_rank != 2) {
    // Arbitrary spatial rank: run the generic core in col2im mode.
    im2col_nd_core_cpu(
        data_col, false, im_shape, col_shape, kernel_shape, pad, stride, dilation, data_im, req);
    return;
  }
  // 2-D fast path; im_shape is laid out as (N, C, H, W).
  col2im_cpu(data_col,
             im_shape[1],      // channels
             im_shape[2],      // height
             im_shape[3],      // width
             kernel_shape[0],
             kernel_shape[1],
             pad[0],
             pad[1],
             stride[0],
             stride[1],
             dilation[0],
             dilation[1],
             data_im,
             req);
}
} // namespace op
} // namespace mxnet
#ifdef __CUDACC__
#include "./im2col.cuh"
#endif
#endif // MXNET_OPERATOR_NN_IM2COL_H_