forked from apache/mxnet
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Export resize and support batch size (apache#14014)
* add image resize operator and unit test * refactor the resize operator and address lint issues * address comment and add doc * assert size is more than 2 * add test case of 4D input * use ndarray datatype * add inline to Shape * add 4D input example * refactor the duplicate code and separate the resize from image_random * clean up the code * add resize implementation * delete the variable not used * refactor the code with structure and enum to make code more understandable * fix the lint * address comments * address comment 1. add description 2. refactor unit test and add dtype * update data type check * lint * move the common utility to image_utils * add default value for keep_ratio * change the operator doc * update the image utility function * fix lint * use Hang implementation to achieve image resize operator GPU * update the check and doc * refactor the caffe_gpu_interp2_kernel * update doc and fix the cpu compile error * update the comment * fix lint * add unit test for gpu * address comments * remove the crop and centercrop utility function to make the PR clear * fix the syntax error * delete the warning * add unit test with 4D * fix typo * add more unit test * fix unit test * set atol = 1 * fix missing numpy import * fix the unit test * delete test case * fix unit test missing dependency * fix error data type * unify the style and add invalid interp * update the doc
- Loading branch information
1 parent
9a3e4a0
commit 2a4634b
Showing
11 changed files
with
744 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
/*! | ||
* Copyright (c) 2019 by Contributors | ||
* \file bilinear_resize-inl.cuh | ||
* \brief bilinear resize operator cuda implementation | ||
* \author Hang Zhang, Jake Lee | ||
*/ | ||
|
||
#ifndef MXNET_OPERATOR_CONTRIB_BILINEAR_RESIZE_CUH_ | ||
#define MXNET_OPERATOR_CONTRIB_BILINEAR_RESIZE_CUH_ | ||
|
||
#include <cuda_runtime_api.h> | ||
#include <algorithm> | ||
|
||
namespace mxnet { | ||
namespace op { | ||
|
||
using namespace mshadow; | ||
|
||
// Memory layout of the image tensor(s) consumed by the resize kernels below.
// The 3-D kernel overload indexes its input as [H][W][C] (matching HWC);
// the 4-D overload branches on NHWC vs NCHW when indexing.
enum ImageLayout {
  HWC,   // single image: height x width x channel
  NHWC,  // batch of images: batch x height x width x channel
  NCHW   // batch of images: batch x channel x height x width
};
|
||
// Narrowing/widening helper: converts an accumulator value of type In into
// the tensor element type Out (e.g. float accumulation written back as half).
// Usable from both host and device code.
template<typename In, typename Out>
struct ScalarConvert {
  static __host__ __device__ __forceinline__ Out to(const In v) {
    return static_cast<Out>(v);
  }
};
|
||
// The maximum number of threads in a block
static const unsigned MAX_BLOCK_SIZE = 512U;

// Choose a block size for a kernel launch: the smallest power-of-two
// candidate (>= 32) that covers nElem, capped at MAX_BLOCK_SIZE.  When
// `smaller` is set, the cap is halved (MAX_BLOCK_SIZE >> 1) and the
// MAX_BLOCK_SIZE candidate is never considered.
static unsigned getNumThreads(int nElem, const bool smaller) {
  const unsigned candidates[5] = {32U, 64U, 128U, 256U, MAX_BLOCK_SIZE};
  const int numCandidates = smaller ? 4 : 5;
  for (int i = 0; i < numCandidates; ++i) {
    if (candidates[i] >= static_cast<unsigned>(nElem)) {
      return candidates[i];
    }
  }
  return smaller ? (MAX_BLOCK_SIZE >> 1) : MAX_BLOCK_SIZE;
}
|
||
// caffe_gpu_interp2_kernel overloading with Tensor<xpu, 3, DType>
// Bilinear resize of a single image stored as [height][width][channel].
// One thread produces one output pixel across all channels, so the kernel
// should be launched with n = height2 * width2 total threads (1-D grid).
// rheight/rwidth are the source/destination coordinate scale ratios;
// Acctype is the accumulation type used for the weighted sum.
template<typename xpu, typename Dtype, typename Acctype>
__global__ void caffe_gpu_interp2_kernel(const int n,
                                         const Acctype rheight, const Acctype rwidth,
                                         const Tensor<xpu, 3, Dtype> data1,
                                         Tensor<xpu, 3, Dtype> data2,
                                         ImageLayout layout) {
  const int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index >= n) return;  // tail guard: grid rarely divides n evenly

  const int channels = data1.size(2);
  const int height1 = data1.size(0);
  const int width1 = data1.size(1);
  const int height2 = data2.size(0);
  const int width2 = data2.size(1);

  const int w2 = index % width2;  // output column, 0:width2-1
  const int h2 = index / width2;  // output row, 0:height2-1

  // Identical input/output extents: a plain copy, no interpolation needed.
  if (height1 == height2 && width1 == width2) {
    for (int c = 0; c < channels; ++c) {
      data2[h2][w2][c] = data1[h2][w2][c];
    }
    return;
  }

  // Top-left source sample and bilinear weights along the vertical axis.
  const Acctype h1r = rheight * h2;
  const int h1 = h1r;                           // truncation picks the upper row
  const int h1p = (h1 < height1 - 1) ? 1 : 0;   // 0 clamps at the bottom edge
  const Acctype h1lambda = h1r - h1;
  const Acctype h0lambda = Acctype(1) - h1lambda;

  // Same along the horizontal axis.
  const Acctype w1r = rwidth * w2;
  const int w1 = w1r;
  const int w1p = (w1 < width1 - 1) ? 1 : 0;    // 0 clamps at the right edge
  const Acctype w1lambda = w1r - w1;
  const Acctype w0lambda = Acctype(1) - w1lambda;

  // Weighted sum of the 2x2 source neighborhood, per channel.
  for (int c = 0; c < channels; ++c) {
    const Acctype val = h0lambda * (w0lambda * data1[h1][w1][c]
                      + w1lambda * data1[h1][w1 + w1p][c])
                      + h1lambda * (w0lambda * data1[h1 + h1p][w1][c]
                      + w1lambda * data1[h1 + h1p][w1 + w1p][c]);
    data2[h2][w2][c] = ScalarConvert<Acctype, Dtype>::to(val);
  }
}
|
||
// caffe_gpu_interp2_kernel overloading with Tensor<xpu, 4, DType>
// Bilinear resize of a batch of images in NHWC or NCHW layout.  One thread
// produces one output (h, w) location for every image in the batch and
// every channel, so the kernel should be launched with
// n = height2 * width2 total threads (1-D grid).
// rheight/rwidth are the source/destination coordinate scale ratios;
// Acctype is the accumulation type used for the weighted sum.
template<typename xpu, typename Dtype, typename Acctype>
__global__ void caffe_gpu_interp2_kernel(const int n,
                                         const Acctype rheight, const Acctype rwidth,
                                         const Tensor<xpu, 4, Dtype> data1,
                                         Tensor<xpu, 4, Dtype> data2,
                                         ImageLayout layout) {
  const int index = threadIdx.x + blockIdx.x * blockDim.x;
  // Axis 0 is the batch dimension in both supported layouts, so no branch
  // is needed (the original ternary had identical arms here).
  const int batch_size = data1.size(0);
  const int channels = (layout == NHWC) ? data1.size(3) : data1.size(1);
  const int height1 = (layout == NHWC) ? data1.size(1) : data1.size(2);
  const int width1 = (layout == NHWC) ? data1.size(2) : data1.size(3);
  const int height2 = (layout == NHWC) ? data2.size(1) : data2.size(2);
  const int width2 = (layout == NHWC) ? data2.size(2) : data2.size(3);

  if (index < n) {
    const int w2 = index % width2;  // output column, 0:width2-1
    const int h2 = index / width2;  // output row, 0:height2-1
    // Identical input/output extents: a plain copy, no interpolation needed.
    if (height1 == height2 && width1 == width2) {
      const int h1 = h2;
      const int w1 = w2;
      // Loop variable is `b`, not `n`, to avoid shadowing the thread-count
      // parameter `n` (the original shadowed it).
      for (int b = 0; b < batch_size; ++b) {
        for (int c = 0; c < channels; ++c) {
          if (layout == NHWC) {
            data2[b][h2][w2][c] = data1[b][h1][w1][c];
          } else {
            data2[b][c][h2][w2] = data1[b][c][h1][w1];
          }
        }
      }
      return;
    }
    // Top-left source sample and bilinear weights along the vertical axis.
    const Acctype h1r = rheight * h2;
    const int h1 = h1r;                           // truncation picks the upper row
    const int h1p = (h1 < height1 - 1) ? 1 : 0;   // 0 clamps at the bottom edge
    const Acctype h1lambda = h1r - h1;
    const Acctype h0lambda = Acctype(1) - h1lambda;
    // Same along the horizontal axis.
    const Acctype w1r = rwidth * w2;
    const int w1 = w1r;
    const int w1p = (w1 < width1 - 1) ? 1 : 0;    // 0 clamps at the right edge
    const Acctype w1lambda = w1r - w1;
    const Acctype w0lambda = Acctype(1) - w1lambda;

    // Weighted sum of the 2x2 source neighborhood, per image and channel.
    for (int b = 0; b < batch_size; ++b) {
      for (int c = 0; c < channels; ++c) {
        if (layout == NHWC) {
          const Acctype val = h0lambda * (w0lambda * data1[b][h1][w1][c]
                            + w1lambda * data1[b][h1][w1 + w1p][c])
                            + h1lambda * (w0lambda * data1[b][h1 + h1p][w1][c]
                            + w1lambda * data1[b][h1 + h1p][w1 + w1p][c]);
          data2[b][h2][w2][c] = ScalarConvert<Acctype, Dtype>::to(val);
        } else {
          const Acctype val = h0lambda * (w0lambda * data1[b][c][h1][w1]
                            + w1lambda * data1[b][c][h1][w1 + w1p])
                            + h1lambda * (w0lambda * data1[b][c][h1 + h1p][w1]
                            + w1lambda * data1[b][c][h1 + h1p][w1 + w1p]);
          data2[b][c][h2][w2] = ScalarConvert<Acctype, Dtype>::to(val);
        }
      }
    }
  }
}
|
||
} // namespace op | ||
} // namespace mxnet | ||
|
||
#endif // MXNET_OPERATOR_CONTRIB_BILINEAR_RESIZE_CUH_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.