
Add gpu implementation and tests
sandeep-krishnamurthy committed Jan 10, 2019
1 parent 0f10ec2 commit e0a2f1f
Showing 4 changed files with 107 additions and 20 deletions.
61 changes: 42 additions & 19 deletions src/operator/image/totensor_op-inl.h
@@ -75,46 +75,69 @@ inline bool ToTensorType(const nnvm::NodeAttrs& attrs,
 }
 
 // Operator Implementation
-void ToTensorImpl(const std::vector<TBlob> &inputs,
-                  const std::vector<TBlob> &outputs,
-                  const int length,
-                  const int channel,
-                  const int step = 0) {
+template<int req>
+struct totensor_forward {
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(int l, float* out_data, const DType* in_data,
+                                  const int c, const int length, const int channel,
+                                  const int step, const float normalize_factor = 255.0f) {
+    KERNEL_ASSIGN(out_data[step + c*length + l], req,
+                  (in_data[step + l*channel + c]) / normalize_factor);
+  }
+};
+
+template<typename xpu>
+void ToTensorImpl(const OpContext &ctx,
+                  const std::vector<TBlob> &inputs,
+                  const std::vector<TBlob> &outputs,
+                  const std::vector<OpReqType> &req,
+                  const int length,
+                  const int channel,
+                  const int step = 0) {
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+
   MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
+    MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
       float* output = outputs[0].dptr<float>();
       DType* input = inputs[0].dptr<DType>();
 
-      for (int l = 0; l < length; ++l) {
-        for (int c = 0; c < channel; ++c) {
-          output[step + c*length + l] = static_cast<float>(input[step + l*channel + c]) / 255.0f;
-        }
-      }
+      for (int c = 0; c < channel; ++c) {
+        mxnet_op::Kernel<totensor_forward<req_type>, xpu>::Launch(
+            s, length, output, input, c, length, channel, step);
+      }
+    });
   });
 }
 
-void ToTensor(const nnvm::NodeAttrs &attrs,
-              const OpContext &ctx,
-              const std::vector<TBlob> &inputs,
-              const std::vector<OpReqType> &req,
-              const std::vector<TBlob> &outputs) {
+template<typename xpu>
+void ToTensorOpForward(const nnvm::NodeAttrs &attrs,
+                       const OpContext &ctx,
+                       const std::vector<TBlob> &inputs,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &outputs) {
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
 
   CHECK_EQ(req[0], kWriteTo)
-    << "`to_tensor` does not support inplace";
+    << "`to_tensor` does not support inplace updates";
 
-  // 3D Input - 1 image
+  // 3D Input - (h, w, c)
   if (inputs[0].ndim() == 3) {
     const int length = inputs[0].shape_[0] * inputs[0].shape_[1];
     const int channel = inputs[0].shape_[2];
-    ToTensorImpl(inputs, outputs, length, channel);
+    ToTensorImpl<xpu>(ctx, inputs, outputs, req, length, channel);
   } else if (inputs[0].ndim() == 4) {
-    // 4D input batch of images
+    // 4D input (n, h, w, c)
     const int batch_size = inputs[0].shape_[0];
     const int length = inputs[0].shape_[1] * inputs[0].shape_[2];
     const int channel = inputs[0].shape_[3];
     const int step = channel * length;
 
     #pragma omp parallel for
     for (auto n = 0; n < batch_size; ++n) {
-      ToTensorImpl(inputs, outputs, length, channel, n*step);
+      ToTensorImpl<xpu>(ctx, inputs, outputs, req, length, channel, n*step);
     }
   }
 }
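
For reference, the index arithmetic in totensor_forward::Map amounts to an HWC-to-CHW transpose combined with division by 255. A minimal NumPy sketch of the same computation (illustrative only, not part of this commit):

import numpy as np

# One launch per channel ch; each launch parallelizes over the
# length = h * w pixel positions l, mirroring Kernel<...>::Launch above.
h, w, c = 4, 5, 3
img = np.random.randint(0, 256, size=(h, w, c), dtype=np.uint8)

length = h * w
flat_in = img.reshape(-1)                        # interleaved HWC: in[l*channel + c]
flat_out = np.empty(c * length, dtype=np.float32)
for ch in range(c):
    for l in range(length):
        flat_out[ch * length + l] = flat_in[l * c + ch] / 255.0   # planar CHW

# The per-element loop agrees with the vectorized transpose-and-scale.
assert np.allclose(flat_out.reshape(c, h, w),
                   img.transpose(2, 0, 1).astype(np.float32) / 255.0)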
2 changes: 1 addition & 1 deletion src/operator/image/totensor_op.cc
@@ -58,7 +58,7 @@ with values in the range [0, 1)
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ToTensorShape)
 .set_attr<nnvm::FInferType>("FInferType", ToTensorType)
-.set_attr<FCompute>("FCompute<cpu>", ToTensor)
+.set_attr<FCompute>("FCompute<cpu>", ToTensorOpForward<cpu>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs) {
     return std::vector<std::pair<int, int> >{{0, 0}};
12 changes: 12 additions & 0 deletions src/operator/image/totensor_op.cu
@@ -16,3 +16,15 @@
* specific language governing permissions and limitations
* under the License.
*/
#include "./totensor_op-inl.h"

namespace mxnet {
namespace op {
namespace image {

NNVM_REGISTER_OP(_image_to_tensor)
.set_attr<FCompute>("FCompute<gpu>", ToTensorOpForward<gpu>);

} // namespace image
} // namespace op
} // namespace mxnet
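
With the GPU kernel registered, both device paths run the same templated forward function. A quick cross-device smoke check might look like the sketch below (hypothetical, not part of this commit; it assumes the _image_to_tensor op is exposed to Python as mx.nd.image.to_tensor and that a GPU is available):

import mxnet as mx
import numpy as np

data = mx.nd.random.uniform(0, 255, shape=(300, 300, 3)).astype('uint8')
# The CPU and GPU kernels should produce numerically matching results.
cpu_out = mx.nd.image.to_tensor(data.as_in_context(mx.cpu()))
gpu_out = mx.nd.image.to_tensor(data.as_in_context(mx.gpu(0)))
np.testing.assert_almost_equal(cpu_out.asnumpy(), gpu_out.asnumpy())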
52 changes: 52 additions & 0 deletions tests/python/gpu/test_gluon_transforms.py
@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import print_function
import os
import sys
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
from mxnet import gluon
from mxnet.base import MXNetError
from mxnet.gluon.data.vision import transforms
from mxnet.test_utils import assert_almost_equal, set_default_context
from mxnet.test_utils import almost_equal
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
from common import assertRaises, setup_module, with_seed, teardown


set_default_context(mx.gpu(0))

@with_seed()
def test_to_tensor():
    # 3D Input
    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(), np.transpose(
        data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))

    # 4D Input
    data_in = np.random.uniform(0, 255, (5, 300, 300, 3)).astype(dtype=np.uint8)
    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
    assert_almost_equal(out_nd.asnumpy(), np.transpose(
        data_in.astype(dtype=np.float32) / 255.0, (0, 3, 1, 2)))

    # Invalid Input
    invalid_data_in = nd.random.uniform(0, 255, (5, 5, 300, 300, 3)).astype(dtype=np.uint8)
    transformer = transforms.ToTensor()
    assertRaises(MXNetError, transformer, invalid_data_in)
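
A usage sketch to round out the tests (illustrative, not part of the test file): because the operator also accepts 4D (n, h, w, c) input, a whole batch can be converted on the GPU in one call rather than per image:

import mxnet as mx
from mxnet.gluon.data.vision import transforms

# 8 HWC uint8 images converted to a float32 NCHW batch in a single op call.
batch = mx.nd.random.uniform(0, 255, shape=(8, 300, 300, 3), ctx=mx.gpu(0)).astype('uint8')
tensor_batch = transforms.ToTensor()(batch)     # shape (8, 3, 300, 300)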
