Add int8 data loader (apache#14123)

* Enable int8 data layer
  Change-Id: I3d97ef80b7466d7555f4970e24f02e8dfba6be2b
* fix lint
* Add parameter description
* Fix imagenet_inference.py
* Allow quantize_v2 to accept int8
* make float32 default
haohuanw · Jun 23, 2019 · ec89a41 · ec89a41
1 parent 597e616
commit ec89a41
Show file tree

Hide file tree

Showing 10 changed files with 236 additions and 81 deletions.
diff --git a/docs/api/perl/io.md b/docs/api/perl/io.md
@@ -69,6 +69,7 @@ Then we can call `$mod->fit($nd_iter, num_epoch=>2)` to train `loss` by 2 epochs
 mx->io->NDArrayIter
 mx->io->CSVIter
 mx->io->ImageRecordIter
+mx->io->ImageRecordInt8Iter
 mx->io->ImageRecordUInt8Iter
 mx->io->MNISTIter
 mx->recordio->MXRecordIO

diff --git a/docs/api/python/io/io.md b/docs/api/python/io/io.md
@@ -75,6 +75,7 @@ A detailed tutorial is available at
     io.CSVIter
     io.LibSVMIter
     io.ImageRecordIter
+    io.ImageRecordInt8Iter
     io.ImageRecordUInt8Iter
     io.MNISTIter
     recordio.MXRecordIO

diff --git a/example/quantization/imagenet_inference.py b/example/quantization/imagenet_inference.py
@@ -19,6 +19,7 @@
 import logging
 import os
 import time
+import numpy as np
 import mxnet as mx
 from mxnet import nd
 from mxnet.contrib.quantization import *
@@ -98,22 +99,36 @@ def score(sym, arg_params, aux_params, data, devs, label_name, max_num_examples,
             logger.info(m.get())
 
 
-def benchmark_score(symbol_file, ctx, batch_size, num_batches, logger=None):
+def benchmark_score(symbol_file, ctx, batch_size, num_batches, data_layer_type, logger=None):
     # get mod
     cur_path = os.path.dirname(os.path.realpath(__file__))
     symbol_file_path = os.path.join(cur_path, symbol_file)
     if logger is not None:
         logger.info('Loading symbol from file %s' % symbol_file_path)
     sym = mx.sym.load(symbol_file_path)
     mod = mx.mod.Module(symbol=sym, context=ctx)
-    mod.bind(for_training     = False,
-             inputs_need_grad = False,
-             data_shapes      = [('data', (batch_size,)+data_shape)])
+    if data_layer_type == "int8":
+        dshape = mx.io.DataDesc(name='data', shape=(
+            batch_size,) + data_shape, dtype=np.int8)
+    elif data_layer_type == 'uint8':
+        dshape = mx.io.DataDesc(name='data', shape=(
+            batch_size,) + data_shape, dtype=np.uint8)
+    else:  # float32
+        dshape = mx.io.DataDesc(name='data', shape=(
+            batch_size,) + data_shape, dtype=np.float32)
+    mod.bind(for_training=False,
+             inputs_need_grad=False,
+             data_shapes=[dshape])
     mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
 
     # get data
-    data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=ctx) for _, shape in mod.data_shapes]
-    batch = mx.io.DataBatch(data, []) # empty label
+    if data_layer_type == "float32":
+        data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=ctx, dtype=data_layer_type)
+                for _, shape in mod.data_shapes]
+    else:
+        data = [mx.nd.full(shape=shape, val=127, ctx=ctx, dtype=data_layer_type)
+                for _, shape in mod.data_shapes]
+    batch = mx.io.DataBatch(data, [])  # empty label
 
     # run
     dry_run = 5                 # use 5 iterations to warm up
@@ -152,6 +167,9 @@ def benchmark_score(symbol_file, ctx, batch_size, num_batches, logger=None):
                         help='shuffling seed, see'
                              ' https://mxnet.incubator.apache.org/api/python/io/io.html?highlight=imager#mxnet.io.ImageRecordIter'
                              ' for more details')
+    parser.add_argument('--data-layer-type', type=str, default="float32",
+                        choices=['float32', 'int8', 'uint8'],
+                        help='data type for data layer')
 
     args = parser.parse_args()
 
@@ -192,24 +210,52 @@ def benchmark_score(symbol_file, ctx, batch_size, num_batches, logger=None):
     data_shape = tuple([int(i) for i in image_shape.split(',')])
     logger.info('Input data shape = %s' % str(data_shape))
 
+    data_layer_type = args.data_layer_type
     if args.benchmark == False:
         dataset = args.dataset
         download_dataset('http://data.mxnet.io/data/val_256_q90.rec', dataset)
         logger.info('Dataset for inference: %s' % dataset)
 
         # creating data iterator
-        data = mx.io.ImageRecordIter(path_imgrec=dataset,
-                                    label_width=1,
-                                    preprocess_threads=data_nthreads,
-                                    batch_size=batch_size,
-                                    data_shape=data_shape,
-                                    label_name=label_name,
-                                    rand_crop=False,
-                                    rand_mirror=False,
-                                    shuffle=True,
-                                    shuffle_chunk_seed=3982304,
-                                    seed=48564309,
-                                    **combine_mean_std)
+        if data_layer_type == 'int8':
+            data = mx.io.ImageRecordInt8Iter(path_imgrec=dataset,
+                                             label_width=1,
+                                             preprocess_threads=data_nthreads,
+                                             batch_size=batch_size,
+                                             data_shape=data_shape,
+                                             label_name=label_name,
+                                             rand_crop=False,
+                                             rand_mirror=False,
+                                             shuffle=args.shuffle_dataset,
+                                             shuffle_chunk_seed=args.shuffle_chunk_seed,
+                                             seed=args.shuffle_seed,
+                                             **combine_mean_std)
+        elif data_layer_type == 'uint8':
+            data = mx.io.ImageRecordUInt8Iter(path_imgrec=dataset,
+                                              label_width=1,
+                                              preprocess_threads=data_nthreads,
+                                              batch_size=batch_size,
+                                              data_shape=data_shape,
+                                              label_name=label_name,
+                                              rand_crop=False,
+                                              rand_mirror=False,
+                                              shuffle=args.shuffle_dataset,
+                                              shuffle_chunk_seed=args.shuffle_chunk_seed,
+                                              seed=args.shuffle_seed,
+                                              **combine_mean_std)
+        else:  #float32
+            data = mx.io.ImageRecordIter(path_imgrec=dataset,
+                                         label_width=1,
+                                         preprocess_threads=data_nthreads,
+                                         batch_size=batch_size,
+                                         data_shape=data_shape,
+                                         label_name=label_name,
+                                         rand_crop=False,
+                                         rand_mirror=False,
+                                         shuffle=args.shuffle_dataset,
+                                         shuffle_chunk_seed=args.shuffle_chunk_seed,
+                                         seed=args.shuffle_seed,
+                                         **combine_mean_std)
 
         # loading model
         sym, arg_params, aux_params = load_model(symbol_file, param_file, logger)
@@ -224,5 +270,5 @@ def benchmark_score(symbol_file, ctx, batch_size, num_batches, logger=None):
             max_num_examples=num_inference_images, logger=logger)
     else:
         logger.info('Running model %s for inference' % symbol_file)
-        speed = benchmark_score(symbol_file, ctx, batch_size, args.num_inference_batches, logger)
+        speed = benchmark_score(symbol_file, ctx, batch_size, args.num_inference_batches, data_layer_type, logger)
         logger.info('batch size %2d, image/sec: %f', batch_size, speed)
diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
@@ -1602,8 +1602,8 @@ MXNET_DLL int MXSymbolInferTypePartial(SymbolHandle sym,
  * \param excluded_symbols op names to be excluded from being quantized
  * \param num_offline number of parameters that are quantized offline
  * \param offline_params array of c strings representing the names of params quantized offline
- * \param quantized_dtype the quantized destination type for input data.
- * \param calib_quantize **Deprecated**. quantize op will always be calibrated if could.
+ * \param quantized_dtype the quantized destination type for input data
+ * \param calib_quantize **Deprecated**. quantize op will always be calibrated if possible
  */
 MXNET_DLL int MXQuantizeSymbol(SymbolHandle sym_handle, SymbolHandle *ret_sym_handle,
                                const mx_uint num_excluded_symbols,

diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/IO.pm b/perl-package/AI-MXNet/lib/AI/MXNet/IO.pm
@@ -642,6 +642,7 @@ extends 'AI::MXNet::DataIter';
     mx->io->CSVIter                     Returns the CSV file iterator.
     mx->io->LibSVMIter                  Returns the LibSVM iterator which returns data with csr storage type.
     mx->io->ImageRecordIter             Iterates on image RecordIO files
+    mx->io->ImageRecordInt8Iter         Iterates on image RecordIO files, yielding int8 data
     mx->io->ImageRecordUInt8Iter        Iterating on image RecordIO files
     mx->io->MNISTIter                   Iterating on the MNIST dataset.
     mx->recordio->MXRecordIO            Reads/writes RecordIO data format, supporting sequential read and write.

diff --git a/src/io/iter_image_recordio_2.cc b/src/io/iter_image_recordio_2.cc
@@ -372,6 +372,7 @@ void ImageRecordIOParser2<DType>::ProcessImage(const cv::Mat& res,
   float RGBA_MULT[4] = { 0 };
   float RGBA_BIAS[4] = { 0 };
   float RGBA_MEAN[4] = { 0 };
+  int16_t RGBA_MEAN_INT[4] = {0};
   mshadow::Tensor<cpu, 3, DType>& data = (*data_ptr);
   if (!std::is_same<DType, uint8_t>::value) {
     RGBA_MULT[0] = contrast_scaled / normalize_param_.std_r;
@@ -387,6 +388,10 @@ void ImageRecordIOParser2<DType>::ProcessImage(const cv::Mat& res,
       RGBA_MEAN[1] = normalize_param_.mean_g;
       RGBA_MEAN[2] = normalize_param_.mean_b;
       RGBA_MEAN[3] = normalize_param_.mean_a;
+      RGBA_MEAN_INT[0] = std::round(normalize_param_.mean_r);
+      RGBA_MEAN_INT[1] = std::round(normalize_param_.mean_g);
+      RGBA_MEAN_INT[2] = std::round(normalize_param_.mean_b);
+      RGBA_MEAN_INT[3] = std::round(normalize_param_.mean_a);
     }
   }
 
@@ -408,17 +413,30 @@ void ImageRecordIOParser2<DType>::ProcessImage(const cv::Mat& res,
   for (int i = 0; i < res.rows; ++i) {
     const uchar* im_data = res.ptr<uchar>(i);
     for (int j = 0; j < res.cols; ++j) {
-      for (int k = 0; k < n_channels; ++k) {
-        RGBA[k] = im_data[swap_indices[k]];
-      }
-      if (!std::is_same<DType, uint8_t>::value) {
-        // normalize/mirror here to avoid memory copies
-        // logic from iter_normalize.h, function SetOutImg
+      if (std::is_same<DType, int8_t>::value) {
+        if (meanfile_ready_) {
+          for (int k = 0; k < n_channels; ++k) {
+            RGBA[k] = cv::saturate_cast<int8_t>(im_data[swap_indices[k]] -
+                                    static_cast<int16_t>(std::round(meanimg_[k][i][j])));
+          }
+        } else {
+          for (int k = 0; k < n_channels; ++k) {
+            RGBA[k] = cv::saturate_cast<int8_t>(im_data[swap_indices[k]] - RGBA_MEAN_INT[k]);
+          }
+        }
+      } else {
         for (int k = 0; k < n_channels; ++k) {
-          if (meanfile_ready_) {
-            RGBA[k] = (RGBA[k] - meanimg_[k][i][j]) * RGBA_MULT[k] + RGBA_BIAS[k];
-          } else {
-            RGBA[k] = (RGBA[k] - RGBA_MEAN[k]) * RGBA_MULT[k] + RGBA_BIAS[k];
+          RGBA[k] = im_data[swap_indices[k]];
+        }
+        if (!std::is_same<DType, uint8_t>::value) {
+          // normalize/mirror here to avoid memory copies
+          // logic from iter_normalize.h, function SetOutImg
+          for (int k = 0; k < n_channels; ++k) {
+            if (meanfile_ready_) {
+              RGBA[k] = (RGBA[k] - meanimg_[k][i][j]) * RGBA_MULT[k] + RGBA_BIAS[k];
+            } else {
+              RGBA[k] = (RGBA[k] - RGBA_MEAN[k]) * RGBA_MULT[k] + RGBA_BIAS[k];
+            }
           }
         }
       }
@@ -795,5 +813,22 @@ the data type instead of ``float``.
 .set_body([]() {
     return new ImageRecordIter2<uint8_t>();
   });
+
+MXNET_REGISTER_IO_ITER(ImageRecordInt8Iter)
+.describe(R"code(Iterating on image RecordIO files
+
+This iterator is identical to ``ImageRecordIter`` except for using ``int8`` as
+the data type instead of ``float``.
+
+)code" ADD_FILELINE)
+.add_arguments(ImageRecParserParam::__FIELDS__())
+.add_arguments(ImageRecordParam::__FIELDS__())
+.add_arguments(BatchParam::__FIELDS__())
+.add_arguments(PrefetcherParam::__FIELDS__())
+.add_arguments(ListDefaultAugParams())
+.set_body([]() {
+    return new ImageRecordIter2<int8_t>();
+  });
+
 }  // namespace io
 }  // namespace mxnet
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h b/src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h
@@ -123,13 +123,32 @@ static void MKLDNNQuantizeV2Compute(const nnvm::NodeAttrs& attrs, const OpContex
                                     const std::vector<OpReqType>& req,
                                     const std::vector<NDArray>& outputs) {
   const QuantizeV2Param& param = nnvm::get<QuantizeV2Param>(attrs.parsed);
-  auto out_type = GetOutputType(param);
-  if (out_type == mshadow::kUint8) {
-    MKLDNNQuantizeComputeKer<float, uint8_t>(inputs, outputs, param, req);
-  } else if (out_type == mshadow::kInt8) {
-    MKLDNNQuantizeComputeKer<float, int8_t>(inputs, outputs, param, req);
+  if (inputs[0].dtype() == mshadow::kUint8 || inputs[0].dtype() == mshadow::kInt8) {
+    if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
+      *outputs[1].data().dptr<float>() = param.min_calib_range.value();
+      *outputs[2].data().dptr<float>() = param.max_calib_range.value();
+    } else {
+      if (inputs[0].dtype() == mshadow::kUint8) {
+        *outputs[1].data().dptr<float>() = 0;
+        *outputs[2].data().dptr<float>() = 255;
+      } else {
+        *outputs[1].data().dptr<float>() = -127;
+        *outputs[2].data().dptr<float>() = 127;
+      }
+    }
+    if (req[0] != kWriteInplace) {
+      const_cast<NDArray&>(outputs[0]).CopyFrom(*inputs[0].GetMKLDNNData());
+      MKLDNNStream::Get()->Submit();
+    }
   } else {
-    LOG(FATAL) << "mkldnn quantize op only supports int8 and uint8 as output type";
+    auto out_type = GetOutputType(param);
+    if (out_type == mshadow::kUint8) {
+      MKLDNNQuantizeComputeKer<float, uint8_t>(inputs, outputs, param, req);
+    } else if (out_type == mshadow::kInt8) {
+      MKLDNNQuantizeComputeKer<float, int8_t>(inputs, outputs, param, req);
+    } else {
+      LOG(FATAL) << "mkldnn quantize op only supports int8 and uint8 as output type";
+    }
   }
 }