From 4fe5461eb98bdede589c511f486c1b934bfa6393 Mon Sep 17 00:00:00 2001
From: ciyong
Date: Tue, 15 Jan 2019 15:27:12 +0800
Subject: [PATCH] fix ssd quantization script error (#13843)

* fix ssd quantization script error

* update readme for ssd

* move quantized SSD instructions from quantization/README.md to ssd/README.md

* update ssd readme and accuracy

* update readme for SSD-vGG16
---
 example/quantization/README.md | 41 ++++------------------------------
 example/ssd/README.md          | 38 +++++++++++++++++++++++++++++++
 example/ssd/quantization.py    | 10 +++++----
 3 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/example/quantization/README.md b/example/quantization/README.md
index 0e63cd84ede7..b77537d4fba7 100644
--- a/example/quantization/README.md
+++ b/example/quantization/README.md
@@ -34,7 +34,7 @@ The following models have been tested on Linux systems.
 |[Inception V3](#7)|[Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|76.49%/93.10% |76.38%/93% |
 |[ResNet152-V2](#8)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/resnet/152-layers/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|76.76%/93.03%|76.48%/92.96%|
 |[Inception-BN](#9)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/inception-bn/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|72.09%/90.60%|72.00%/90.53%|
-| [SSD-VGG](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | VOC2007/2012 | 0.83 mAP | 0.82 mAP |
+| [SSD-VGG16](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | VOC2007/2012 | 0.8366 mAP | 0.8364 mAP |

 <h3 id='3'>ResNet50-V1</h3>

@@ -208,42 +208,9 @@ python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-symbo
 python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
 ```
 
-<h3 id='10'>SSD-VGG</h3>
+<h3 id='10'>SSD-VGG16</h3>
 
-Follow the [SSD example's instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model) in [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) to train a FP32 `SSD-VGG16_reduced_300x300` model based on Pascal VOC dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-dd479559.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd-val-fc19a535.zip). Extract the zip files, then rename the directories to `model` and `data` respectively. Then, rename the files in directories as follows.
-
-```
-data/
-|---val.rec
-|---val.lxt
-|---val.idx
-model/
-|---ssd_vgg16_reduced_300.params
-|---ssd_vgg16_reduced_300-symbol.json
-```
-
-Then, use the following command for quantization. By default, this script uses 5 batches (32 samples per batch) for naive calibration:
-
-```
-python quantization.py
-```
-
-After quantization, INT8 models will be saved in `model/` dictionary. Use the following command to launch inference.
-
-```
-# USE MKLDNN AS SUBGRAPH BACKEND
-export MXNET_SUBGRAPH_BACKEND=MKLDNN
-
-# Launch FP32 Inference
-python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/ssd_
-
-# Launch INT8 Inference
-python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/cqssd_
-
-# Launch dummy data Inference
-python benchmark_score.py --deploy --prefix=./model/ssd_
-python benchmark_score.py --deploy --prefix=./model/cqssd_
-```
+The SSD model is located in [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd). Follow [the instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#quantize-model) there to run the quantized SSD model.

 <h3 id='11'>Custom Model</h3>

@@ -322,4 +289,4 @@ by invoking `launch_quantize.sh`.
 
 **NOTE**:
 - This example has only been tested on Linux systems.
-- Performance is expected to decrease with GPU, however the memory footprint of a quantized model is smaller. The purpose of the quantization implementation is to minimize accuracy loss when converting FP32 models to INT8. MXNet community is working on improving the performance.
\ No newline at end of file
+- Performance is expected to decrease with GPU, however the memory footprint of a quantized model is smaller. The purpose of the quantization implementation is to minimize accuracy loss when converting FP32 models to INT8. MXNet community is working on improving the performance.
diff --git a/example/ssd/README.md b/example/ssd/README.md
index f70823de4808..713a9ea33c1b 100644
--- a/example/ssd/README.md
+++ b/example/ssd/README.md
@@ -25,6 +25,7 @@ remarkable traits of MXNet.
 Due to the permission issue, this example is maintained in this [repository](https://github.com/zhreshold/mxnet-ssd) separately. You can use the link regarding specific per example [issues](https://github.com/zhreshold/mxnet-ssd/issues).
 
 ### What's new
+* Support uint8 inference on CPU with the MKL-DNN backend. Uint8 inference achieves 0.8364 mAP, which is comparable to the FP32 accuracy (0.8366 mAP).
 * Added live camera capture and detection display (run with --camera flag). Example:
 `./demo.py --camera --cpu --frame-resize 0.5`
 * Added multiple trained models.
@@ -154,6 +155,43 @@ Make sure you have val.rec as validation dataset. It's the same one as used in t
 # cd /path/to/incubator-mxnet/example/ssd
 python evaluate.py --gpus 0,1 --batch-size 128 --epoch 0
 ```
+
+### Quantize model
+
+Follow the [Train instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model) to train an FP32 `SSD-VGG16_reduced_300x300` model on the Pascal VOC dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-dd479559.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd-val-fc19a535.zip). Create the `model` and `data` directories if they do not exist, extract the zip files, then rename the uncompressed files as follows (e.g., rename `ssd-val-fc19a535.idx` to `val.idx`, `ssd-val-fc19a535.lst` to `val.lst`, `ssd-val-fc19a535.rec` to `val.rec`, `ssd_vgg16_reduced_300-dd479559.params` to `ssd_vgg16_reduced_300-0000.params`, and `ssd_vgg16_reduced_300-symbol-dd479559.json` to `ssd_vgg16_reduced_300-symbol.json`).
+
+```
+data/
+|---val.rec
+|---val.lst
+|---val.idx
+model/
+|---ssd_vgg16_reduced_300-0000.params
+|---ssd_vgg16_reduced_300-symbol.json
+```
+
+Then, use the following command for quantization. By default, this script uses 5 batches (32 samples per batch) for naive calibration:
+
+```
+python quantization.py
+```
+
+After quantization, INT8 models will be saved in the `model/` directory. Use the following commands to launch inference.
+
+```
+# USE MKLDNN AS SUBGRAPH BACKEND
+export MXNET_SUBGRAPH_BACKEND=MKLDNN
+
+# Launch FP32 Inference
+python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/ssd_
+
+# Launch INT8 Inference
+python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/cqssd_
+
+# Launch dummy data Inference
+python benchmark_score.py --deploy --prefix=./model/ssd_
+python benchmark_score.py --deploy --prefix=./model/cqssd_
+```
 ### Convert model to deploy mode
 This simply removes all loss layers, and attach a layer for merging results and non-maximum suppression.
 Useful when loading python symbol is not available.
diff --git a/example/ssd/quantization.py b/example/ssd/quantization.py
index 231cc99f93bc..4e6e739963fc 100644
--- a/example/ssd/quantization.py
+++ b/example/ssd/quantization.py
@@ -119,12 +119,14 @@ def save_params(fname, arg_params, aux_params, logger=None):
     exclude_first_conv = args.exclude_first_conv
     excluded_sym_names = []
     rgb_mean = '123,117,104'
-    calib_layer = lambda name: name.endswith('_output')
     for i in range(1,19):
         excluded_sym_names += ['flatten'+str(i)]
     excluded_sym_names += ['relu4_3_cls_pred_conv', 'relu7_cls_pred_conv',
-                           'relu4_3_loc_pred_conv']
+                           'relu4_3_loc_pred_conv',
+                           'multibox_loc_pred',
+                           'concat0',
+                           'concat1']
 
     if exclude_first_conv:
         excluded_sym_names += ['conv1_1']
 
@@ -156,9 +158,9 @@ def save_params(fname, arg_params, aux_params, logger=None):
                                                     ctx=ctx, excluded_sym_names=excluded_sym_names,
                                                     calib_mode=calib_mode, calib_data=eval_iter,
                                                     num_calib_examples=num_calib_batches * batch_size,
-                                                    calib_layer=calib_layer, quantized_dtype=args.quantized_dtype,
+                                                    calib_layer=None, quantized_dtype=args.quantized_dtype,
                                                     label_names=(label_name,),
-                                                    calib_quantize_op = True,
+                                                    calib_quantize_op=True,
                                                     logger=logger)
     sym_name = '%s-symbol.json' % ('./model/cqssd_vgg16_reduced_300')
     param_name = '%s-%04d.params' % ('./model/cqssd_vgg16_reduced_300', epoch)
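
For reference, here is a minimal Python sketch of what the patched `example/ssd/quantization.py` flow amounts to: load the FP32 SSD-VGG16 checkpoint, keep the layers excluded above in FP32, run naive calibration over the packed `val.rec`, and save the INT8 model under the `cqssd_` prefix used by the inference commands. This is only a sketch: the checkpoint prefix and epoch, the `label` name, the `uint8` dtype, and the `ImageDetRecordIter` settings below are assumptions for illustration; the actual script derives them from its command-line arguments and uses its own detection record iterator.

```python
# Minimal sketch of the quantization flow adjusted by this patch.
# Paths, epoch, label name, dtype, and iterator settings are illustrative
# assumptions; quantization.py takes them from its command-line arguments.
import logging

import mxnet as mx
from mxnet.contrib.quantization import quantize_model

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Load the FP32 SSD-VGG16 model prepared as described in the README section above.
sym, arg_params, aux_params = mx.model.load_checkpoint('./model/ssd_vgg16_reduced_300', 0)

# Keep these layers in FP32, mirroring the exclusion list this patch adds.
excluded_sym_names = ['flatten' + str(i) for i in range(1, 19)]
excluded_sym_names += ['relu4_3_cls_pred_conv', 'relu7_cls_pred_conv',
                       'relu4_3_loc_pred_conv', 'multibox_loc_pred',
                       'concat0', 'concat1']

# Calibration data drawn from the packed validation set (iterator settings assumed).
eval_iter = mx.io.ImageDetRecordIter(path_imgrec='./data/val.rec',
                                     batch_size=32,
                                     data_shape=(3, 300, 300),
                                     mean_r=123, mean_g=117, mean_b=104,
                                     label_pad_width=350)

# Naive calibration over 5 batches of 32 samples, matching the README default.
qsym, qarg_params, aux_params = quantize_model(
    sym=sym, arg_params=arg_params, aux_params=aux_params,
    ctx=mx.cpu(), excluded_sym_names=excluded_sym_names,
    calib_mode='naive', calib_data=eval_iter,
    num_calib_examples=5 * 32,
    calib_layer=None, quantized_dtype='uint8',
    label_names=('label',), calib_quantize_op=True, logger=logger)

# Save under the cqssd_ prefix so `evaluate.py --deploy --prefix=./model/cqssd_` can find it.
mx.model.save_checkpoint('./model/cqssd_vgg16_reduced_300', 0, qsym, qarg_params, aux_params)
```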