From 704d0d556f915c89d9c2e6e890d19739df9b7003 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Wed, 1 May 2019 05:59:34 +0800 Subject: [PATCH] [int8] Add MobileNetV2_1.0 & ResNet18 Quantization (#14823) * add resnet18 and mobilenetv2 models * add readme * support mkldnn s8s8 goihw16g weight format * fix_readme_typo --- example/quantization/README.md | 49 ++++++++++--------- .../quantization/imagenet_gen_qsym_mkldnn.py | 20 ++++++-- src/operator/nn/mkldnn/mkldnn_base.cc | 1 + 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/example/quantization/README.md b/example/quantization/README.md index fc9a26755b4e..c39d027f8aed 100644 --- a/example/quantization/README.md +++ b/example/quantization/README.md @@ -27,17 +27,19 @@ The following models have been tested on Linux systems. | Model | Source | Dataset | FP32 Accuracy (top-1/top-5)| INT8 Accuracy (top-1/top-5)| |:---|:---|---|:---:|:---:| +| [ResNet18-V1](#3) | [Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html) | [Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) |70.07%/89.30%|69.85%/89.23%| | [ResNet50-V1](#3) | [Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html) | [Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) | 75.87%/92.72% | 75.71%/92.65% | -| [ResNet101-V1](#4) | [Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html) | [Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) | 77.3%/93.58% | 77.09%/93.41% | -|[Squeezenet 1.0](#5)|[Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|57.01%/79.71%|56.62%/79.55%| -|[MobileNet 1.0](#6)|[Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|69.76%/89.32%|69.61%/89.09%| +| [ResNet101-V1](#3) | [Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html) | [Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec) | 77.3%/93.58% | 77.09%/93.41% | +|[Squeezenet 1.0](#4)|[Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|57.01%/79.71%|56.62%/79.55%| +|[MobileNet 1.0](#5)|[Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|69.76%/89.32%|69.61%/89.09%| +|[MobileNetV2 1.0](#6)|[Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|70.14%/89.60%|69.53%/89.24%| |[Inception V3](#7)|[Gluon-CV](https://gluon-cv.mxnet.io/model_zoo/classification.html)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|76.49%/93.10% |76.38%/93% | |[ResNet152-V2](#8)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/resnet/152-layers/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|76.76%/93.03%|76.48%/92.96%| |[Inception-BN](#9)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/inception-bn/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|72.09%/90.60%|72.00%/90.53%| | [SSD-VGG16](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | VOC2007/2012 | 0.8366 mAP | 0.8364 mAP | | [SSD-VGG16](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | COCO2014 | 0.2552 mAP | 0.253 mAP | -

ResNet50-V1

+

ResNet18/50/101-V1

The following command is to download the pre-trained model from Gluon-CV and transfer it into the symbolic model which would be finally quantized. The [validation dataset](http://data.mxnet.io/data/val_256_q90.rec) is available for testing the pre-trained models: @@ -62,37 +64,36 @@ python imagenet_inference.py --symbol-file=./model/resnet50_v1-symbol.json --bat python imagenet_inference.py --symbol-file=./model/resnet50_v1-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True ``` -

ResNet101-V1

+

SqueezeNet 1.0

The following command is to download the pre-trained model from Gluon-CV and transfer it into the symbolic model which would be finally quantized. The [validation dataset](http://data.mxnet.io/data/val_256_q90.rec) is available for testing the pre-trained models: ``` -python imagenet_gen_qsym_mkldnn.py --model=resnet101_v1 --num-calib-batches=5 --calib-mode=naive +python imagenet_gen_qsym_mkldnn.py --model=squeezenet1.0 --num-calib-batches=5 --calib-mode=naive ``` - The model would be automatically replaced in fusion and quantization format. It is then saved as the quantized symbol and parameter files in the `./model` directory. The following command is to launch inference. ``` # USE MKLDNN AS SUBGRAPH BACKEND export MXNET_SUBGRAPH_BACKEND=MKLDNN -# Launch FP32 Inference -python imagenet_inference.py --symbol-file=./model/resnet101_v1-symbol.json --param-file=./model/resnet101_v1-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 +# Launch FP32 Inference +python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --param-file=./model/squeezenet1.0-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 # Launch INT8 Inference -python imagenet_inference.py --symbol-file=./model/resnet101_v1-quantized-5batches-naive-symbol.json --param-file=./model/resnet101_v1-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 +python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/squeezenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/resnet101_v1-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/resnet101_v1-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True ``` -

SqueezeNet 1.0

+

MobileNet 1.0

The following command is to download the pre-trained model from Gluon-CV and transfer it into the symbolic model which would be finally quantized. The [validation dataset](http://data.mxnet.io/data/val_256_q90.rec) is available for testing the pre-trained models: ``` -python imagenet_gen_qsym_mkldnn.py --model=squeezenet1.0 --num-calib-batches=5 --calib-mode=naive +python imagenet_gen_qsym_mkldnn.py --model=mobilenet1.0 --num-calib-batches=5 --calib-mode=naive ``` The model would be automatically replaced in fusion and quantization format. It is then saved as the quantized symbol and parameter files in the `./model` directory. The following command is to launch inference. @@ -101,22 +102,22 @@ The model would be automatically replaced in fusion and quantization format. It export MXNET_SUBGRAPH_BACKEND=MKLDNN # Launch FP32 Inference -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --param-file=./model/squeezenet1.0-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 +python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --param-file=./model/mobilenet1.0-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 # Launch INT8 Inference -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/squeezenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 +python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True ``` -

MobileNet 1.0

+

MobileNetV2 1.0

The following command is to download the pre-trained model from Gluon-CV and transfer it into the symbolic model which would be finally quantized. The [validation dataset](http://data.mxnet.io/data/val_256_q90.rec) is available for testing the pre-trained models: ``` -python imagenet_gen_qsym_mkldnn.py --model=mobilenet1.0 --num-calib-batches=5 --calib-mode=naive +python imagenet_gen_qsym_mkldnn.py --model=mobilenetv2_1.0 --num-calib-batches=5 --calib-mode=naive ``` The model would be automatically replaced in fusion and quantization format. It is then saved as the quantized symbol and parameter files in the `./model` directory. The following command is to launch inference. @@ -125,14 +126,14 @@ The model would be automatically replaced in fusion and quantization format. It export MXNET_SUBGRAPH_BACKEND=MKLDNN # Launch FP32 Inference -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --param-file=./model/mobilenet1.0-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 +python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json --param-file=./model/mobilenetv2_1.0-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 # Launch INT8 Inference -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 +python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenetv2_1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu --data-nthreads=1 # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True ```

Inception-V3

diff --git a/example/quantization/imagenet_gen_qsym_mkldnn.py b/example/quantization/imagenet_gen_qsym_mkldnn.py index 2ef137273cca..06a1272caf21 100644 --- a/example/quantization/imagenet_gen_qsym_mkldnn.py +++ b/example/quantization/imagenet_gen_qsym_mkldnn.py @@ -92,11 +92,13 @@ def save_params(fname, arg_params, aux_params, logger=None): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Generate a calibrated quantized model from a FP32 model with Intel MKL-DNN support') - parser.add_argument('--model', type=str, choices=['resnet50_v1', + parser.add_argument('--model', type=str, choices=['resnet18_v1', + 'resnet50_v1', 'resnet101_v1', 'inceptionv3', 'squeezenet1.0', 'mobilenet1.0', + 'mobilenetv2_1.0', 'imagenet1k-resnet-152', 'imagenet1k-inception-bn', 'custom'], @@ -163,7 +165,13 @@ def save_params(fname, arg_params, aux_params, logger=None): download_calib_dataset('http://data.mxnet.io/data/val_256_q90.rec', args.calib_dataset) # download model - if args.model in ['resnet50_v1', 'resnet101_v1', 'squeezenet1.0', 'mobilenet1.0', 'inceptionv3']: + if args.model in ['resnet18_v1', + 'resnet50_v1', + 'resnet101_v1', + 'squeezenet1.0', + 'mobilenet1.0', + 'mobilenetv2_1.0', + 'inceptionv3']: logger.info('model %s is converted from GluonCV' % args.model) args.use_gluon_model = True if args.use_gluon_model == True: @@ -216,7 +224,7 @@ def save_params(fname, arg_params, aux_params, logger=None): excluded_sym_names += ['flatten'] if exclude_first_conv: excluded_sym_names += ['conv_1'] - elif args.model in ['resnet50_v1', 'resnet101_v1']: + elif args.model in ['resnet18_v1', 'resnet50_v1', 'resnet101_v1']: rgb_mean = '123.68,116.779,103.939' rgb_std = '58.393, 57.12, 57.375' if exclude_first_conv: @@ -234,6 +242,12 @@ def save_params(fname, arg_params, aux_params, logger=None): 'mobilenet0_pool0_fwd'] if exclude_first_conv: excluded_sym_names += ['mobilenet0_conv0_fwd'] + elif args.model == 'mobilenetv2_1.0': + rgb_mean = '123.68,116.779,103.939' + rgb_std = '58.393, 57.12, 57.375' + excluded_sym_names += ['mobilenetv20_output_flatten0_flatten0'] + if exclude_first_conv: + excluded_sym_names += ['mobilenetv20_conv0_fwd'] elif args.model == 'inceptionv3': rgb_mean = '123.68,116.779,103.939' rgb_std = '58.393, 57.12, 57.375' diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc index e80358ac636a..5dccba281fd0 100644 --- a/src/operator/nn/mkldnn/mkldnn_base.cc +++ b/src/operator/nn/mkldnn/mkldnn_base.cc @@ -392,6 +392,7 @@ mkldnn_memory_format_t GetDefaultFormat(const mkldnn::memory::desc &desc) { case mkldnn_gOhwi8o: case mkldnn_gOhwi16o: case mkldnn_gOhIw16o4i: + case mkldnn_Goihw16g_s8s8: return mkldnn_goihw; default: LOG(FATAL) << "Unknown MKLDNN format for 5 dimensions: " << desc.data.format;