diff --git a/example/quantization/README.md b/example/quantization/README.md
index 032ca97176df..8cdc1bb7e06f 100644
--- a/example/quantization/README.md
+++ b/example/quantization/README.md
@@ -80,6 +80,29 @@ optional arguments:
if calibration mode is enabled
```
+A new benchmark script `launch_inference_mkldnn.sh` has been designed to launch performance benchmarks for float32 or int8 image-classification models with Intel® MKL-DNN.
+```
+usage: bash ./launch_inference_mkldnn.sh [[[-s symbol_file ] [-b batch_size] [-iter iteration] [-ins instance] [-c cores/instance]] | [-h]]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -s, --symbol_file symbol file for benchmark
+ -b, --batch_size inference batch size
+ default: 64
+ -iter, --iteration inference iteration
+ default: 500
+ -ins, --instance launch multi-instance inference
+ default: one instance per socket
+ -c, --core number of cores per instance
+ default: divide full physical cores
+
+example: resnet int8 performance benchmark on c5.24xlarge (two sockets, 24 physical cores per socket).
+
+ bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-quantized-5batches-naive-symbol.json
+
+will launch two instances for throughput benchmark and each instance will use 24 physical cores.
+```
+
Use the following command to install [Gluon-CV](https://gluon-cv.mxnet.io/):
```
@@ -120,8 +143,8 @@ python imagenet_inference.py --symbol-file=./model/resnet50_v1-symbol.json --par
python imagenet_inference.py --symbol-file=./model/resnet50_v1-quantized-5batches-naive-symbol.json --param-file=./model/resnet50_v1-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/resnet50_v1-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
-python imagenet_inference.py --symbol-file=./model/resnet50_v1-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-symbol.json
+bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-quantized-5batches-naive-symbol.json
```
SqueezeNet 1.0
@@ -142,8 +165,8 @@ python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --p
python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/squeezenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
-python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/squeezenet1.0-symbol.json
+bash ./launch_inference_mkldnn.sh -s ./model/squeezenet1.0-quantized-5batches-naive-symbol.json
```
MobileNet 1.0
@@ -164,8 +187,8 @@ python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --pa
python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
-python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/mobilenet1.0-symbol.json
+bash ./launch_inference_mkldnn.sh -s ./model/mobilenet1.0-quantized-5batches-naive-symbol.json
```
MobileNetV2 1.0
@@ -186,8 +209,8 @@ python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json -
python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenetv2_1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
-python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/mobilenetv2_1.0-symbol.json
+bash ./launch_inference_mkldnn.sh -s ./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json
```
Inception-V3
@@ -208,8 +231,8 @@ python imagenet_inference.py --symbol-file=./model/inceptionv3-symbol.json --par
python imagenet_inference.py --symbol-file=./model/inceptionv3-quantized-5batches-naive-symbol.json --param-file=./model/inceptionv3-quantized-0000.params --image-shape=3,299,299 --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/inceptionv3-symbol.json --image-shape=3,299,299 --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
-python imagenet_inference.py --symbol-file=./model/inceptionv3-quantized-5batches-naive-symbol.json --image-shape=3,299,299 --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/inceptionv3-symbol.json
+bash ./launch_inference_mkldnn.sh -s ./model/inceptionv3-quantized-5batches-naive-symbol.json
```
ResNet152-V2
@@ -231,8 +254,8 @@ python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-symbol.
python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json --param-file=./model/imagenet1k-resnet-152-quantized-0000.params --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
-python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-resnet-152-symbol.json
+bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json
```
Inception-BN
@@ -254,8 +277,8 @@ python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-symbo
python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json --param-file=./model/imagenet1k-inception-bn-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
-python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-inception-bn-symbol.json
+bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json
```
SSD-VGG16
@@ -307,7 +330,7 @@ python imagenet_gen_qsym_mkldnn.py --model=custom --num-calib-batches=5 --calib-
python imagenet_inference.py --symbol-file=./model/*.json --param-file=./model/*.params --rgb-mean=* --rgb-std=* --num-skipped-batches=* --batch-size=* --num-inference-batches=*--dataset=./data/* --ctx=cpu
# Launch dummy data Inference
-python imagenet_inference.py --symbol-file=./model/*.json --batch-size=* --num-inference-batches=500 --ctx=cpu --benchmark=True
+bash ./launch_inference_mkldnn.sh -s ./model/*.json
```
Model Quantization with CUDNN
diff --git a/example/quantization/launch_inference_mkldnn.sh b/example/quantization/launch_inference_mkldnn.sh
new file mode 100644
index 000000000000..f67787b41b03
--- /dev/null
+++ b/example/quantization/launch_inference_mkldnn.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Benchmark a float32 or int8 image-classification model with MKL-DNN.
+# By default launches one inference instance per socket, each pinned to
+# its own physical cores and NUMA node, then reports aggregate throughput.
+
+usage()
+{
+  echo "usage: bash ./launch_inference_mkldnn.sh [[[-s symbol_file ] [-b batch_size] [-iter iteration] [-ins instance] [-c cores/instance]] | [-h]]"
+}
+
+# Parse command-line options.
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --symbol | -s)
+      shift
+      SYMBOL=$1
+      ;;
+    --batch-size | -b)
+      shift
+      BS=$1
+      ;;
+    --iteration | -iter)
+      shift
+      ITERATIONS=$1
+      ;;
+    --instance | -ins)
+      shift
+      INS=$1
+      ;;
+    --core | -c)
+      shift
+      CORES=$1
+      ;;
+    --help | -h)
+      usage
+      exit 1
+      ;;
+    *)
+      usage
+      exit 1
+  esac
+  shift
+done
+
+# Discover the machine topology to compute per-instance core bindings.
+NUM_SOCKET=`lscpu | grep 'Socket(s)' | awk '{print $NF}'`
+NUM_NUMA_NODE=`lscpu | grep 'NUMA node(s)' | awk '{print $NF}'`
+CORES_PER_SOCKET=`lscpu | grep 'Core(s) per socket' | awk '{print $NF}'`
+NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))
+CORES_PER_NUMA=$((NUM_CORES / NUM_NUMA_NODE))
+echo "target machine has $NUM_CORES physical core(s) on $NUM_NUMA_NODE numa nodes of $NUM_SOCKET socket(s)."
+
+# A symbol file is mandatory; abort instead of failing later in numactl/python.
+if [ -z "$SYMBOL" ]; then
+  echo "Error: Need a symbol file as input."
+  usage
+  exit 1
+fi
+if [ -z "$INS" ]; then
+  echo "Default: launch one instance per socket."
+  INS=$NUM_SOCKET
+fi
+if [ -z "$CORES" ]; then
+  echo "Default: divide full physical cores."
+  CORES=$((NUM_CORES / INS))
+fi
+if [ -z "$BS" ]; then
+  echo "Default: set batch size to 64."
+  BS=64
+fi
+if [ -z "$ITERATIONS" ]; then
+  echo "Default: set iterations to 500."
+  ITERATIONS=500
+fi
+
+echo " benchmark configs"
+echo " cores per instance: $CORES"
+echo " total instances: $INS"
+echo " batch size: $BS"
+echo " iterations: $ITERATIONS"
+echo ""
+
+rm BENCHMARK_*.log || echo "benchmarking..."
+
+# Launch each instance pinned to its own core range and NUMA node.
+for((i=0;i<$INS;i++));
+do
+  ((a=$i*$CORES))
+  ((b=$a+$CORES-1))
+  memid=$((b/CORES_PER_NUMA))
+  LOG=BENCHMARK_$i.log
+  echo " $i instance use $a-$b cores and $memid mem with $LOG"
+  KMP_AFFINITY=granularity=fine,noduplicates,compact,1,0 \
+  OMP_NUM_THREADS=$CORES \
+  nohup numactl --physcpubind=$a-$b --membind=$memid python imagenet_inference.py --symbol-file=$SYMBOL --batch-size=$BS --num-inference-batches=$ITERATIONS --ctx=cpu --benchmark=True > $LOG 2>&1 &
+done
+wait
+
+# Sum per-instance throughput; latency is derived per image in milliseconds.
+fps=`grep image/sec BENCHMARK_*.log | awk '{ sum += $(NF) }; END { print sum }'`
+latency=$(echo "scale=2; 1000*$INS/$fps" | bc)
+echo "overall throughput: $fps"
+echo "latency per instance: $latency"
+echo "benchmark finish:)"