From 2d41f2a83d4205831fccecdd23cf1b547c2f1244 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Mon, 9 Dec 2019 20:37:29 +0800 Subject: [PATCH] add inference benchmark script (#16978) --- example/quantization/README.md | 53 ++++++--- .../quantization/launch_inference_mkldnn.sh | 111 ++++++++++++++++++ 2 files changed, 149 insertions(+), 15 deletions(-) create mode 100644 example/quantization/launch_inference_mkldnn.sh diff --git a/example/quantization/README.md b/example/quantization/README.md index 032ca97176df..8cdc1bb7e06f 100644 --- a/example/quantization/README.md +++ b/example/quantization/README.md @@ -80,6 +80,29 @@ optional arguments: if calibration mode is enabled ``` +A new benchmark script `launch_inference_mkldnn.sh` has been designed to launch performance benchmark for float32 or int8 image-classification models with Intel® MKL-DNN. +``` +usage: bash ./launch_inference_mkldnn.sh [[[-s symbol_file ] [-b batch_size] [-iter iteration] [-ins instance] [-c cores/instance]] | [-h]] + +optional arguments: + -h, --help show this help message and exit + -s, --symbol_file symbol file for benchmark + -b, --batch_size inference batch size + default: 64 + -iter, --iteration inference iteration + default: 500 + -ins, --instance launch multi-instance inference + default: one instance per socket + -c, --core number of cores per instance + default: divide full physical cores + +example: resnet int8 performance benchmark on c5.24xlarge (dual sockets, 24 physical cores per socket). + + bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-quantized-5batches-naive-symbol.json + +will launch two instances for throughput benchmark and each instance will use 24 physical cores. 
+``` + Use the following command to install [Gluon-CV](https://gluon-cv.mxnet.io/): ``` @@ -120,8 +143,8 @@ python imagenet_inference.py --symbol-file=./model/resnet50_v1-symbol.json --par python imagenet_inference.py --symbol-file=./model/resnet50_v1-quantized-5batches-naive-symbol.json --param-file=./model/resnet50_v1-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/resnet50_v1-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/resnet50_v1-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/resnet50_v1-quantized-5batches-naive-symbol.json ```

SqueezeNet 1.0

@@ -142,8 +165,8 @@ python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --p python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/squeezenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/squeezenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/squeezenet1.0-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/squeezenet1.0-quantized-5batches-naive-symbol.json ```

MobileNet 1.0

@@ -164,8 +187,8 @@ python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --pa python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenet1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/mobilenet1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/mobilenet1.0-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/mobilenet1.0-quantized-5batches-naive-symbol.json ```

MobileNetV2 1.0

@@ -186,8 +209,8 @@ python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json - python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --param-file=./model/mobilenetv2_1.0-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/mobilenetv2_1.0-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/mobilenetv2_1.0-quantized-5batches-naive-symbol.json ```

Inception-V3

@@ -208,8 +231,8 @@ python imagenet_inference.py --symbol-file=./model/inceptionv3-symbol.json --par python imagenet_inference.py --symbol-file=./model/inceptionv3-quantized-5batches-naive-symbol.json --param-file=./model/inceptionv3-quantized-0000.params --image-shape=3,299,299 --rgb-mean=123.68,116.779,103.939 --rgb-std=58.393,57.12,57.375 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/inceptionv3-symbol.json --image-shape=3,299,299 --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/inceptionv3-quantized-5batches-naive-symbol.json --image-shape=3,299,299 --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/inceptionv3-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/inceptionv3-quantized-5batches-naive-symbol.json ```

ResNet152-V2

@@ -231,8 +254,8 @@ python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-symbol. python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json --param-file=./model/imagenet1k-resnet-152-quantized-0000.params --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-resnet-152-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-resnet-152-quantized-5batches-naive-symbol.json ```

Inception-BN

@@ -254,8 +277,8 @@ python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-symbo python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json --param-file=./model/imagenet1k-inception-bn-quantized-0000.params --rgb-mean=123.68,116.779,103.939 --num-skipped-batches=50 --batch-size=64 --num-inference-batches=500 --dataset=./data/val_256_q90.rec --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True -python imagenet_inference.py --symbol-file=./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json --batch-size=64 --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-inception-bn-symbol.json +bash ./launch_inference_mkldnn.sh -s ./model/imagenet1k-inception-bn-quantized-5batches-naive-symbol.json ```

SSD-VGG16

@@ -307,7 +330,7 @@ python imagenet_gen_qsym_mkldnn.py --model=custom --num-calib-batches=5 --calib- python imagenet_inference.py --symbol-file=./model/*.json --param-file=./model/*.params --rgb-mean=* --rgb-std=* --num-skipped-batches=* --batch-size=* --num-inference-batches=*--dataset=./data/* --ctx=cpu # Launch dummy data Inference -python imagenet_inference.py --symbol-file=./model/*.json --batch-size=* --num-inference-batches=500 --ctx=cpu --benchmark=True +bash ./launch_inference_mkldnn.sh -s ./model/*.json ```

Model Quantization with CUDNN

diff --git a/example/quantization/launch_inference_mkldnn.sh b/example/quantization/launch_inference_mkldnn.sh
new file mode 100644
index 000000000000..f67787b41b03
--- /dev/null
+++ b/example/quantization/launch_inference_mkldnn.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+usage()
+{
+  echo "usage: bash ./launch_inference_mkldnn.sh [[[-s symbol_file ] [-b batch_size] [-iter iteration] [-ins instance] [-c cores/instance]] | [-h]]"
+}
+
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --symbol | -s)
+      shift
+      SYMBOL=$1
+      ;;
+    --batch-size | -b)
+      shift
+      BS=$1
+      ;;
+    --iteration | -iter)
+      shift
+      ITERATIONS=$1
+      ;;
+    --instance | -ins)
+      shift
+      INS=$1
+      ;;
+    --core | -c)
+      shift
+      CORES=$1
+      ;;
+    --help | -h)
+      usage
+      exit 1
+      ;;
+    *)
+      usage
+      exit 1
+  esac
+  shift
+done
+
+NUM_SOCKET=`lscpu | grep 'Socket(s)' | awk '{print $NF}'`
+NUM_NUMA_NODE=`lscpu | grep 'NUMA node(s)' | awk '{print $NF}'`
+CORES_PER_SOCKET=`lscpu | grep 'Core(s) per socket' | awk '{print $NF}'`
+NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))
+CORES_PER_NUMA=$((NUM_CORES / NUM_NUMA_NODE))
+echo "target machine has $NUM_CORES physical core(s) on $NUM_NUMA_NODE numa nodes of $NUM_SOCKET socket(s)."
+
+if [ -z "$SYMBOL" ]; then
+  echo "Error: Need a symbol file as input." >&2; exit 1 # abort: without a symbol file the benchmark cannot run
+fi
+if [ -z "$INS" ]; then
+  echo "Default: launch one instance per socket."
+  INS=$NUM_SOCKET
+fi
+if [ -z "$CORES" ]; then
+  echo "Default: divide full physical cores."
+  CORES=$((NUM_CORES / $INS))
+fi
+if [ -z "$BS" ]; then
+  echo "Default: set batch size to 64."
+  BS=64
+fi
+if [ -z "$ITERATIONS" ]; then
+  echo "Default: set iterations to 500."
+  ITERATIONS=500
+fi
+
+echo " benchmark configs"
+echo " cores per instance: $CORES"
+echo " total instances: $INS"
+echo " batch size: $BS"
+echo " iterations: $ITERATIONS"
+echo ""
+
+rm BENCHMARK_*.log || echo "benchmarking..." # clear stale logs from previous runs
+
+for((i=0;i<$INS;i++));
+do
+  ((a=$i*$CORES)) # first core for this instance
+  ((b=$a+$CORES-1)) # last core for this instance
+  memid=$((b/CORES_PER_NUMA)) # NUMA node owning this core range
+  LOG=BENCHMARK_$i.log
+  echo " $i instance use $a-$b cores and $memid mem with $LOG"
+  KMP_AFFINITY=granularity=fine,noduplicates,compact,1,0 \
+  OMP_NUM_THREADS=$CORES \
+  nohup numactl --physcpubind=$a-$b --membind=$memid python imagenet_inference.py --symbol-file=$SYMBOL --batch-size=$BS --num-inference-batches=$ITERATIONS --ctx=cpu --benchmark=True > $LOG 2>&1 &
+done
+wait # barrier: let every background instance finish before aggregating
+
+fps=`grep image/sec BENCHMARK_*.log | awk '{ sum += $(NF) }; END { print sum }'`
+latency=$(echo "scale=2; 1000*$INS/$fps" | bc)
+echo "overall throughput: $fps"
+echo "latency per instance: $latency"
+echo "benchmark finish:)"