Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
wuxun-zhang committed Jul 23, 2019
1 parent 5712bd5 commit ab1e7b9
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 19 deletions.
2 changes: 1 addition & 1 deletion docs/tutorials/deployment/int8_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
GluonCV delivers several quantized models to improve performance and reduce deployment costs for computer vision inference tasks. In real production, there are two main benefits of lower precision (INT8). First, computation can be accelerated by low-precision instructions, such as the Intel Vector Neural Network Instructions (VNNI). Second, a lower-precision data type saves memory bandwidth, allows for better cache locality, and saves power. The new feature can achieve up to a 4X performance speedup on the latest `AWS EC2 C5 instances <https://aws.amazon.com/blogs/aws/now-available-new-c5-instance-sizes-and-bare-metal-instances/>`_ with `Intel Deep Learning Boost (VNNI) <https://www.intel.ai/intel-deep-learning-boost/>`_ enabled hardware, with less than a 0.5% accuracy drop.
Please check out `verify_pretrained.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/classification/imagenet/verify_pretrained.py>`_ for ImageNet inference,
`eval_ssd.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/detection/ssd/eval_ssd.py>`_ for SSD inference, and `eval_segmentation.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/segmentation/eval_segmentation.py>`_
`eval_ssd.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/detection/ssd/eval_ssd.py>`_ for SSD inference, and `test.py <https://raw.githubusercontent.com/dmlc/gluon-cv/master/scripts/segmentation/test.py>`_
for FCN inference.
Performance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import sys

import mxnet as mx
from mxnet import gluon
from mxnet import gluon, ndarray as nd
from mxnet.gluon.data.vision import transforms

import gluoncv
Expand All @@ -22,6 +22,8 @@ def parse_args():
help='model name (default: fcn)')
parser.add_argument('--backbone', type=str, default='resnet101',
help='base network')
parser.add_argument('--image-shape', type=int, default=480,
help='image shape')
parser.add_argument('--base-size', type=int, default=520,
help='base image size')
parser.add_argument('--crop-size', type=int, default=480,
Expand All @@ -33,7 +35,8 @@ def parse_args():
parser.add_argument('--quantized', action='store_true',
help='whether to use quantized model')
parser.add_argument('--batch-size', type=int, default=16)
parser.add_argument('--num-batches', type=int, default=100)
parser.add_argument('--num-iterations', type=int, default=100,
help='number of benchmarking iterations.')
parser.add_argument('--workers', type=int, default=4,
help='number of workers for data loading')
parser.add_argument('--pretrained', action="store_true",
Expand All @@ -49,6 +52,9 @@ def parse_args():
# evaluation only
parser.add_argument('--eval', action='store_true', default=False,
help='evaluation only')
# dummy benchmark
parser.add_argument('--benchmark', action='store_true', default=False,
help='whether to use dummy data for benchmark')

args = parser.parse_args()

Expand Down Expand Up @@ -81,13 +87,13 @@ def test(args, model):
testset = get_segmentation_dataset(
args.dataset, split='test', mode=args.mode, **data_kwargs)
size = len(testset)

# get dataloader
batchify_fn = ms_batchify_fn if args.mode == 'test' else None
test_data = gluon.data.DataLoader(
testset, args.batch_size, last_batch='keep', shuffle=False, num_workers=args.workers)
testset, args.batch_size, batchify_fn=batchify_fn, last_batch='keep', shuffle=False, num_workers=args.workers)

print(model)
if not args.eval:
evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)
metric = gluoncv.utils.metrics.SegmentationMetric(testset.num_class)

tbar = tqdm(test_data)
Expand All @@ -99,17 +105,18 @@ def test(args, model):
data = mx.gluon.utils.split_and_load(batch, ctx_list=args.ctx, batch_axis=0, even_split=False)
outputs = None
for x in data:
output = model.forward(x)[0]
output = model.forward(x)
outputs = output if outputs is None else nd.concat(outputs, output, axis=0)
outputs = [outputs]
metric.update(targets, outputs)
pixAcc, mIoU = metric.get()
tbar.set_description( 'pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
else:
im_paths = dsts
predicts = evaluator.parallel_forward(batch)
for predict, impath in zip(predicts, im_paths):
predict = mx.nd.squeeze(mx.nd.argmax(predict[0], 1)).asnumpy() + \
for data, impath in zip(batch, dsts):
data = data.as_in_context(args.ctx[0])
if len(data.shape) < 4:
data = nd.expand_dims(data, axis=0)
predict = model.forward(data)[0]
predict = mx.nd.squeeze(mx.nd.argmax(predict, 1)).asnumpy() + \
testset.pred_offset
mask = get_color_pallete(predict, args.dataset)
outname = os.path.splitext(impath)[0] + '.png'
Expand All @@ -118,6 +125,26 @@ def test(args, model):
print('Inference speed with batchsize %d is %.2f img/sec' % (args.batch_size, speed))


def benchmarking(args, model):
    """Measure inference throughput of ``model`` on randomly generated input.

    One random float32 batch of shape
    ``(batch_size, 3, image_shape, image_shape)`` is created on ``args.ctx[0]``
    and fed to the model repeatedly. The first ``dry_run`` iterations are
    warm-up and are excluded from the timing; the reported throughput covers
    only the following ``num_iterations`` iterations.

    Parameters
    ----------
    args : namespace-like object
        Must provide ``model`` (name used in the banner), ``batch_size``,
        ``num_iterations``, ``image_shape`` and ``ctx`` (indexable; only the
        first entry is used).
    model : object
        Must expose ``forward(x)``. The result is iterated and
        ``wait_to_read()`` is called on every item.
        # NOTE(review): presumably a hybridized Gluon block returning
        # NDArray(s) -- confirm against the caller.

    Raises
    ------
    ValueError
        If ``args.num_iterations`` is smaller than 1. Previously this case
        ended in a ``NameError`` because the timer was never started.
    """
    print('-----benchmarking on %s -----' % args.model)
    bs = args.batch_size
    num_iterations = args.num_iterations
    if num_iterations < 1:
        # Without at least one timed iteration the start timestamp would
        # never be taken and no meaningful throughput could be reported.
        raise ValueError(
            'num_iterations must be at least 1 for benchmarking, got %r'
            % (num_iterations,))

    input_shape = (bs, 3, args.image_shape, args.image_shape)
    size = num_iterations * bs
    data = mx.random.uniform(-1.0, 1.0, shape=input_shape,
                             ctx=args.ctx[0], dtype='float32')
    dry_run = 5
    tic = None
    with tqdm(total=size + dry_run * bs) as pbar:
        for n in range(dry_run + num_iterations):
            if n == dry_run:
                # Warm-up finished: start timing from here.
                tic = time.time()
            outputs = model.forward(data)
            # Block on every output so the elapsed time includes the actual
            # computation of this iteration.
            for output in outputs:
                output.wait_to_read()
            pbar.update(bs)
    speed = size / (time.time() - tic)
    print('Throughput is %f imgs/sec' % speed)


if __name__ == "__main__":
args = parse_args()

Expand All @@ -139,12 +166,6 @@ def test(args, model):
if withQuantization and args.quantized:
model_prefix += '_int8'

if args.quantized and args.mode != 'val':
raise ValueError("Currently, %s mode or is not supported by quantized model." % args.mode)

if args.quantized and args.eval == False:
raise ValueError("Currently, only evaluation is supported by quantized model.")

# create network
if args.pretrained:
model = get_model(model_prefix, pretrained=True)
Expand All @@ -169,4 +190,7 @@ def test(args, model):
model.hybridize()

print('Testing model: ', args.resume)
test(args, model)
if not args.benchmark:
test(args, model)
else:
benchmarking(args, model)
File renamed without changes.
9 changes: 9 additions & 0 deletions tests/unittests/test_model_zoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,15 @@ def test_quantized_ssd_models():
_test_model_list(model_list, ctx, x)


@unittest.skip("temporarily disabled to fallback to non-mkl version")
@with_cpu(0)
def test_quantized_fcn_models():
    # Run the shared model-list check on the INT8 FCN models, feeding a
    # single random 480x480 three-channel input on the current context.
    context = mx.context.current_context()
    dummy_input = mx.random.uniform(shape=(1, 3, 480, 480), ctx=context)
    quantized_fcn_names = [
        'fcn_resnet101_voc_int8',
        'fcn_resnet101_coco_int8-symbol',
    ]
    _test_model_list(quantized_fcn_names, context, dummy_input)


if __name__ == '__main__':
import nose

Expand Down

0 comments on commit ab1e7b9

Please sign in to comment.