From 6c7dbeb9d12043790aaaf50d7d86f50ce53620ee Mon Sep 17 00:00:00 2001
From: Marek Kolodziej
Date: Tue, 17 Jul 2018 15:15:18 -0700
Subject: [PATCH] Fixes for CI

Run the TensorRT Python tests via the nose multiprocess plugin with
worker restarts, move the helpers shared by the TensorRT tests into
tests/python/tensorrt/common.py, and drop the long-running
ResNet/ResNeXt/SSD benchmark script from the unit-test suite.
---
 ci/docker/runtime_functions.sh                |   2 +-
 tests/python/tensorrt/common.py               |  52 ++++
 tests/python/tensorrt/test_cycle.py           |   4 +-
 tests/python/tensorrt/test_tensorrt_lenet5.py |  24 +-
 .../test_tensorrt_resnet_resnext_ssd.py       | 282 ------------------
 5 files changed, 59 insertions(+), 305 deletions(-)
 create mode 100644 tests/python/tensorrt/common.py
 delete mode 100644 tests/python/tensorrt/test_tensorrt_resnet_resnext_ssd.py

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index eedf4c5768e2..fc33e69bf90d 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -694,7 +694,7 @@ unittest_ubuntu_tensorrt_gpu() {
     export PYTHONPATH=./python/
     export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
     export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH
-    nosetests-3.4 --verbose tests/python/tensorrt
+    nosetests-3.4 --verbose --processes=1 --process-restartworker tests/python/tensorrt
 }
 
 # quantization gpu currently only runs on P3 instances
diff --git a/tests/python/tensorrt/common.py b/tests/python/tensorrt/common.py
new file mode 100644
index 000000000000..ab367ba17160
--- /dev/null
+++ b/tests/python/tensorrt/common.py
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+# pylint: disable=unused-import
+# unittest, numpy and mxnet are re-exported for tests doing `from common import *`.
+import unittest
+import numpy as np
+import mxnet as mx
+# pylint: enable=unused-import
+from ctypes.util import find_library
+
+def check_tensorrt_installation():
+    """Assert that the TensorRT shared library can be found."""
+    assert find_library('nvinfer') is not None, "Can't find the TensorRT shared library"
+
+def get_use_tensorrt():
+    """Return 1 if the TensorRT graph pass is enabled, 0 otherwise."""
+    return int(os.environ.get("MXNET_USE_TENSORRT", 0))
+
+def set_use_tensorrt(status=False):
+    """Enable or disable the TensorRT graph pass via MXNET_USE_TENSORRT."""
+    os.environ["MXNET_USE_TENSORRT"] = str(int(status))
+
+def merge_dicts(*dict_args):
+    """Merge arg_params and aux_params to populate shared_buffer"""
+    result = {}
+    for dictionary in dict_args:
+        result.update(dictionary)
+    return result
+
+def get_fp16_infer_for_fp16_graph():
+    """Return 1 if TensorRT runs FP32 graphs in FP16, 0 otherwise."""
+    return int(os.environ.get("MXNET_TENSORRT_USE_FP16_FOR_FP32", 0))
+
+def set_fp16_infer_for_fp16_graph(status=False):
+    """Enable or disable FP16 inference for FP32 graphs via MXNET_TENSORRT_USE_FP16_FOR_FP32."""
+    os.environ["MXNET_TENSORRT_USE_FP16_FOR_FP32"] = str(int(status))
diff --git a/tests/python/tensorrt/test_cycle.py b/tests/python/tensorrt/test_cycle.py
index fe01dad19c8e..d56cf1652ad5 100644
--- a/tests/python/tensorrt/test_cycle.py
+++ b/tests/python/tensorrt/test_cycle.py
@@ -16,7 +16,7 @@
 # under the License.
 
import mxnet as mx -from test_tensorrt_lenet5 import * +from common import * def detect_cycle_from(sym, visited, stack): visited.add(sym.handle.value) @@ -57,7 +57,7 @@ def test_simple_cycle(): set_use_tensorrt(True) executor = C.simple_bind(ctx=mx.gpu(0), data=(1,10), softmax_label=(1,), shared_buffer=arg_params, grad_req='null', force_rebind=True) - assert has_no_cycle(executor.optimized_symbol), "The graph optimized by TRT contain a cycle" + assert has_no_cycle(executor.optimized_symbol), "The graph optimized by TRT contains a cycle" if __name__ == '__main__': test_simple_cycle() diff --git a/tests/python/tensorrt/test_tensorrt_lenet5.py b/tests/python/tensorrt/test_tensorrt_lenet5.py index 96ccd7e72897..b176a8cf437a 100644 --- a/tests/python/tensorrt/test_tensorrt_lenet5.py +++ b/tests/python/tensorrt/test_tensorrt_lenet5.py @@ -21,24 +21,7 @@ # pylint: enable=unused-import import numpy as np import mxnet as mx -from ctypes.util import find_library - -assert find_library('nvinfer') is not None, "Can't find the TensorRT shared library" - -def get_use_tensorrt(): - return int(os.environ.get("MXNET_USE_TENSORRT", 0)) - - -def set_use_tensorrt(status=False): - os.environ["MXNET_USE_TENSORRT"] = str(int(status)) - - -def merge_dicts(*dict_args): - """Merge arg_params and aux_params to populate shared_buffer""" - result = {} - for dictionary in dict_args: - result.update(dictionary) - return result +from common import * def get_iters(mnist, batch_size): @@ -137,8 +120,8 @@ def run_inference(sym, arg_params, aux_params, mnist, all_test_labels, batch_siz def test_tensorrt_inference(): - """Run inference comparison between MXNet and TensorRT. - This could be used stand-alone or with nosetests.""" + """Run LeNet-5 inference comparison between MXNet and TensorRT.""" + check_tensorrt_installation() mnist = mx.test_utils.get_mnist() num_epochs = 10 batch_size = 1024 @@ -156,6 +139,7 @@ def test_tensorrt_inference(): # Load serialized MXNet model (model-symbol.json + model-epoch.params) sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, num_epochs) + print("LeNet-5 test") print("Running inference in MXNet") set_use_tensorrt(False) mx_pct = run_inference(sym, arg_params, aux_params, mnist, diff --git a/tests/python/tensorrt/test_tensorrt_resnet_resnext_ssd.py b/tests/python/tensorrt/test_tensorrt_resnet_resnext_ssd.py deleted file mode 100644 index 6ec95c31c7b7..000000000000 --- a/tests/python/tensorrt/test_tensorrt_resnet_resnext_ssd.py +++ /dev/null @@ -1,282 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import gc -import gluoncv -import mxnet as mx -import multiprocessing -import numpy as np -import os -import sys - -from mxnet.gluon.data.vision import transforms -from mxnet import gluon -from time import time - -def get_use_tensorrt(): - return int(os.environ.get("MXNET_USE_TENSORRT", 0)) - -def set_use_tensorrt(status=False): - os.environ["MXNET_USE_TENSORRT"] = str(int(status)) - -def get_fp16_infer_for_fp16_graph(): - return int(os.environ.get("MXNET_TENSORRT_USE_FP16_FOR_FP32", 0)) - -def set_fp16_infer_for_fp16_graph(status=False): - os.environ["MXNET_TENSORRT_USE_FP16_FOR_FP32"] = str(int(status)) - -#ssd_512_resnet50_v1_coco -def get_ssd_model(model_name='ssd_512_mobilenet1_0_coco', use_tensorrt=True, - ctx=mx.gpu(0), batch_size=32, fp16_for_fp32_graph=False): - - set_use_tensorrt(use_tensorrt) - set_fp16_infer_for_fp16_graph(fp16_for_fp32_graph) - net = gluoncv.model_zoo.get_model(model_name, pretrained=True) - data = mx.sym.var('data') - anchors, class_preds, box_preds = net(data) - all_preds = mx.sym.concat(anchors, class_preds, box_preds, dim=2) - all_params = dict([(k, v.data()) for k, v in net.collect_params().items()]) - - if not get_use_tensorrt(): - all_params = dict([(k, v.as_in_context(mx.gpu(0))) for k, v in all_params.items()]) - - # class_preds - executor = all_preds.simple_bind(ctx=ctx, data=(batch_size, 3, 224, 224), grad_req='null', - shared_buffer=all_params, force_rebind=True) - return executor - - -def get_classif_model(model_name='cifar_resnet56_v1', use_tensorrt=True, - ctx=mx.gpu(0), batch_size=128, fp16_for_fp32_graph=False, imagenet=False): - - set_use_tensorrt(use_tensorrt) - set_fp16_infer_for_fp16_graph(fp16_for_fp32_graph) - net = gluoncv.model_zoo.get_model(model_name, pretrained=True) - data = mx.sym.var('data') - out = net(data) - - softmax = mx.sym.SoftmaxOutput(out, name='softmax') - - all_params = dict([(k, v.data()) for k, v in net.collect_params().items()]) - - if not get_use_tensorrt(): - all_params = dict([(k, v.as_in_context(mx.gpu(0))) for k, v in all_params.items()]) - - if imagenet: - h, w = 224, 224 - else: - h, w = 32, 32 - - executor = softmax.simple_bind(ctx=ctx, data=(batch_size, 3, h, w), softmax_label=(batch_size,), grad_req='null', - shared_buffer=all_params, force_rebind=True) - return executor - -def cifar10_infer(data_dir='./data', model_name='cifar_resnet56_v1', use_tensorrt=True, - ctx=mx.gpu(0), fp16_for_fp32_graph=False, batch_size=128, num_workers=1): - - executor = get_classif_model(model_name, use_tensorrt, ctx, batch_size, fp16_for_fp32_graph, imagenet=False) - - num_ex = 10000 - all_preds = np.zeros([num_ex, 10]) - - all_label_test = np.zeros(num_ex) - - transform_test = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]) - ]) - - data_loader = lambda: gluon.data.DataLoader( - gluon.data.vision.CIFAR10(train=False).transform_first(transform_test), - batch_size=batch_size, shuffle=False, num_workers=num_workers) - - val_data = data_loader() - - for idx, (data, label) in enumerate(val_data): - extent = data.shape[0] - offset = idx*batch_size - all_label_test[offset:offset+extent] = label.asnumpy() - - # warm-up, but don't use result - executor.arg_dict["data"][:extent, :] = data - executor.forward(is_train=False) - executor.outputs[0].wait_to_read() - - gc.collect() - - val_data = data_loader() - example_ct = 0 - - start = time() - - for idx, (data, label) in enumerate(val_data): - extent = data.shape[0] - executor.arg_dict["data"][:extent, :] = data 
- executor.forward(is_train=False) - preds = executor.outputs[0].asnumpy() - offset = idx*batch_size - all_preds[offset:offset+extent, :] = preds[:extent] - example_ct += extent - - all_preds = np.argmax(all_preds, axis=1) - matches = (all_preds[:example_ct] == all_label_test[:example_ct]).sum() - duration = time() - start - - return duration, 100.0 * matches / example_ct - -def ssd_infer(model_name='ssd_512_mobilenet1_0_voc', use_tensorrt=True, - ctx=mx.gpu(0), fp16_for_fp32_graph=False, batch_size=128, num_workers=1): - - print("Running SSD inference with model: %s" % model_name) - executor = get_ssd_model(model_name, use_tensorrt, ctx, batch_size, fp16_for_fp32_graph) - - start = None - num_runs = 50 - - for i in range(2): - data = np.random.randn(batch_size, 3, 224, 224) - executor.arg_dict["data"] = data - if i == 1: - start = time() - for runs in range(num_runs): - executor.forward(is_train = False) - executor.outputs[0].wait_to_read() -# all_preds = executor.outputs[0].asnumpy() -# anchors = all_preds[:, :, 0] -# class_preds = all_preds[:, :, 1] -# box_preds = all_preds[:, :, 2:] - - return time() - start - -def classif_imagenet_infer(model_name='ssd_512_mobilenet1_0_coco', use_tensorrt=True, - ctx=mx.gpu(0), fp16_for_fp32_graph=False, batch_size=128, num_workers=1): - - executor = get_ssd_model(model_name, use_tensorrt, ctx, batch_size, fp16_for_fp32_graph) - executor = get_classif_model(model_name, use_tensorrt, ctx, batch_size, fp16_for_fp32_graph, imagenet=False) - - start = None - num_runs = 2 - - for i in range(2): - data = np.random.randn(batch_size, 3, 224, 224) - executor.arg_dict["data"] = data - if i == 1: - start = time() - for runs in range(num_runs): - executor.forward(is_train = False) - executor.outputs[0].wait_to_read() - - return time() - start - - -def run_experiment_for(model_name, batch_size, num_workers, fp16_for_fp32_graph): - print("\n===========================================") - print("Model: %s" % model_name) - print("===========================================") - print("*** Running inference using pure MxNet ***\n") - mx_duration, mx_pct = cifar10_infer(model_name=model_name, batch_size=batch_size, - num_workers=num_workers, fp16_for_fp32_graph=fp16_for_fp32_graph, use_tensorrt=False) - print("\nMxNet: time elapsed: %.3fs, accuracy: %.2f%%" % (mx_duration, mx_pct)) - - print("\n*** Running inference using MxNet + TensorRT ***\n") - trt_duration, trt_pct = cifar10_infer(model_name=model_name, batch_size=batch_size, - num_workers=num_workers, use_tensorrt=True) - print("TensorRT: time elapsed: %.3fs, accuracy: %.2f%%" % (trt_duration, trt_pct)) - speedup = mx_duration / trt_duration - print("TensorRT speed-up (not counting compilation): %.2fx" % speedup) - - acc_diff = abs(mx_pct - trt_pct) - print("Absolute accuracy difference: %f" % acc_diff) - return speedup, acc_diff - - -def test_tensorrt_on_cifar_resnets(batch_size=32, tolerance=0.1, num_workers=1, test_fp16=False): - - models = [ - 'cifar_resnet20_v1', - 'cifar_resnet56_v1', - 'cifar_resnet110_v1', - 'cifar_resnet20_v2', - 'cifar_resnet56_v2', - 'cifar_resnet110_v2', - 'cifar_wideresnet16_10', - 'cifar_wideresnet28_10', - 'cifar_wideresnet40_8', - 'cifar_resnext29_16x64d' - ] - - num_models = len(models) - - speedups = np.zeros(num_models, dtype=np.float32) - acc_diffs = np.zeros(num_models, dtype=np.float32) - - precisions = ["fp32"] - if test_fp16: - precisions.append("fp16") - - for precision in precisions: - - test_start = time() - - print("\n\nRunning inference in %s\n\n" % precision) - 
use_fp16 = True if precision == "fp16" else False - for idx, model in enumerate(models): - speedup, acc_diff = run_experiment_for(model, batch_size, num_workers, fp16_for_fp32_graph=use_fp16) - speedups[idx] = speedup - acc_diffs[idx] = acc_diff - assert acc_diff < tolerance, "Accuracy difference between MxNet and TensorRT > %.2f%% for model %s" % (tolerance, model) - - print("Perf and correctness checks run on the following models:") - print(models) - mean_speedup = np.mean(speedups) - std_speedup = np.std(speedups) - print("\nSpeedups:") - print(speedups) - print("Speedup range: [%.2f, %.2f]" % (np.min(speedups), np.max(speedups))) - print("Mean speedup: %.2f" % mean_speedup) - print("St. dev. of speedups: %.2f" % std_speedup) - print("\nAcc. differences: %s" % str(acc_diffs)) - - test_duration = time() - test_start - - print("Test duration: %.2f seconds" % test_duration) - -if __name__ == '__main__': - num_workers = int(multiprocessing.cpu_count() / 2) - batch_size = 16 - -# print("\n\n ================= IMAGENET CLASSIFICATION =================\n\n") -# print("Running ResNet-152 inference in MxNet") -# mx_imagenet_time = classif_imagenet_infer(use_tensorrt=False, batch_size=batch_size) -# print("Running ResNet-152 inference in MxNet-TensorRT") -# trt_imagenet_time = classif_imagenet_infer(use_tensorrt=True, batch_size=batch_size) -# print("Speedup: %.2fx" % (mx_imagenet_time / trt_imagenet_time)) -# -# print("\n\n ================= CIFAR-10 CLASSIFICATION =================\n\n") -# # ResNets -# test_tensorrt_on_cifar_resnets(batch_size=batch_size, tolerance=0.1, num_workers=num_workers) -# -# print("\n\n ================= IMAGENET OBJECT DETECTION =================\n\n") - - # SSD - print("Running SSD in pure MxNet") - mx_ssd_time = ssd_infer(use_tensorrt=False, batch_size=batch_size) - print("Execution time: %.2f seconds" % mx_ssd_time) - print("Running SSD in MxNet + TensorRT") - trt_ssd_time = ssd_infer(use_tensorrt=True, batch_size=batch_size) - print("Execution time: %.2f seconds" % trt_ssd_time) - print("Speedup: %.2fx" % (mx_ssd_time / trt_ssd_time))
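
For reference, the helpers consolidated into tests/python/tensorrt/common.py
are meant to be used in the pattern sketched below. This is a minimal,
illustrative sketch and not part of the patch: the checkpoint prefix
('lenet5'), the epoch number, the MNIST-style input shapes, and the
tolerances are assumptions, and the real tests compare end-to-end accuracy
rather than raw outputs.

import mxnet as mx
import numpy as np
from common import (check_tensorrt_installation, get_use_tensorrt,
                    merge_dicts, set_use_tensorrt)

def run_once(data, use_tensorrt, batch_size=128):
    # MXNET_USE_TENSORRT is read at bind time, so toggle it before simple_bind.
    set_use_tensorrt(use_tensorrt)
    # Hypothetical checkpoint prefix and epoch; any serialized model works.
    sym, arg_params, aux_params = mx.model.load_checkpoint('lenet5', 10)
    all_params = merge_dicts(arg_params, aux_params)
    if not get_use_tensorrt():
        # Without the TensorRT pass, the shared parameters must live on the GPU.
        all_params = {k: v.as_in_context(mx.gpu(0)) for k, v in all_params.items()}
    executor = sym.simple_bind(ctx=mx.gpu(0), data=(batch_size, 1, 28, 28),
                               softmax_label=(batch_size,),
                               shared_buffer=all_params, grad_req='null',
                               force_rebind=True)
    executor.arg_dict['data'][:] = data
    executor.forward(is_train=False)
    return executor.outputs[0].asnumpy()

check_tensorrt_installation()
batch = np.random.randn(128, 1, 28, 28)
mx_out = run_once(batch, use_tensorrt=False)
trt_out = run_once(batch, use_tensorrt=True)
# The two runs should agree within numerical tolerance (tolerance arbitrary).
assert np.allclose(mx_out, trt_out, rtol=1e-2, atol=1e-2)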