From 4da168b17cea1d55d8942a5a617d18a274ba7552 Mon Sep 17 00:00:00 2001
From: Kellen Sunderland
Date: Thu, 19 Jul 2018 16:00:51 +0200
Subject: [PATCH 1/4] Match Python test conventions

---
 tests/python/tensorrt/common.py               | 13 +++++-----
 tests/python/tensorrt/test_cycle.py           |  4 +--
 tests/python/tensorrt/test_tensorrt_lenet5.py | 25 ++++++++++---------
 3 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/tests/python/tensorrt/common.py b/tests/python/tensorrt/common.py
index ab367ba17160..c64649a19c41 100644
--- a/tests/python/tensorrt/common.py
+++ b/tests/python/tensorrt/common.py
@@ -16,22 +16,21 @@
 # under the License.
 
 import os
-# pylint: disable=unused-import
-import unittest
-# pylint: enable=unused-import
-import numpy as np
-import mxnet as mx
 from ctypes.util import find_library
 
+
 def check_tensorrt_installation():
     assert find_library('nvinfer') is not None, "Can't find the TensorRT shared library"
 
+
 def get_use_tensorrt():
     return int(os.environ.get("MXNET_USE_TENSORRT", 0))
 
+
 def set_use_tensorrt(status=False):
     os.environ["MXNET_USE_TENSORRT"] = str(int(status))
 
+
 def merge_dicts(*dict_args):
     """Merge arg_params and aux_params to populate shared_buffer"""
     result = {}
@@ -39,8 +38,10 @@
     result.update(dictionary)
     return result
 
+
 def get_fp16_infer_for_fp16_graph():
-    return int(os.environ.get("MXNET_TENSORRT_USE_FP16_FOR_FP32", 0))
+    return int(os.environ.get("MXNET_TENSORRT_USE_FP16_FOR_FP32", 0))
+
 
 def set_fp16_infer_for_fp16_graph(status=False):
     os.environ["MXNET_TENSORRT_USE_FP16_FOR_FP32"] = str(int(status))
diff --git a/tests/python/tensorrt/test_cycle.py b/tests/python/tensorrt/test_cycle.py
index d56cf1652ad5..37c3f5da2689 100644
--- a/tests/python/tensorrt/test_cycle.py
+++ b/tests/python/tensorrt/test_cycle.py
@@ -60,5 +60,5 @@ def test_simple_cycle():
     assert has_no_cycle(executor.optimized_symbol), "The graph optimized by TRT contains a cycle"
 
 if __name__ == '__main__':
-    test_simple_cycle()
-
+    import nose
+    nose.runmodule()
diff --git a/tests/python/tensorrt/test_tensorrt_lenet5.py b/tests/python/tensorrt/test_tensorrt_lenet5.py
index b176a8cf437a..58150c7c28fb 100644
--- a/tests/python/tensorrt/test_tensorrt_lenet5.py
+++ b/tests/python/tensorrt/test_tensorrt_lenet5.py
@@ -16,11 +16,9 @@
 # under the License.
 
 import os
-# pylint: disable=unused-import
-import unittest
-# pylint: enable=unused-import
-import numpy as np
+
 import mxnet as mx
+import numpy as np
 from common import *
 
 
@@ -38,7 +36,7 @@ def get_iters(mnist, batch_size):
 
 def lenet5():
     """LeNet-5 Symbol"""
-    #pylint: disable=no-member
+    # pylint: disable=no-member
     data = mx.sym.Variable('data')
     conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=20)
     tanh1 = mx.sym.Activation(data=conv1, act_type="tanh")
@@ -57,7 +55,7 @@ def lenet5():
     fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10)
     # loss
     lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')
-    #pylint: enable=no-member
+    # pylint: enable=no-member
     return lenet
 
 
@@ -77,7 +75,7 @@ def train_lenet5(num_epochs, batch_size, train_iter, val_iter, test_iter):
     # predict accuracy for lenet
     acc = mx.metric.Accuracy()
     lenet_model.score(test_iter, acc)
-    accuracy = acc.get()[1]
+    accuracy = float(acc.get()[1])
     assert accuracy > 0.95, "LeNet-5 training accuracy on MNIST was too low"
     return lenet_model
 
@@ -89,7 +87,7 @@ def run_inference(sym, arg_params, aux_params, mnist, all_test_labels, batch_siz
     if not get_use_tensorrt():
         shared_buffer = dict([(k, v.as_in_context(mx.gpu(0))) for k, v in shared_buffer.items()])
     executor = sym.simple_bind(ctx=mx.gpu(0),
-                               data=(batch_size,) + mnist['test_data'].shape[1:],
+                               data=(batch_size,) + mnist['test_data'].shape[1:],
                                softmax_label=(batch_size,),
                                shared_buffer=shared_buffer,
                                grad_req='null',
@@ -106,9 +104,9 @@ def run_inference(sym, arg_params, aux_params, mnist, all_test_labels, batch_siz
     for idx, dbatch in enumerate(test_iter):
         executor.arg_dict["data"][:] = dbatch.data[0]
         executor.forward(is_train=False)
-        offset = idx*batch_size
+        offset = idx * batch_size
         extent = batch_size if num_ex - offset > batch_size else num_ex - offset
-        all_preds[offset:offset+extent, :] = executor.outputs[0].asnumpy()[:extent]
+        all_preds[offset:offset + extent, :] = executor.outputs[0].asnumpy()[:extent]
         example_ct += extent
 
     all_preds = np.argmax(all_preds, axis=1)
@@ -121,6 +119,7 @@ def run_inference(sym, arg_params, aux_params, mnist, all_test_labels, batch_siz
 
 def test_tensorrt_inference():
     """Run LeNet-5 inference comparison between MXNet and TensorRT."""
+    check_tensorrt_installation()
     mnist = mx.test_utils.get_mnist()
     num_epochs = 10
 
@@ -148,7 +147,7 @@ def test_tensorrt_inference():
     print("Running inference in MXNet-TensorRT")
     set_use_tensorrt(True)
     trt_pct = run_inference(sym, arg_params, aux_params, mnist,
-                            all_test_labels, batch_size=batch_size)
+                            all_test_labels, batch_size=batch_size)
 
     print("MXNet accuracy: %f" % mx_pct)
     print("MXNet-TensorRT accuracy: %f" % trt_pct)
@@ -159,4 +158,6 @@ def test_tensorrt_inference():
 
 
 if __name__ == '__main__':
-    test_tensorrt_inference()
+    import nose
+
+    nose.runmodule()

From 9442c37ca89d15aedf6d7f13b5f8072a7a59f286 Mon Sep 17 00:00:00 2001
From: Kellen Sunderland
Date: Thu, 19 Jul 2018 10:50:23 +0200
Subject: [PATCH 2/4] Disable restarts, remove unneeded deps.

---
 ci/docker/runtime_functions.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index fc33e69bf90d..051b3a0effb2 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -480,6 +480,8 @@ build_ubuntu_gpu_tensorrt() {
         USE_CUDNN=1 \
         USE_DIST_KVSTORE=0 \
         USE_TENSORRT=1 \
+        USE_JEMALLOC=0 \
+        USE_GPERFTOOLS=0 \
         ONNX_NAMESPACE=onnx \
         CUDA_ARCH="-gencode arch=compute_70,code=compute_70"\
         -j$(nproc)
@@ -694,7 +696,7 @@ unittest_ubuntu_tensorrt_gpu() {
    export PYTHONPATH=./python/
    export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
    export LD_LIBRARY_PATH=/work/mxnet/lib:$LD_LIBRARY_PATH
-   nosetests-3.4 --verbose --processes=1 --process-restartworker tests/python/tensorrt
+   nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_trt_gpu.xml --verbose tests/python/tensorrt/
 }
 
 # quantization gpu currently only runs on P3 instances
@@ -991,3 +993,5 @@ EOF
     declare -F | cut -d' ' -f3
     echo
 fi
+
+

From b4e5b566d6b931c14e1a6796d484ea63d707e5f8 Mon Sep 17 00:00:00 2001
From: Kellen Sunderland
Date: Thu, 19 Jul 2018 10:49:58 +0200
Subject: [PATCH 3/4] Disable OpenCV to reduce deps in TRT build

---
 ci/docker/runtime_functions.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 051b3a0effb2..4540ebc6104b 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -478,6 +478,7 @@ build_ubuntu_gpu_tensorrt() {
         USE_CUDA=1 \
         USE_CUDA_PATH=/usr/local/cuda \
         USE_CUDNN=1 \
+        USE_OPENCV=0 \
         USE_DIST_KVSTORE=0 \
         USE_TENSORRT=1 \
         USE_JEMALLOC=0 \

From 03bdeb723b48c66127d9cdc519ac5858e9bcd2f8 Mon Sep 17 00:00:00 2001
From: Kellen Sunderland
Date: Wed, 18 Jul 2018 18:44:32 +0200
Subject: [PATCH 4/4] Apply Google style naming to serialization

---
 src/common/serialization.h | 140 ++++++++++++++++++------------------
 1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/src/common/serialization.h b/src/common/serialization.h
index 74a582fb8a53..56b6069304d0 100644
--- a/src/common/serialization.h
+++ b/src/common/serialization.h
@@ -47,49 +47,49 @@
 namespace mxnet {
 namespace common {
 
 template<typename T>
-inline size_t serialized_size(const T& obj);
+inline size_t SerializedSize(const T &obj);
 
 template<typename T>
-inline size_t serialized_size(const nnvm::Tuple<T>& obj);
+inline size_t SerializedSize(const nnvm::Tuple<T> &obj);
 
 template<typename K, typename V>
-inline size_t serialized_size(const std::map<K, V>& obj);
+inline size_t SerializedSize(const std::map<K, V> &obj);
 
 template<>
-inline size_t serialized_size(const std::string& obj);
+inline size_t SerializedSize(const std::string &obj);
 
 template<typename... Args>
-inline size_t serialized_size(const std::tuple<Args...>& obj);
+inline size_t SerializedSize(const std::tuple<Args...> &obj);
 
 template<typename T>
-inline void serialize(const T& obj, char** buffer);
+inline void Serialize(const T &obj, char **buffer);
 
 template<typename T>
-inline void serialize(const nnvm::Tuple<T>& obj, char** buffer);
+inline void Serialize(const nnvm::Tuple<T> &obj, char **buffer);
 
 template<typename K, typename V>
-inline void serialize(const std::map<K, V>& obj, char** buffer);
+inline void Serialize(const std::map<K, V> &obj, char **buffer);
 
 template<>
-inline void serialize(const std::string& obj, char** buffer);
+inline void Serialize(const std::string &obj, char **buffer);
 
 template<typename... Args>
-inline void serialize(const std::tuple<Args...>& obj, char** buffer);
+inline void Serialize(const std::tuple<Args...> &obj, char **buffer);
 
 template<typename T>
-inline void deserialize(T* obj, const std::string& buffer, size_t* curr_pos);
+inline void Deserialize(T *obj, const std::string &buffer, size_t *curr_pos);
 
 template<typename T>
-inline void deserialize(nnvm::Tuple<T>* obj, const std::string& buffer, size_t* curr_pos);
+inline void Deserialize(nnvm::Tuple<T> *obj, const std::string &buffer, size_t *curr_pos);
 
 template<typename K, typename V>
-inline void deserialize(std::map<K, V>* obj, const std::string& buffer, size_t* curr_pos);
+inline void Deserialize(std::map<K, V> *obj, const std::string &buffer, size_t *curr_pos);
 
 template<>
-inline void deserialize(std::string* obj, const std::string& buffer, size_t* curr_pos);
+inline void Deserialize(std::string *obj, const std::string &buffer, size_t *curr_pos);
 
 template<typename... Args>
-inline void deserialize(std::tuple<Args...>* obj, const std::string& buffer, size_t* curr_pos);
+inline void Deserialize(std::tuple<Args...> *obj, const std::string &buffer, size_t *curr_pos);
 
 template<typename T>
@@ -98,16 +98,16 @@ struct is_container {
 };
 
 template<typename T>
-inline size_t serialized_size(const T& obj) {
+inline size_t SerializedSize(const T &obj) {
   return sizeof(T);
 }
 
 template<typename T>
-inline size_t serialized_size(const nnvm::Tuple<T>& obj) {
+inline size_t SerializedSize(const nnvm::Tuple<T> &obj) {
   if (is_container<T>::value) {
     size_t sum_val = 4;
     for (auto& el : obj) {
-      sum_val += serialized_size(el);
+      sum_val += SerializedSize(el);
     }
     return sum_val;
   } else {
@@ -116,20 +116,20 @@ inline size_t serialized_size(const nnvm::Tuple<T>& obj) {
 }
 
 template<typename K, typename V>
-inline size_t serialized_size(const std::map<K, V>& obj) {
+inline size_t SerializedSize(const std::map<K, V> &obj) {
   size_t sum_val = 4;
   if (is_container<K>::value && is_container<V>::value) {
     for (const auto& p : obj) {
-      sum_val += serialized_size(p.first) + serialized_size(p.second);
+      sum_val += SerializedSize(p.first) + SerializedSize(p.second);
     }
   } else if (is_container<K>::value) {
     for (const auto& p : obj) {
-      sum_val += serialized_size(p.first);
+      sum_val += SerializedSize(p.first);
     }
     sum_val += sizeof(V) * obj.size();
   } else if (is_container<V>::value) {
     for (const auto& p : obj) {
-      sum_val += serialized_size(p.second);
+      sum_val += SerializedSize(p.second);
     }
     sum_val += sizeof(K) * obj.size();
   } else {
@@ -139,35 +139,35 @@ inline size_t serialized_size(const std::map<K, V>& obj) {
 }
 
 template<>
-inline size_t serialized_size(const std::string& obj) {
+inline size_t SerializedSize(const std::string &obj) {
   return obj.size() + 4;
 }
 
 template<int I>
 struct serialized_size_tuple {
   template<typename... Args>
-  static inline size_t compute(const std::tuple<Args...>& obj) {
-    return serialized_size(std::get<I>(obj)) + serialized_size_tuple<I-1>::compute(obj);
+  static inline size_t Compute(const std::tuple<Args...> &obj) {
+    return SerializedSize(std::get<I>(obj)) + serialized_size_tuple<I-1>::Compute(obj);
   }
 };
 
 template<>
 struct serialized_size_tuple<0> {
   template<typename... Args>
-  static inline size_t compute(const std::tuple<Args...>& obj) {
-    return serialized_size(std::get<0>(obj));
+  static inline size_t Compute(const std::tuple<Args...> &obj) {
+    return SerializedSize(std::get<0>(obj));
   }
 };
 
 template<typename... Args>
-inline size_t serialized_size(const std::tuple<Args...>& obj) {
-  return serialized_size_tuple<sizeof... (Args)-1>::compute(obj);
+inline size_t SerializedSize(const std::tuple<Args...> &obj) {
+  return serialized_size_tuple<sizeof... (Args)-1>::Compute(obj);
 }
 
-// SERIALIZE
+// Serializer
 
 template<typename T>
-inline size_t serialize_container_size(const T& obj, char** buffer) {
+inline size_t SerializedContainerSize(const T &obj, char **buffer) {
   uint32_t size = obj.size();
   std::memcpy(*buffer, &size, 4);
   *buffer += 4;
@@ -175,33 +175,33 @@ inline size_t serialize_container_size(const T& obj, char** buffer) {
 }
 
 template<typename T>
-inline void serialize(const T& obj, char** buffer) {
+inline void Serialize(const T &obj, char **buffer) {
   std::memcpy(*buffer, &obj, sizeof(T));
   *buffer += sizeof(T);
 }
 
 template<typename T>
-inline void serialize(const nnvm::Tuple<T>& obj, char** buffer) {
+inline void Serialize(const nnvm::Tuple<T> &obj, char **buffer) {
   uint32_t size = obj.ndim();
   std::memcpy(*buffer, &size, 4);
   *buffer += 4;
   for (auto& el : obj) {
-    serialize(el, buffer);
+    Serialize(el, buffer);
   }
 }
 
 template<typename K, typename V>
-inline void serialize(const std::map<K, V>& obj, char** buffer) {
-  serialize_container_size(obj, buffer);
+inline void Serialize(const std::map<K, V> &obj, char **buffer) {
+  SerializedContainerSize(obj, buffer);
   for (auto& p : obj) {
-    serialize(p.first, buffer);
-    serialize(p.second, buffer);
+    Serialize(p.first, buffer);
+    Serialize(p.second, buffer);
   }
 }
 
 template<>
-inline void serialize(const std::string& obj, char** buffer) {
-  auto size = serialize_container_size(obj, buffer);
+inline void Serialize(const std::string &obj, char **buffer) {
+  auto size = SerializedContainerSize(obj, buffer);
   std::memcpy(*buffer, &obj[0], size);
   *buffer += size;
 }
@@ -209,29 +209,29 @@ inline void serialize(const std::string& obj, char** buffer) {
 template<int I>
 struct serialize_tuple {
   template<typename... Args>
-  static inline void compute(const std::tuple<Args...>& obj, char** buffer) {
-    serialize_tuple<I-1>::compute(obj, buffer);
-    serialize(std::get<I>(obj), buffer);
+  static inline void Compute(const std::tuple<Args...> &obj, char **buffer) {
+    serialize_tuple<I-1>::Compute(obj, buffer);
+    Serialize(std::get<I>(obj), buffer);
   }
 };
 
 template<>
 struct serialize_tuple<0> {
   template<typename... Args>
-  static inline void compute(const std::tuple<Args...>& obj, char** buffer) {
-    serialize(std::get<0>(obj), buffer);
+  static inline void Compute(const std::tuple<Args...> &obj, char **buffer) {
+    Serialize(std::get<0>(obj), buffer);
  }
 };
 
 template<typename... Args>
-inline void serialize(const std::tuple<Args...>& obj, char** buffer) {
-  serialize_tuple<sizeof... (Args)-1>::compute(obj, buffer);
+inline void Serialize(const std::tuple<Args...> &obj, char **buffer) {
+  serialize_tuple<sizeof... (Args)-1>::Compute(obj, buffer);
 }
 
-
 // Deserializer
+
 template<typename T>
-inline size_t deserialize_container_size(T* obj, const std::string& buffer, size_t* curr_pos) {
+inline size_t DeserializedContainerSize(T *obj, const std::string &buffer, size_t *curr_pos) {
   uint32_t size = obj->size();
   std::memcpy(&size, &buffer[*curr_pos], 4);
   *curr_pos += 4;
@@ -239,35 +239,35 @@ inline size_t deserialize_container_size(T* obj, const std::string& buffer, size
 }
 
 template<typename T>
-inline void deserialize(T* obj, const std::string& buffer, size_t* curr_pos) {
+inline void Deserialize(T *obj, const std::string &buffer, size_t *curr_pos) {
   std::memcpy(obj, &buffer[*curr_pos], sizeof(T));
   *curr_pos += sizeof(T);
 }
 
 template<typename T>
-inline void deserialize(nnvm::Tuple<T>* obj, const std::string& buffer, size_t* curr_pos) {
+inline void Deserialize(nnvm::Tuple<T> *obj, const std::string &buffer, size_t *curr_pos) {
   uint32_t size = obj->ndim();
   std::memcpy(&size, &buffer[*curr_pos], 4);
   *curr_pos += 4;
   obj->SetDim(size);
   for (size_t i = 0; i < size; ++i) {
-    deserialize(&(*obj)[i], buffer, curr_pos);
+    Deserialize(&(*obj)[i], buffer, curr_pos);
   }
 }
 
 template<typename K, typename V>
-inline void deserialize(std::map<K, V>* obj, const std::string& buffer, size_t* curr_pos) {
-  auto size = deserialize_container_size(obj, buffer, curr_pos);
+inline void Deserialize(std::map<K, V> *obj, const std::string &buffer, size_t *curr_pos) {
+  auto size = DeserializedContainerSize(obj, buffer, curr_pos);
   K first;
   for (size_t i = 0; i < size; ++i) {
-    deserialize(&first, buffer, curr_pos);
-    deserialize(&(*obj)[first], buffer, curr_pos);
+    Deserialize(&first, buffer, curr_pos);
+    Deserialize(&(*obj)[first], buffer, curr_pos);
   }
 }
 
 template<>
-inline void deserialize(std::string* obj, const std::string& buffer, size_t* curr_pos) {
-  auto size = deserialize_container_size(obj, buffer, curr_pos);
+inline void Deserialize(std::string *obj, const std::string &buffer, size_t *curr_pos) {
+  auto size = DeserializedContainerSize(obj, buffer, curr_pos);
   obj->resize(size);
   std::memcpy(&(obj->front()), &buffer[*curr_pos], size);
   *curr_pos += size;
@@ -276,33 +276,33 @@ inline void deserialize(std::string* obj, const std::string& buffer, size_t* cur
 template<int I>
 struct deserialize_tuple {
   template<typename... Args>
-  static inline void compute(std::tuple<Args...>* obj,
-                             const std::string& buffer, size_t* curr_pos) {
-    deserialize_tuple<I-1>::compute(obj, buffer, curr_pos);
-    deserialize(&std::get<I>(*obj), buffer, curr_pos);
+  static inline void Compute(std::tuple<Args...> *obj,
+                             const std::string &buffer, size_t *curr_pos) {
+    deserialize_tuple<I-1>::Compute(obj, buffer, curr_pos);
+    Deserialize(&std::get<I>(*obj), buffer, curr_pos);
   }
 };
 
 template<>
 struct deserialize_tuple<0> {
   template<typename... Args>
-  static inline void compute(std::tuple<Args...>* obj,
-                             const std::string& buffer, size_t* curr_pos) {
-    deserialize(&std::get<0>(*obj), buffer, curr_pos);
+  static inline void Compute(std::tuple<Args...> *obj,
+                             const std::string &buffer, size_t *curr_pos) {
+    Deserialize(&std::get<0>(*obj), buffer, curr_pos);
   }
 };
 
 template<typename... Args>
-inline void deserialize(std::tuple<Args...>* obj, const std::string& buffer, size_t* curr_pos) {
-  deserialize_tuple<sizeof... (Args)-1>::compute(obj, buffer, curr_pos);
+inline void Deserialize(std::tuple<Args...> *obj, const std::string &buffer, size_t *curr_pos) {
+  deserialize_tuple<sizeof... (Args)-1>::Compute(obj, buffer, curr_pos);
 }
 
 
 template<typename T>
 inline void Serialize(const T& obj, std::string* serialized_data) {
-  serialized_data->resize(serialized_size(obj));
+  serialized_data->resize(SerializedSize(obj));
   char* curr_pos = &(serialized_data->front());
-  serialize(obj, &curr_pos);
+  Serialize(obj, &curr_pos);
   CHECK_EQ((int64_t)curr_pos - (int64_t)&(serialized_data->front()),
            serialized_data->size());
 }
@@ -310,7 +310,7 @@ inline void Serialize(const T& obj, std::string* serialized_data) {
 template<typename T>
 inline void Deserialize(T* obj, const std::string& serialized_data) {
   size_t curr_pos = 0;
-  deserialize(obj, serialized_data, &curr_pos);
+  Deserialize(obj, serialized_data, &curr_pos);
   CHECK_EQ(curr_pos, serialized_data.size());
 }
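
Usage note for PATCH 4/4: the renaming only touches the internal helpers; the public entry points in namespace mxnet::common keep their wrapper form, Serialize(const T&, std::string*) and Deserialize(T*, const std::string&). A minimal round-trip sketch follows, assuming it is compiled inside the MXNet source tree so the header's dmlc/nnvm dependencies resolve; the file name, include path, and map contents are illustrative, not taken from the patch:

    // round_trip_example.cc -- hypothetical file, not part of the patch series
    #include <iostream>
    #include <map>
    #include <string>

    #include "common/serialization.h"  // assumed path relative to src/; adjust to your layout

    int main() {
      // Supported payloads: trivially copyable values, std::string,
      // nnvm::Tuple, std::map, std::tuple, and nestings of these.
      std::map<std::string, int> params = {{"num_layers", 5}, {"num_epochs", 10}};

      // SerializedSize() sizes the buffer and Serialize() fills it; the
      // wrapper CHECK_EQs that exactly that many bytes were written.
      std::string buffer;
      mxnet::common::Serialize(params, &buffer);

      // Deserialize() re-reads the buffer and checks it is fully consumed.
      std::map<std::string, int> restored;
      mxnet::common::Deserialize(&restored, buffer);

      std::cout << "restored " << restored.size() << " entries" << std::endl;
      return 0;
    }

Containers are prefixed with a fixed 4-byte uint32_t length on the wire, which is why SerializedSize() adds 4 per container and per std::string.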