From 40beeb2a7453f262e2820ff8faa6621abf81a88d Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Thu, 6 Jun 2019 23:13:49 +0000 Subject: [PATCH 01/20] Upgrade archive utility and add back FC improvement This reverts commit 65434886f6caa7210ed3ff39cd4e950c023d8328. --- ci/docker/Dockerfile.build.ubuntu_build_cuda | 2 ++ ci/docker/install/ubuntu_ar.sh | 35 ++++++++++++++++++++ src/operator/nn/fully_connected-inl.h | 14 +++++++- src/operator/nn/fully_connected.cc | 2 -- tests/python/unittest/test_operator.py | 21 ++++++++++++ 5 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 ci/docker/install/ubuntu_ar.sh diff --git a/ci/docker/Dockerfile.build.ubuntu_build_cuda b/ci/docker/Dockerfile.build.ubuntu_build_cuda index ad1a1c4558b5..5aec340f1731 100644 --- a/ci/docker/Dockerfile.build.ubuntu_build_cuda +++ b/ci/docker/Dockerfile.build.ubuntu_build_cuda @@ -43,6 +43,8 @@ COPY install/ubuntu_clang.sh /work/ RUN /work/ubuntu_clang.sh COPY install/ubuntu_mklml.sh /work/ RUN /work/ubuntu_mklml.sh +COPY install/ubuntu_ar.sh /work/ +RUN /work/ubuntu_ar.sh ENV CUDNN_VERSION=7.5.1.10 COPY install/ubuntu_cudnn.sh /work/ diff --git a/ci/docker/install/ubuntu_ar.sh b/ci/docker/install/ubuntu_ar.sh new file mode 100644 index 000000000000..e4677e675f02 --- /dev/null +++ b/ci/docker/install/ubuntu_ar.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
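+
+# This script builds binutils 2.27 from source and symlinks the resulting `ar`
+# into /usr/local/bin, so that archives larger than 4 GB can be created without
+# hitting the "File truncated" bug in older binutils
+# (see https://github.com/apache/incubator-mxnet/issues/15084).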
+ +# build and install are separated so changes to build don't invalidate +# the whole docker cache for the image + +wget https://mirror.clarkson.edu/gnu/binutils/binutils-2.27.tar.gz + +export DEBIAN_FRONTEND=noninteractive +apt-get update || true +apt-get install -y \ + wget + +mkdir /opt/binutils_install && mkdir /opt/binutils_install && mkdir /opt/binutils && cd /opt/binutils +wget -nv https://mirror.clarkson.edu/gnu/binutils/binutils-2.27.tar.gz +./configure --prefix=/opt/binutils_install --exec-prefix=/opt/binutils_other +make -j$(nproc) +make install +ln -s /opt/binutils_install/bin/ar /usr/local/bin/ar diff --git a/src/operator/nn/fully_connected-inl.h b/src/operator/nn/fully_connected-inl.h index e4bb11f6bc56..44af375486fb 100644 --- a/src/operator/nn/fully_connected-inl.h +++ b/src/operator/nn/fully_connected-inl.h @@ -36,6 +36,7 @@ #include "../elemwise_op_common.h" #include "../linalg.h" #include "../../common/utils.h" +#include "../tensor/broadcast_reduce_op.h" namespace mxnet { namespace op { @@ -169,7 +170,18 @@ void FCBackward(const OpContext &ctx, const FullyConnectedParam ¶m, // gradient of bias if (!param.no_bias) { Tensor gbias = in_grad[fullc::kBias].get(s); - Assign(gbias, req[fullc::kBias], sum_rows(grad)); + TBlob grad_blob = TBlob(grad); + TBlob gbias_blob = TBlob(gbias); + mxnet::TShape x(1, 0); + mxnet::TShape small; + if (shape_assign(&gbias_blob.shape_, Shape2(param.num_hidden, 1))) { + small = gbias_blob.shape_; + } else { + small = ReduceAxesShapeImpl(grad_blob.shape_, dmlc::optional(x), true, false); + } + ReduceAxesComputeImpl(ctx, {grad_blob}, {req[fullc::kBias]}, + {in_grad[fullc::kBias]}, small); } // gradient of data // Legacy approach shown here for comparison: diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index a097357ef5a3..27f6595aee9e 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -316,11 +316,9 @@ NNVM_REGISTER_OP(_backward_FullyConnected) const FullyConnectedParam& params = nnvm::get(attrs.parsed); return params.no_bias ? 
2 : 3; }) -#if MXNET_USE_MKLDNN == 1 .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; }) -#endif .set_attr("TIsBackward", true) .set_attr("FInplaceOption", [](const NodeAttrs& attrs){ return std::vector >{{1, 0}}; diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index ab33d2667fbe..e600cef3d04d 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -696,6 +696,27 @@ def test_symbol_pow(): check_symbolic_backward(test, [data_tmp, exp_tmp], [np.ones(shape)], [data_dir, exp_dir]) +@with_seed() +def test_fully_connected(): + data = mx.sym.var("data") + fc_weight = mx.sym.var("weight") + fc_bias = mx.sym.var("bias") + fc = mx.sym.FullyConnected(data=data, weight=fc_weight, bias=fc_bias, num_hidden=10, no_bias=False, name='fc') + data = mx.nd.random.uniform(shape=(5, 5, 5, 13), dtype=np.float32) + fc_weight = mx.nd.random.uniform(shape=(10, 325), dtype=np.float32) + fc_bias = mx.nd.random.uniform(shape=(10), dtype=np.float32) + fc_bias2 = mx.nd.random.uniform(shape=(10, 1), dtype=np.float32) + data_np = data.asnumpy().reshape(5, 325) + fc_weight_np = np.transpose(fc_weight.asnumpy()) + fc_bias_np = fc_bias.asnumpy() + res = np.dot(data_np, fc_weight_np) + fc_bias.asnumpy() + check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, {'fc_output': res}) + check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, + numeric_eps=1e-2, rtol=1e-4, atol=1e-2) + # TODO: Fix Bug #15032 when bias has ndim > 1 + #check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias2.asnumpy()}, {'fc_output': res}) + + @with_seed() def test_pow_fn(): shape = (3, 4) From 928055f04a06244969ed2a834d931d5d329ad648 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 7 Jun 2019 00:55:06 +0000 Subject: [PATCH 02/20] Change permissions for Ubuntu AR --- ci/docker/install/ubuntu_ar.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ci/docker/install/ubuntu_ar.sh diff --git a/ci/docker/install/ubuntu_ar.sh b/ci/docker/install/ubuntu_ar.sh old mode 100644 new mode 100755 From 2b418d3ea05add64131efb73c9402ae3497573c0 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 7 Jun 2019 01:11:37 +0000 Subject: [PATCH 03/20] Extract and cd into binutils dir --- ci/docker/install/ubuntu_ar.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/docker/install/ubuntu_ar.sh b/ci/docker/install/ubuntu_ar.sh index e4677e675f02..5fdda02bce8e 100755 --- a/ci/docker/install/ubuntu_ar.sh +++ b/ci/docker/install/ubuntu_ar.sh @@ -29,6 +29,7 @@ apt-get install -y \ mkdir /opt/binutils_install && mkdir /opt/binutils_install && mkdir /opt/binutils && cd /opt/binutils wget -nv https://mirror.clarkson.edu/gnu/binutils/binutils-2.27.tar.gz +tar -xvf binutils-2.27.tar.gz && cd binutils-2.27 ./configure --prefix=/opt/binutils_install --exec-prefix=/opt/binutils_other make -j$(nproc) make install From b8236238fe5fbfb1941b42ba837d6aef8fa16baa Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 7 Jun 2019 04:29:56 +0000 Subject: [PATCH 04/20] Allow AR path to be chosen by user --- Makefile | 2 +- ci/docker/runtime_functions.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6fc3c3aca5f6..11803e212d64 100644 --- a/Makefile +++ b/Makefile @@ -543,7 +543,7 @@ endif # --Wl,--whole-archive -lmxnet 
--Wl,--no-whole-archive lib/libmxnet.a: $(ALLX_DEP) @mkdir -p $(@D) - ar crv $@ $(filter %.o, $?) + $(AR) crv $@ $(filter %.o, $?) lib/libmxnet.so: $(ALLX_DEP) @mkdir -p $(@D) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 1ad67280617d..7148770b342a 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -667,6 +667,7 @@ build_ubuntu_gpu_mkldnn() { USE_CUDNN=1 \ CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \ USE_SIGNAL_HANDLER=1 \ + AR=/usr/local/bin/ar \ -j$(nproc) } From d89ce85583f426c0ed0b184dd6e280d16d96e8bc Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 7 Jun 2019 05:25:15 +0000 Subject: [PATCH 05/20] Add AR path to build --- ci/docker/runtime_functions.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 7148770b342a..5c2b29930497 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -667,7 +667,7 @@ build_ubuntu_gpu_mkldnn() { USE_CUDNN=1 \ CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \ USE_SIGNAL_HANDLER=1 \ - AR=/usr/local/bin/ar \ + AR=/opt/binutils_install/bin/ar \ -j$(nproc) } From f28bcfa2359a8526bf8c72f4823c6e65c8c51753 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 7 Jun 2019 19:39:16 +0000 Subject: [PATCH 06/20] Fix AR paths --- ci/docker/install/ubuntu_ar.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/install/ubuntu_ar.sh b/ci/docker/install/ubuntu_ar.sh index 5fdda02bce8e..58836d243048 100755 --- a/ci/docker/install/ubuntu_ar.sh +++ b/ci/docker/install/ubuntu_ar.sh @@ -30,7 +30,7 @@ apt-get install -y \ mkdir /opt/binutils_install && mkdir /opt/binutils_install && mkdir /opt/binutils && cd /opt/binutils wget -nv https://mirror.clarkson.edu/gnu/binutils/binutils-2.27.tar.gz tar -xvf binutils-2.27.tar.gz && cd binutils-2.27 -./configure --prefix=/opt/binutils_install --exec-prefix=/opt/binutils_other +./configure --prefix=/opt/binutils_other --exec-prefix=/opt/binutils_install make -j$(nproc) make install ln -s /opt/binutils_install/bin/ar /usr/local/bin/ar From 7c0e25b7a4eafa3b66f7e0e77f33db910a4e2de5 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 7 Jun 2019 19:44:44 +0000 Subject: [PATCH 07/20] Revert AR flag in makefile --- Makefile | 2 +- ci/docker/runtime_functions.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 11803e212d64..6fc3c3aca5f6 100644 --- a/Makefile +++ b/Makefile @@ -543,7 +543,7 @@ endif # --Wl,--whole-archive -lmxnet --Wl,--no-whole-archive lib/libmxnet.a: $(ALLX_DEP) @mkdir -p $(@D) - $(AR) crv $@ $(filter %.o, $?) + ar crv $@ $(filter %.o, $?) 
 lib/libmxnet.so: $(ALLX_DEP)
 	@mkdir -p $(@D)

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 5c2b29930497..1ad67280617d 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -667,7 +667,6 @@ build_ubuntu_gpu_mkldnn() {
     USE_CUDNN=1 \
     CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
     USE_SIGNAL_HANDLER=1 \
-    AR=/opt/binutils_install/bin/ar \
    -j$(nproc)
 }

From 849c51307197f19606bc198637a43c7d7948d879 Mon Sep 17 00:00:00 2001
From: Anirudh Subramanian
Date: Fri, 7 Jun 2019 21:43:55 +0000
Subject: [PATCH 08/20] Build from source doc updated

---
 docs/install/build_from_source.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/install/build_from_source.md b/docs/install/build_from_source.md
index 7b00b03abefe..8e18f3d9635c 100644
--- a/docs/install/build_from_source.md
+++ b/docs/install/build_from_source.md
@@ -180,6 +180,8 @@ More information on turning these features on or off are found in the following
 There is a configuration file for make, [`make/config.mk`](https://github.com/apache/incubator-mxnet/blob/master/make/config.mk), that contains all the compilation options. You can edit it and then run `make` or `cmake`. `cmake` is recommended for building MXNet (and is required to build with MKLDNN), however you may use `make` instead. For building with Java/Scala/Clojure, only `make` is supported.

+**NOTE:** With certain combinations of build flags, the MXNet static archive grows beyond 4 GB. The `ar` archive utility that the MXNet build uses internally hits a known bug ("File truncated": [bug report](https://sourceware.org/bugzilla/show_bug.cgi?id=14625)) for archives larger than 4 GB. Please use `ar` version 2.27 or newer to avoid this bug. See https://github.com/apache/incubator-mxnet/issues/15084 for more details.
+
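+You can check which `ar` the build will pick up with `ar --version`. The MXNet CI images, for example, build binutils 2.27 from source (`ci/docker/install/ubuntu_ar.sh`) and symlink the resulting `ar` into `/usr/local/bin` so that it takes precedence over the distribution's default.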
## Build MXNet From 9850ea4ed1d9c6a84161e3be809f7538f61868a3 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Wed, 12 Jun 2019 19:04:09 +0000 Subject: [PATCH 09/20] Commit for C Predict API --- src/c_api/c_predict_api.cc | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index 7de23ef935ef..5f29415b1b61 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -135,6 +135,7 @@ int _CreatePartialOut(const char* symbol_json_str, // load the parameters std::unordered_map arg_params, aux_params; + std::unordered_map arg_types, aux_types; { std::unordered_set arg_names, aux_names; std::vector arg_names_vec = sym.ListInputNames(Symbol::kReadOnlyArgs); @@ -156,15 +157,19 @@ int _CreatePartialOut(const char* symbol_json_str, std::string name(names[i].c_str() + 4); if (aux_names.count(name) != 0) { aux_params[name] = data[i]; + aux_types[name] = data[i].dtype(); } } if (!strncmp(names[i].c_str(), "arg:", 4)) { std::string name(names[i].c_str() + 4); if (arg_names.count(name) != 0) { arg_params[name] = data[i]; + arg_types[name] = data[i].dtype(); } } } + + } // shape inference and bind @@ -179,6 +184,7 @@ int _CreatePartialOut(const char* symbol_json_str, mxnet::ShapeVector out_shapes(sym.ListOutputNames().size()); mxnet::ShapeVector aux_shapes(aux_names.size()); mxnet::ShapeVector arg_shapes; + nnvm::DTypeVector result_arg_types, result_out_types, result_aux_types; std::unordered_map key2arg; for (size_t i = 0; i < arg_names.size(); ++i) { std::string key = arg_names[i]; @@ -187,6 +193,7 @@ int _CreatePartialOut(const char* symbol_json_str, try { mxnet::ShapeVector in_shapes; + nnvm::DTypeVector in_types; for (std::string key : sym.ListInputNames(Symbol::kAll)) { if (known_shape.count(key) != 0) { in_shapes.push_back(known_shape[key]); @@ -194,14 +201,29 @@ int _CreatePartialOut(const char* symbol_json_str, in_shapes.emplace_back(); } } + + for (std::string key : sym.ListInputNames(Symbol::kAll)) { + if (arg_types.count(key) != 0) { + in_types.push_back(arg_types[key]); + } else if (aux_types.count(key) != 0) { + in_types.push_back(aux_types[key]); + } + } nnvm::Graph g; g.outputs = sym.outputs; g = mxnet::exec::InferShape(std::move(g), std::move(in_shapes), "__shape__"); + g = mxnet::exec::InferType(std::move(g), std::move(in_types, "__dtype__"); bool infer_complete = (g.GetAttr("shape_num_unknown_nodes") == 0); + bool infer_type_complete = (g.GetAttr("dtype_num_unknown_nodes") == 0); CHECK(infer_complete) << "The shape information of is not enough to get the shapes"; + CHECK(infer_type_complete) + << "The infer type information is not enough to get the types"; CopyAttr(g.indexed_graph(), g.GetAttr("shape"), &arg_shapes, &out_shapes, &aux_shapes); + CopyAttr(g.indexed_graph(), + g.GetAttr("dtype"), + &result_arg_types, &result_out_types, &result_aux_types); } catch (const mxnet::op::InferShapeError &err) { throw dmlc::Error(err.msg); } @@ -210,14 +232,14 @@ int _CreatePartialOut(const char* symbol_json_str, std::vector arg_arrays, aux_arrays; for (size_t i = 0; i < arg_shapes.size(); ++i) { - NDArray nd = NDArray(arg_shapes[i], ctx); + NDArray nd = NDArray(arg_shapes[i], ctx, false, result_arg_types[i]); if (arg_params.count(arg_names[i]) != 0) { CopyFromTo(arg_params[arg_names[i]], &nd); } arg_arrays.push_back(nd); } for (size_t i = 0; i < aux_shapes.size(); ++i) { - NDArray nd = NDArray(aux_shapes[i], ctx); + NDArray nd = NDArray(aux_shapes[i], ctx, false, 
result_aux_types[i]); if (aux_params.count(aux_names[i]) != 0) { CopyFromTo(aux_params[aux_names[i]], &nd); } From 41f5c866e0867149e672ebff8846d216532aa877 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 14 Jun 2019 02:00:09 +0000 Subject: [PATCH 10/20] Add FP16 predict support --- amalgamation/python/mxnet_predict.py | 88 ++++++++++++++++++++++- include/mxnet/c_predict_api.h | 50 +++++++++++++ src/c_api/c_predict_api.cc | 96 ++++++++++++++++++++++--- tests/python/unittest/test_predictor.py | 30 ++++++++ 4 files changed, 252 insertions(+), 12 deletions(-) diff --git a/amalgamation/python/mxnet_predict.py b/amalgamation/python/mxnet_predict.py index a91d3849b0d2..4940cf83a9eb 100644 --- a/amalgamation/python/mxnet_predict.py +++ b/amalgamation/python/mxnet_predict.py @@ -25,17 +25,63 @@ import os import sys +from array import array import ctypes import logging import numpy as np +# pylint: disable= no-member +_DTYPE_NP_TO_MX = { + None: -1, + np.float32: 0, + np.float64: 1, + np.float16: 2, + np.uint8: 3, + np.int32: 4, + np.int8: 5, + np.int64: 6, +} + __all__ = ["Predictor", "load_ndarray_file"] if sys.version_info[0] == 3: py_str = lambda x: x.decode('utf-8') + + def c_str_array(strings): + """Create ctypes const char ** from a list of Python strings. + + Parameters + ---------- + strings : list of strings + Python strings. + + Returns + ------- + (ctypes.c_char_p * len(strings)) + A const char ** pointer that can be passed to C API. + """ + arr = (ctypes.c_char_p * len(strings))() + arr[:] = strings + return arr else: py_str = lambda x: x + def c_str_array(strings): + """Create ctypes const char ** from a list of Python strings. + + Parameters + ---------- + strings : list of string + Python strings. + + Returns + ------- + (ctypes.c_char_p * len(strings)) + A const char ** pointer that can be passed to C API. + """ + arr = (ctypes.c_char_p * len(strings))() + arr[:] = [s.encode('utf-8') for s in strings] + return arr def c_str(string): """"Convert a python string to C string.""" @@ -48,6 +94,11 @@ def c_array(ctype, values): """Create ctypes array from a python array.""" return (ctype * len(values))(*values) +def c_array_buf(ctype, buf): + """Create ctypes array from a Python buffer.""" + return (ctype * len(buf)).from_buffer(buf) + + def _find_lib_path(): """Find mxnet library.""" @@ -76,6 +127,7 @@ def _find_lib_path(): def _load_lib(): """Load libary by searching possible path.""" lib_path = _find_lib_path() + print(lib_path) lib = ctypes.cdll.LoadLibrary(lib_path[0]) # DMatrix functions lib.MXGetLastError.restype = ctypes.c_char_p @@ -90,6 +142,7 @@ def _check_call(ret): _LIB = _load_lib() # type definitions mx_uint = ctypes.c_uint +mx_int = ctypes.c_int mx_float = ctypes.c_float mx_float_p = ctypes.POINTER(mx_float) PredictorHandle = ctypes.c_void_p @@ -116,10 +169,13 @@ class Predictor(object): dev_id : int, optional The device id of the predictor. 
+ + type_dict : Dict of str->numpy.dtype + Input type dictionary, name->dtype """ def __init__(self, symbol_file, param_raw_bytes, input_shapes, - dev_type="cpu", dev_id=0): + dev_type="cpu", dev_id=0, type_dict=None): dev_type = devstr2type[dev_type] indptr = [0] sdata = [] @@ -133,7 +189,26 @@ def __init__(self, symbol_file, handle = PredictorHandle() param_raw_bytes = bytearray(param_raw_bytes) ptr = (ctypes.c_char * len(param_raw_bytes)).from_buffer(param_raw_bytes) - _check_call(_LIB.MXPredCreate( + + # data types + num_provided_arg_types = 0 + # provided type argument names + provided_arg_type_names = ctypes.POINTER(ctypes.c_char_p)() + # provided types + provided_arg_type_data = ctypes.POINTER(mx_uint)() + if type_dict is not None: + provided_arg_type_names = [] + provided_arg_type_data = [] + for k, v in type_dict.items(): + v = np.dtype(v).type + if v in _DTYPE_NP_TO_MX: + provided_arg_type_names.append(k) + provided_arg_type_data.append(_DTYPE_NP_TO_MX[v]) + num_provided_arg_types = mx_uint(len(provided_arg_type_names)) + provided_arg_type_names = c_str_array(provided_arg_type_names) + provided_arg_type_data = c_array_buf(ctypes.c_int, array('i', provided_arg_type_data)) + + _check_call(_LIB.MXPredCreateEx( c_str(symbol_file), ptr, len(param_raw_bytes), ctypes.c_int(dev_type), ctypes.c_int(dev_id), @@ -141,6 +216,9 @@ def __init__(self, symbol_file, c_array(ctypes.c_char_p, keys), c_array(mx_uint, indptr), c_array(mx_uint, sdata), + num_provided_arg_types, + provided_arg_type_names, + provided_arg_type_data, ctypes.byref(handle))) self.handle = handle @@ -218,12 +296,16 @@ def get_output(self, index): """ pdata = ctypes.POINTER(mx_uint)() ndim = mx_uint() + out_type = mx_int() _check_call(_LIB.MXPredGetOutputShape( self.handle, index, ctypes.byref(pdata), ctypes.byref(ndim))) + _check_call(_LIB.MXPredGetOutputType( + self.handle, index, + ctypes.byref(out_type))) shape = tuple(pdata[:ndim.value]) - data = np.empty(shape, dtype=np.float32) + data = np.empty(shape, dtype=out_type.value) _check_call(_LIB.MXPredGetOutput( self.handle, mx_uint(index), data.ctypes.data_as(mx_float_p), diff --git a/include/mxnet/c_predict_api.h b/include/mxnet/c_predict_api.h index ecbbf8dfc819..663e820fb535 100644 --- a/include/mxnet/c_predict_api.h +++ b/include/mxnet/c_predict_api.h @@ -85,6 +85,44 @@ MXNET_DLL int MXPredCreate(const char* symbol_json_str, const mx_uint* input_shape_data, PredictorHandle* out); +/*! + * \brief create a predictor + * \param symbol_json_str The JSON string of the symbol. + * \param param_bytes The in-memory raw bytes of parameter ndarray file. + * \param param_size The size of parameter ndarray file. + * \param dev_type The device type, 1: cpu, 2: gpu + * \param dev_id The device id of the predictor. + * \param num_input_nodes Number of input nodes to the net. + * For feedforward net, this is 1. + * \param input_keys The name of the input argument. + * For feedforward net, this is {"data"} + * \param input_shape_indptr Index pointer of shapes of each input node. + * The length of this array = num_input_nodes + 1. + * For feedforward net that takes 4 dimensional input, this is {0, 4}. + * \param input_shape_data A flattened data of shapes of each input node. + * For feedforward net that takes 4 dimensional input, this is the shape data. + * \param num_provided_arg_dtypes + * The length of provided_arg_dtypes. + * \param provided_arg_dtype_names + * The provided_arg_dtype_names the names of args for which dtypes are provided. 
+ * \param provided_arg_dtypes + * The provided_arg_dtypes the dtype provided + * \param out The created predictor handle. + * \return 0 when success, -1 when failure. + */ +MXNET_DLL int MXPredCreateEx(const char* symbol_json_str, + const void* param_bytes, + int param_size, + int dev_type, int dev_id, + mx_uint num_input_nodes, + const char** input_keys, + const mx_uint* input_shape_indptr, + const mx_uint* input_shape_data, + const mx_uint num_provided_arg_dtypes, + const char** provided_arg_dtype_names, + const int* provided_arg_dtypes, + PredictorHandle* out); + /*! * \brief create a predictor wich customized outputs * \param symbol_json_str The JSON string of the symbol. @@ -186,6 +224,18 @@ MXNET_DLL int MXPredGetOutputShape(PredictorHandle handle, mx_uint index, mx_uint** shape_data, mx_uint* shape_ndim); + +/*! + * \brief Get the dtype of output node. + * The returned data type is only valid before next call to MXPred function. + * \param handle The handle of the predictor. + * \param out_index The index of the output node, set to 0 if there is only one output. + * \param out_dtype The dtype of the output node + */ +MXNET_DLL int MXPredGetOutputType(PredictorHandle handle, + mx_uint out_index, + const int* out_dtype); + /*! * \brief Set the input data of predictor. * \param handle The predictor handle. diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index 5f29415b1b61..3c9165c1e94d 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -47,6 +47,9 @@ struct MXAPIPredictor { std::vector aux_arrays; // output shapes mxnet::ShapeVector out_shapes; + // output types + nnvm::DTypeVector out_dtypes; + // uint32_t buffer for output shapes std::vector out_shapes_buffer; // key to arguments @@ -97,6 +100,9 @@ int _CreatePartialOut(const char* symbol_json_str, // This is used for parallel inference. 
int num_threads, bool lazy, + const mx_uint num_provided_arg_dtypes, + const char** provided_arg_dtype_names, + const int* provided_arg_dtypes, PredictorHandle* out) { using nnvm::Symbol; @@ -169,7 +175,14 @@ int _CreatePartialOut(const char* symbol_json_str, } } - + if (num_provided_arg_dtypes > 0) { + for (mx_uint i = 0; i < num_provided_arg_dtypes; ++i) { + if (aux_types.count(provided_arg_dtype_names[i]) == 0 && + arg_types.count(provided_arg_dtype_names[i]) == 0) { + arg_types[provided_arg_dtype_names[i]] = provided_arg_dtypes[i]; + } + } + } } // shape inference and bind @@ -211,13 +224,10 @@ int _CreatePartialOut(const char* symbol_json_str, } nnvm::Graph g; g.outputs = sym.outputs; g = mxnet::exec::InferShape(std::move(g), std::move(in_shapes), "__shape__"); - g = mxnet::exec::InferType(std::move(g), std::move(in_types, "__dtype__"); + g = mxnet::exec::InferType(std::move(g), std::move(in_types), "__dtype__"); bool infer_complete = (g.GetAttr("shape_num_unknown_nodes") == 0); - bool infer_type_complete = (g.GetAttr("dtype_num_unknown_nodes") == 0); CHECK(infer_complete) << "The shape information of is not enough to get the shapes"; - CHECK(infer_type_complete) - << "The infer type information is not enough to get the types"; CopyAttr(g.indexed_graph(), g.GetAttr("shape"), &arg_shapes, &out_shapes, &aux_shapes); @@ -232,19 +242,31 @@ int _CreatePartialOut(const char* symbol_json_str, std::vector arg_arrays, aux_arrays; for (size_t i = 0; i < arg_shapes.size(); ++i) { - NDArray nd = NDArray(arg_shapes[i], ctx, false, result_arg_types[i]); + NDArray nd; + if (result_arg_types[i] != -1) { + nd = NDArray(arg_shapes[i], ctx, false, result_arg_types[i]); + } else { + nd = NDArray(arg_shapes[i], ctx); + } if (arg_params.count(arg_names[i]) != 0) { CopyFromTo(arg_params[arg_names[i]], &nd); } arg_arrays.push_back(nd); } + for (size_t i = 0; i < aux_shapes.size(); ++i) { - NDArray nd = NDArray(aux_shapes[i], ctx, false, result_aux_types[i]); + NDArray nd; + if (result_aux_types[i] != -1) { + nd = NDArray(aux_shapes[i], ctx, false, result_aux_types[i]); + } else { + nd = NDArray(aux_shapes[i], ctx); + } if (aux_params.count(aux_names[i]) != 0) { CopyFromTo(aux_params[aux_names[i]], &nd); } aux_arrays.push_back(nd); } + // bind for (int i = 0; i < num_threads; i++) { std::unique_ptr ret(new MXAPIPredictor()); @@ -254,6 +276,7 @@ int _CreatePartialOut(const char* symbol_json_str, ret->arg_arrays = arg_arrays; ret->aux_arrays = aux_arrays; ret->out_shapes = out_shapes; + ret->out_dtypes = result_out_types; if (!lazy) { std::map ctx_map; @@ -294,6 +317,9 @@ int MXPredCreatePartialOut(const char* symbol_json_str, output_keys, 1, false, + 0, + nullptr, + nullptr, out); } @@ -317,9 +343,44 @@ int MXPredCreate(const char* symbol_json_str, input_shape_indptr, input_shape_data, 0, - NULL, + nullptr, 1, false, + 0, + nullptr, + nullptr, + out); +} + +int MXPredCreateEx(const char* symbol_json_str, + const void* param_bytes, + int param_size, + int dev_type, int dev_id, + mx_uint num_input_nodes, + const char** input_keys, + const mx_uint* input_shape_indptr, + const mx_uint* input_shape_data, + const mx_uint num_provided_arg_dtypes, + const char** provided_arg_dtype_names, + const int* provided_arg_dtypes, + PredictorHandle* out) { + return _CreatePartialOut( + symbol_json_str, + param_bytes, + param_size, + dev_type, + dev_id, + num_input_nodes, + input_keys, + input_shape_indptr, + input_shape_data, + 0, + nullptr, + 1, + false, + num_provided_arg_dtypes, + provided_arg_dtype_names, + 
provided_arg_dtypes, out); } @@ -352,9 +413,12 @@ int MXPredCreateMultiThread(const char* symbol_json_str, input_shape_indptr, input_shape_data, 0, - NULL, + nullptr, num_threads, true, + 0, + nullptr, + nullptr, out); } @@ -466,6 +530,20 @@ int MXPredGetOutputShape(PredictorHandle handle, API_END(); } +int MXPredGetOutputType(PredictorHandle handle, + mx_uint out_index, + int* out_dtype) { + MXAPIPredictor* p = static_cast(handle); + API_BEGIN(); + CHECK_LT(out_index, p->out_arrays.size()) + << "Index exceed number of outputs"; + + const int s = p->out_dtypes[out_index]; + CHECK_GE(s, 0); + out_dtype[out_index] = s; + API_END(); +} + int MXPredSetInput(PredictorHandle handle, const char* key, const mx_float* data, diff --git a/tests/python/unittest/test_predictor.py b/tests/python/unittest/test_predictor.py index fc2fbf600cbc..a351f4f66520 100644 --- a/tests/python/unittest/test_predictor.py +++ b/tests/python/unittest/test_predictor.py @@ -81,6 +81,36 @@ def test_load_ndarray(): for k in nd_data.keys(): assert_almost_equal(nd_data[k].asnumpy(), nd_load[k], rtol=1e-5, atol=1e-6) +@with_seed() +def test_predictor(): + prefix = 'test_predictor_simple_dense' + symbol_file = "%s-symbol.json" % prefix + param_file = "%s-0000.params" % prefix + + input1 = np.random.uniform(size=(1, 3)) + input1 = input1.astype(np.float16) + + block = mx.gluon.nn.HybridSequential() + block.add(mx.gluon.nn.Dense(7)) + block.add(mx.gluon.nn.Dense(3)) + block.cast(np.float16) + block.hybridize() + block.initialize(ctx=mx.gpu(0)) + tmp = mx.nd.array(input1, dtype=np.float16, ctx=mx.gpu(0)) + out1 = block.forward(tmp) + block.export(prefix) + + predictor = Predictor(open(symbol_file, "r").read(), + open(param_file, "rb").read(), + {"data": input1.shape}, + dev_type="gpu", + dev_id=0, + type_dict={"data": input1.dtype}) + predictor.forward(data=input1) + predictor_out1 = predictor.get_output(0) + + assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6) + if __name__ == '__main__': import nose From 14ff1f75ac55162f62ebb31ba292e3566b2adf00 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Thu, 20 Jun 2019 09:08:40 +0000 Subject: [PATCH 11/20] Add Test Predictor fixes --- amalgamation/python/mxnet_predict.py | 31 +++++++++++++++++++++---- include/mxnet/c_predict_api.h | 2 +- tests/python/unittest/test_predictor.py | 2 +- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/amalgamation/python/mxnet_predict.py b/amalgamation/python/mxnet_predict.py index 4940cf83a9eb..6a605c519bc8 100644 --- a/amalgamation/python/mxnet_predict.py +++ b/amalgamation/python/mxnet_predict.py @@ -42,6 +42,17 @@ np.int64: 6, } +_DTYPE_MX_TO_NP = { + -1: None, + 0: np.float32, + 1: np.float64, + 2: np.float16, + 3: np.uint8, + 4: np.int32, + 5: np.int8, + 6: np.int64, +} + __all__ = ["Predictor", "load_ndarray_file"] if sys.version_info[0] == 3: @@ -220,6 +231,7 @@ def __init__(self, symbol_file, provided_arg_type_names, provided_arg_type_data, ctypes.byref(handle))) + self.type_dict = type_dict self.handle = handle def __del__(self): @@ -238,13 +250,21 @@ def forward(self, **kwargs): >>> predictor.forward(data=mydata) >>> out = predictor.get_output(0) """ + if self.type_dict and len(self.type_dict) != len(kwargs.items()): + raise ValueError("number of kwargs should be same as len of type_dict" \ + "Please check your forward pass inputs" \ + "or type_dict passed to Predictor instantiation") + for k, v in kwargs.items(): if not isinstance(v, np.ndarray): raise ValueError("Expect numpy ndarray as input") - v = 
np.asarray(v, dtype=np.float32, order='C') + if k in self.type_dict: + v = np.asarray(v, dtype=self.type_dict[k], order='C') + else: + v = np.asarray(v, dtype=np.float32, order='C') _check_call(_LIB.MXPredSetInput( self.handle, c_str(k), - v.ctypes.data_as(mx_float_p), + v.ctypes.data_as(ctypes.c_void_p), mx_uint(v.size))) _check_call(_LIB.MXPredForward(self.handle)) @@ -305,10 +325,10 @@ def get_output(self, index): self.handle, index, ctypes.byref(out_type))) shape = tuple(pdata[:ndim.value]) - data = np.empty(shape, dtype=out_type.value) + data = np.empty(shape, dtype=_DTYPE_MX_TO_NP[out_type.value]) _check_call(_LIB.MXPredGetOutput( self.handle, mx_uint(index), - data.ctypes.data_as(mx_float_p), + data.ctypes.data_as(ctypes.c_void_p), mx_uint(data.size))) return data @@ -355,4 +375,5 @@ def load_ndarray_file(nd_bytes): if len(keys) == 0 or len(keys[0]) == 0: return arrs else: - return {keys[i] : arrs[i] for i in range(len(keys))} + return {keys[i] : arrs[i] for i in range(len(keys)) + } diff --git a/include/mxnet/c_predict_api.h b/include/mxnet/c_predict_api.h index 663e820fb535..7051d4edac82 100644 --- a/include/mxnet/c_predict_api.h +++ b/include/mxnet/c_predict_api.h @@ -234,7 +234,7 @@ MXNET_DLL int MXPredGetOutputShape(PredictorHandle handle, */ MXNET_DLL int MXPredGetOutputType(PredictorHandle handle, mx_uint out_index, - const int* out_dtype); + int* out_dtype); /*! * \brief Set the input data of predictor. diff --git a/tests/python/unittest/test_predictor.py b/tests/python/unittest/test_predictor.py index a351f4f66520..b0529d805f4c 100644 --- a/tests/python/unittest/test_predictor.py +++ b/tests/python/unittest/test_predictor.py @@ -82,7 +82,7 @@ def test_load_ndarray(): assert_almost_equal(nd_data[k].asnumpy(), nd_load[k], rtol=1e-5, atol=1e-6) @with_seed() -def test_predictor(): +def test_predictor_fp16(): prefix = 'test_predictor_simple_dense' symbol_file = "%s-symbol.json" % prefix param_file = "%s-0000.params" % prefix From 9985decfccc13aa898a9d895262d68fefc7b055b Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Thu, 20 Jun 2019 16:40:51 +0000 Subject: [PATCH 12/20] Add test for predictor --- tests/python/unittest/test_predictor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/python/unittest/test_predictor.py b/tests/python/unittest/test_predictor.py index b0529d805f4c..ec12df310097 100644 --- a/tests/python/unittest/test_predictor.py +++ b/tests/python/unittest/test_predictor.py @@ -108,6 +108,8 @@ def test_predictor_fp16(): type_dict={"data": input1.dtype}) predictor.forward(data=input1) predictor_out1 = predictor.get_output(0) + assert out1.asnumpy().dtype == predictor_out1.dtype, \ + "Dtypes of output from C predict API doesnt match with gluon" assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6) From ac9c81c7fd5682a77f0781caccf05ac78d1ca491 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Thu, 20 Jun 2019 20:12:25 +0000 Subject: [PATCH 13/20] Cleanup fixes --- amalgamation/python/mxnet_predict.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/amalgamation/python/mxnet_predict.py b/amalgamation/python/mxnet_predict.py index 6a605c519bc8..9ba46e79df41 100644 --- a/amalgamation/python/mxnet_predict.py +++ b/amalgamation/python/mxnet_predict.py @@ -138,7 +138,6 @@ def _find_lib_path(): def _load_lib(): """Load libary by searching possible path.""" lib_path = _find_lib_path() - print(lib_path) lib = ctypes.cdll.LoadLibrary(lib_path[0]) # DMatrix functions lib.MXGetLastError.restype = ctypes.c_char_p 
@@ -264,7 +263,7 @@ def forward(self, **kwargs): v = np.asarray(v, dtype=np.float32, order='C') _check_call(_LIB.MXPredSetInput( self.handle, c_str(k), - v.ctypes.data_as(ctypes.c_void_p), + v.ctypes.data_as(mx_float_p), mx_uint(v.size))) _check_call(_LIB.MXPredForward(self.handle)) @@ -328,7 +327,7 @@ def get_output(self, index): data = np.empty(shape, dtype=_DTYPE_MX_TO_NP[out_type.value]) _check_call(_LIB.MXPredGetOutput( self.handle, mx_uint(index), - data.ctypes.data_as(ctypes.c_void_p), + data.ctypes.data_as(mx_float_p), mx_uint(data.size))) return data From 15964becd8329bab860344c0a17b3bdaffd20a42 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 28 Jun 2019 18:33:22 +0000 Subject: [PATCH 14/20] Fixes --- amalgamation/python/mxnet_predict.py | 2 +- src/c_api/c_predict_api.cc | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/amalgamation/python/mxnet_predict.py b/amalgamation/python/mxnet_predict.py index 9ba46e79df41..3a15a4a8b2a8 100644 --- a/amalgamation/python/mxnet_predict.py +++ b/amalgamation/python/mxnet_predict.py @@ -257,7 +257,7 @@ def forward(self, **kwargs): for k, v in kwargs.items(): if not isinstance(v, np.ndarray): raise ValueError("Expect numpy ndarray as input") - if k in self.type_dict: + if self.type_dict and k in self.type_dict: v = np.asarray(v, dtype=self.type_dict[k], order='C') else: v = np.asarray(v, dtype=np.float32, order='C') diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index 3c9165c1e94d..eb085f4bcc1e 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -226,8 +226,15 @@ int _CreatePartialOut(const char* symbol_json_str, g = mxnet::exec::InferShape(std::move(g), std::move(in_shapes), "__shape__"); g = mxnet::exec::InferType(std::move(g), std::move(in_types), "__dtype__"); bool infer_complete = (g.GetAttr("shape_num_unknown_nodes") == 0); + // This is tricky for AMP Use case, for example, with only weights input types + // cannot be inferred in AMP. 
Thus for AMP converted model type_dict will be + // required + bool infer_type_complete = (g.GetAttr("dtype_num_unknown_nodes") == 0); CHECK(infer_complete) << "The shape information of is not enough to get the shapes"; + CHECK(infer_type_complete) + << "The type information is not enough, please provide input arg_types " + "with provided_arg_dtype_names and provided_arg_dtypes"; CopyAttr(g.indexed_graph(), g.GetAttr("shape"), &arg_shapes, &out_shapes, &aux_shapes); From f7a9058e53bbfde1d2d2f220bd6e78656114f737 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 28 Jun 2019 22:41:43 +0000 Subject: [PATCH 15/20] Add support for forward pass only for gpu --- tests/python/unittest/test_predictor.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/python/unittest/test_predictor.py b/tests/python/unittest/test_predictor.py index ec12df310097..cd6a976096c2 100644 --- a/tests/python/unittest/test_predictor.py +++ b/tests/python/unittest/test_predictor.py @@ -95,8 +95,8 @@ def test_predictor_fp16(): block.add(mx.gluon.nn.Dense(3)) block.cast(np.float16) block.hybridize() - block.initialize(ctx=mx.gpu(0)) - tmp = mx.nd.array(input1, dtype=np.float16, ctx=mx.gpu(0)) + block.initialize(ctx=mx.current_context()) + tmp = mx.nd.array(input1, dtype=np.float16, ctx=mx.current_context()) out1 = block.forward(tmp) block.export(prefix) @@ -106,12 +106,13 @@ def test_predictor_fp16(): dev_type="gpu", dev_id=0, type_dict={"data": input1.dtype}) - predictor.forward(data=input1) - predictor_out1 = predictor.get_output(0) - assert out1.asnumpy().dtype == predictor_out1.dtype, \ - "Dtypes of output from C predict API doesnt match with gluon" + if ctx.current_context().dev_type == "gpu": + predictor.forward(data=input1) + predictor_out1 = predictor.get_output(0) + assert out1.asnumpy().dtype == predictor_out1.dtype, \ + "Dtypes of output from C predict API doesnt match with gluon" - assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6) + assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6) if __name__ == '__main__': From d6f0ceae991e9c62fa1af723ec28928001fc6f1d Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Sat, 29 Jun 2019 01:52:59 +0000 Subject: [PATCH 16/20] Fix Reshape, move test to gpu --- src/c_api/c_predict_api.cc | 1 + tests/python/unittest/test_predictor.py | 33 ------------------------- 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index eb085f4bcc1e..7a7e2580e559 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -514,6 +514,7 @@ int MXPredReshape(mx_uint num_input_nodes, p->exec.get())); ret->out_shapes = out_shapes; ret->out_arrays = ret->exec->outputs(); + ret->out_dtypes = p->out_dtypes; } *out = ret.release(); API_END(); diff --git a/tests/python/unittest/test_predictor.py b/tests/python/unittest/test_predictor.py index cd6a976096c2..fc2fbf600cbc 100644 --- a/tests/python/unittest/test_predictor.py +++ b/tests/python/unittest/test_predictor.py @@ -81,39 +81,6 @@ def test_load_ndarray(): for k in nd_data.keys(): assert_almost_equal(nd_data[k].asnumpy(), nd_load[k], rtol=1e-5, atol=1e-6) -@with_seed() -def test_predictor_fp16(): - prefix = 'test_predictor_simple_dense' - symbol_file = "%s-symbol.json" % prefix - param_file = "%s-0000.params" % prefix - - input1 = np.random.uniform(size=(1, 3)) - input1 = input1.astype(np.float16) - - block = mx.gluon.nn.HybridSequential() - block.add(mx.gluon.nn.Dense(7)) - 
block.add(mx.gluon.nn.Dense(3)) - block.cast(np.float16) - block.hybridize() - block.initialize(ctx=mx.current_context()) - tmp = mx.nd.array(input1, dtype=np.float16, ctx=mx.current_context()) - out1 = block.forward(tmp) - block.export(prefix) - - predictor = Predictor(open(symbol_file, "r").read(), - open(param_file, "rb").read(), - {"data": input1.shape}, - dev_type="gpu", - dev_id=0, - type_dict={"data": input1.dtype}) - if ctx.current_context().dev_type == "gpu": - predictor.forward(data=input1) - predictor_out1 = predictor.get_output(0) - assert out1.asnumpy().dtype == predictor_out1.dtype, \ - "Dtypes of output from C predict API doesnt match with gluon" - - assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6) - if __name__ == '__main__': import nose From a14d2c71f45507ac92259f93a3876bd6f8b1eabe Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 5 Jul 2019 23:01:31 +0000 Subject: [PATCH 17/20] Add monitor callback for C Predict API --- amalgamation/python/mxnet_predict.py | 16 ++++++++++++++++ include/mxnet/c_predict_api.h | 15 +++++++++++++++ src/c_api/c_predict_api.cc | 22 +++++++++++++++++++++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/amalgamation/python/mxnet_predict.py b/amalgamation/python/mxnet_predict.py index 3a15a4a8b2a8..b64c69042fce 100644 --- a/amalgamation/python/mxnet_predict.py +++ b/amalgamation/python/mxnet_predict.py @@ -149,6 +149,14 @@ def _check_call(ret): if ret != 0: raise RuntimeError(py_str(_LIB.MXGetLastError())) + +def _monitor_callback_wrapper(callback): + """A wrapper for the user-defined handle.""" + def callback_handle(name, array, _): + """ ctypes function """ + callback(name, array) + return callback_handle + _LIB = _load_lib() # type definitions mx_uint = ctypes.c_uint @@ -331,6 +339,14 @@ def get_output(self, index): mx_uint(data.size))) return data + def set_monitor_callback(self, callback, monitor_all=False): + cb_type = ctypes.CFUNCTYPE(None, ctypes.c_char_p, ctypes.c_void_p, ctypes.c_void_p) + self._monitor_callback = cb_type(_monitor_callback_wrapper(callback)) + _check_call(_LIB.MXPredSetMonitorCallback(self.handle, + self._monitor_callback, + None, + ctypes.c_int(monitor_all))) + def load_ndarray_file(nd_bytes): """Load ndarray file and return as list of numpy array. diff --git a/include/mxnet/c_predict_api.h b/include/mxnet/c_predict_api.h index 7051d4edac82..0872642d7ddc 100644 --- a/include/mxnet/c_predict_api.h +++ b/include/mxnet/c_predict_api.h @@ -49,6 +49,12 @@ typedef float mx_float; typedef void *PredictorHandle; /*! \brief handle to NDArray list */ typedef void *NDListHandle; +/*! \brief handle to NDArray */ +typedef void *NDArrayHandle; +/*! \brief callback used for add monitoring to nodes in the graph */ +typedef void (*PredMonitorCallback)(const char*, + NDArrayHandle, + void*); /*! * \brief Get the last error happeneed. @@ -319,6 +325,15 @@ MXNET_DLL int MXNDListGet(NDListHandle handle, const mx_float** out_data, const mx_uint** out_shape, mx_uint* out_ndim); + +/*! + * \brief set a call back to notify the completion of operation and allow for + * additional monitoring + */ +MXNET_DLL int MXPredSetMonitorCallback(PredictorHandle handle, + PredMonitorCallback callback, + void* callback_handle, + bool monitor_all); /*! * \brief Free a MXAPINDList * \param handle The handle of the MXAPINDList. 
diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index 7a7e2580e559..b2a685f135ae 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -220,6 +220,9 @@ int _CreatePartialOut(const char* symbol_json_str, in_types.push_back(arg_types[key]); } else if (aux_types.count(key) != 0) { in_types.push_back(aux_types[key]); + } else { + // if key not in arg_types or aux_types set to FP32 + in_types.push_back(0); } } nnvm::Graph g; g.outputs = sym.outputs; @@ -544,7 +547,8 @@ int MXPredGetOutputType(PredictorHandle handle, MXAPIPredictor* p = static_cast(handle); API_BEGIN(); CHECK_LT(out_index, p->out_arrays.size()) - << "Index exceed number of outputs"; + << "Index exceed number of outputs, provided out_index should be less than " + << p->out_arrays.size(); const int s = p->out_dtypes[out_index]; CHECK_GE(s, 0); @@ -651,6 +655,22 @@ int MXNDListGet(NDListHandle handle, API_END(); } +int MXPredSetMonitorCallback(PredictorHandle handle, + PredMonitorCallback callback, + void* callback_handle, + bool monitor_all) { + MXAPIPredictor* p = static_cast(handle); + API_BEGIN(); + PredMonitorCallback callback_temp = callback; + void* callback_handle_temp = callback_handle; + std::function clbk + = [callback_temp, callback_handle_temp](const char* name, void* handle) { + callback_temp(name, handle, callback_handle_temp); + }; + p->exec->SetMonitorCallback(clbk, monitor_all); + API_END(); +} + int MXNDListFree(NDListHandle handle) { API_BEGIN(); delete static_cast(handle); From 84d8652e8d9632953ec6642c27847d984ad20bbc Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 5 Jul 2019 23:15:28 +0000 Subject: [PATCH 18/20] Add tests, default dtype and set_monitor_callback --- include/mxnet/c_predict_api.h | 2 +- src/c_api/c_predict_api.cc | 2 +- tests/python/gpu/test_predictor.py | 128 +++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 tests/python/gpu/test_predictor.py diff --git a/include/mxnet/c_predict_api.h b/include/mxnet/c_predict_api.h index 0872642d7ddc..18bec625f05f 100644 --- a/include/mxnet/c_predict_api.h +++ b/include/mxnet/c_predict_api.h @@ -120,7 +120,7 @@ MXNET_DLL int MXPredCreateEx(const char* symbol_json_str, const void* param_bytes, int param_size, int dev_type, int dev_id, - mx_uint num_input_nodes, + const mx_uint num_input_nodes, const char** input_keys, const mx_uint* input_shape_indptr, const mx_uint* input_shape_data, diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index b2a685f135ae..d614b09960c9 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -91,7 +91,7 @@ int _CreatePartialOut(const char* symbol_json_str, const void* param_bytes, int param_size, int dev_type, int dev_id, - mx_uint num_input_nodes, + const mx_uint num_input_nodes, const char** input_keys, const mx_uint* input_shape_indptr, const mx_uint* input_shape_data, diff --git a/tests/python/gpu/test_predictor.py b/tests/python/gpu/test_predictor.py new file mode 100644 index 000000000000..4838a76c7cb1 --- /dev/null +++ b/tests/python/gpu/test_predictor.py @@ -0,0 +1,128 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import print_function +import sys, os +curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) +sys.path.append(os.path.join(curr_path, "../../../amalgamation/python/")) +from mxnet_predict import Predictor, load_ndarray_file + +import ctypes +import numpy as np +import mxnet as mx +import mxnet.ndarray as nd +from mxnet.ndarray import NDArray +from mxnet import gluon +from mxnet.test_utils import assert_almost_equal, download_model +from mxnet.contrib.amp import amp +from mxnet.base import NDArrayHandle, py_str +sys.path.insert(0, os.path.join(curr_path, '../unittest')) +from common import setup_module, with_seed, teardown + +@with_seed() +def test_predictor_with_dtype(): + prefix = 'test_predictor_simple_dense' + symbol_file = "%s-symbol.json" % prefix + param_file = "%s-0000.params" % prefix + + input1 = np.random.uniform(size=(1, 3)) + input1 = input1.astype(np.float16) + + block = mx.gluon.nn.HybridSequential() + block.add(mx.gluon.nn.Dense(7)) + block.add(mx.gluon.nn.Dense(3)) + block.cast(np.float16) + block.hybridize() + block.initialize(ctx=mx.gpu(0)) + tmp = mx.nd.array(input1, dtype=np.float16, ctx=mx.gpu(0)) + out1 = block.forward(tmp) + block.export(prefix) + + predictor = Predictor(open(symbol_file, "r").read(), + open(param_file, "rb").read(), + {"data": input1.shape}, + dev_type="gpu", + dev_id=0, + type_dict={"data": input1.dtype}) + predictor.forward(data=input1) + predictor_out1 = predictor.get_output(0) + + assert_almost_equal(out1.asnumpy(), predictor_out1, rtol=1e-5, atol=1e-6) + +def compare_module_cpredict(result_sym, result_arg_params, result_aux_params, monitor_callback=False): + # Dummmy inputs + input1 = np.ones((1, 3, 224, 224)) + input1 = input1.astype(np.float32) + nd_dict = {} + def pred_mon_callback(name, arr): + nd_dict[name] = arr + mod = mx.mod.Module(result_sym, data_names=["data"], label_names=["softmax_label"], context=mx.gpu()) + mod.bind(data_shapes=[['data', (1, 3, 224, 224)]], label_shapes=[['softmax_label', (1,)]], for_training=False) + mod.set_params(result_arg_params, result_aux_params) + mod.forward(mx.io.DataBatch(data=[mx.nd.array(input1, ctx=mx.gpu())], + label=[mx.nd.ones((1,), ctx=mx.gpu())])) + prefix = "test_predictor_amp" + mod.save_checkpoint(prefix, 0, remove_amp_cast=False) + sym_file = "{}-symbol.json".format(prefix) + params_file = "{}-0000.params".format(prefix) + predictor = Predictor(open(sym_file, "r").read(), + open(params_file, "rb").read(), + {'data': (1, 3, 224, 224), + 'softmax_label': (1,)}, + dev_type="gpu", + dev_id=0) + if monitor_callback: + predictor.set_monitor_callback(pred_mon_callback, monitor_all=True) + predictor.forward(data=input1, softmax_label=mx.nd.ones((1,)).asnumpy()) + predictor_out1 = predictor.get_output(0) + if monitor_callback: + assert len(nd_dict) > 0, "Callback not called" + assert_almost_equal(mod.get_outputs()[0].asnumpy(), predictor_out1, atol=1e-1, rtol=1e-1) + + +@with_seed() +def test_predictor_amp(): + dir_path = os.path.dirname(os.path.realpath(__file__)) + model_path = os.path.join(dir_path, 'model') + if not os.path.isdir(model_path): + 
os.mkdir(model_path) + prefix, epoch = download_model("imagenet1k-resnet-18", dst_dir=model_path) + + sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch) + + + # Convert model to mixed precision model, params in FP32 + result_sym, result_arg_params, result_aux_params = amp.convert_model(sym, + arg_params, + aux_params, + target_dtype="float16", + target_dtype_ops=["Convolution"]) + compare_module_cpredict(result_sym, result_arg_params, result_aux_params) + + # Convert model to mixed precision model, params in FP16 + result_sym, result_arg_params, result_aux_params = amp.convert_model(sym, + arg_params, + aux_params, + target_dtype="float16", + target_dtype_ops=["Convolution"], + cast_optional_params=True) + compare_module_cpredict(result_sym, result_arg_params, result_aux_params, monitor_callback=True) + + +if __name__ == '__main__': + import nose + nose.runmodule() From 577deb9ed54e01415e6b8b7b5e771753879df976 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Fri, 5 Jul 2019 23:33:10 +0000 Subject: [PATCH 19/20] Improve error --- src/c_api/c_predict_api.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index d614b09960c9..b371fd044dc5 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -237,7 +237,9 @@ int _CreatePartialOut(const char* symbol_json_str, << "The shape information of is not enough to get the shapes"; CHECK(infer_type_complete) << "The type information is not enough, please provide input arg_types " - "with provided_arg_dtype_names and provided_arg_dtypes"; + "with provided_arg_dtype_names and provided_arg_dtypes." + "If using amalgamation python frontend you can use type_dict in Predictor API" + "to provide this information"; CopyAttr(g.indexed_graph(), g.GetAttr("shape"), &arg_shapes, &out_shapes, &aux_shapes); From d911bfa5ab6311602ee2a1f4b0fd183c36113969 Mon Sep 17 00:00:00 2001 From: Anirudh Subramanian Date: Mon, 8 Jul 2019 17:04:39 +0000 Subject: [PATCH 20/20] Fix c_str_array --- amalgamation/python/mxnet_predict.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/amalgamation/python/mxnet_predict.py b/amalgamation/python/mxnet_predict.py index b64c69042fce..48e3cd4a5145 100644 --- a/amalgamation/python/mxnet_predict.py +++ b/amalgamation/python/mxnet_predict.py @@ -63,7 +63,7 @@ def c_str_array(strings): Parameters ---------- - strings : list of strings + strings : list of string Python strings. Returns @@ -72,8 +72,10 @@ def c_str_array(strings): A const char ** pointer that can be passed to C API. """ arr = (ctypes.c_char_p * len(strings))() - arr[:] = strings + arr[:] = [s.encode('utf-8') for s in strings] return arr + + else: py_str = lambda x: x @@ -82,7 +84,7 @@ def c_str_array(strings): Parameters ---------- - strings : list of string + strings : list of strings Python strings. Returns @@ -91,9 +93,10 @@ def c_str_array(strings): A const char ** pointer that can be passed to C API. """ arr = (ctypes.c_char_p * len(strings))() - arr[:] = [s.encode('utf-8') for s in strings] + arr[:] = strings return arr + def c_str(string): """"Convert a python string to C string.""" if not isinstance(string, str):
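Taken together, the predictor changes above add dtype support (via `type_dict` and `MXPredCreateEx`) and a monitor callback to the C predict API and its amalgamation Python wrapper. A minimal usage sketch follows; it is illustrative only: the model file names, input shape, and GPU context are assumptions, and the amalgamation directory is assumed to be on `sys.path` as in `tests/python/gpu/test_predictor.py`.

import numpy as np
from mxnet_predict import Predictor  # amalgamation/python/mxnet_predict.py

def monitor(name, array_handle):
    # Invoked for every monitored array when monitor_all=True.
    print("monitored:", name)

# Hypothetical exported FP16 model with a single "data" input.
input1 = np.random.uniform(size=(1, 3)).astype(np.float16)

predictor = Predictor(open("model-symbol.json", "r").read(),
                      open("model-0000.params", "rb").read(),
                      {"data": input1.shape},
                      dev_type="gpu", dev_id=0,
                      type_dict={"data": input1.dtype})    # per-input dtypes (new)
predictor.set_monitor_callback(monitor, monitor_all=True)  # optional (new)
predictor.forward(data=input1)
out = predictor.get_output(0)  # dtype now follows the inferred output type
print(out.dtype, out.shape)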