TVM bridge support to JIT NDArray Function by TVM (apache#9880)
* TVM bridge support.
Support wrapping a TVM-compiled function as an NDArray function (see the usage sketch after this list).

* Testcases and CI to include TVM as dependency

* address review comments

* Add more comments, change to constexpr

* change to log warn

* update comment on the type code
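For context, this feature lets a TVM-compiled PackedFunc be called on MXNet NDArrays and scheduled through MXNet's async engine. A minimal usage sketch follows (assumptions: TVM is installed, the TVM-side helper tvm.contrib.mxnet.to_mxnet_func is available, and the TVM Python API of that era, e.g. tvm.placeholder/tvm.build, is used; this snippet is illustrative and not part of the commit):

    import tvm
    import mxnet as mx
    from tvm.contrib import mxnet as mxnet_bridge  # assumption: TVM-side helper for this bridge

    # Build a trivial element-wise add kernel with TVM.
    n = 1024
    A = tvm.placeholder((n,), name="A")
    B = tvm.placeholder((n,), name="B")
    C = tvm.compute((n,), lambda i: A[i] + B[i], name="C")
    s = tvm.create_schedule(C.op)
    fadd = tvm.build(s, [A, B, C], target="llvm")

    # Wrap the compiled PackedFunc so it runs through MXNet's async engine;
    # const_loc marks which NDArray arguments are read-only.
    mx_fadd = mxnet_bridge.to_mxnet_func(fadd, const_loc=[0, 1])

    a = mx.nd.uniform(shape=(n,))
    b = mx.nd.uniform(shape=(n,))
    c = mx.nd.empty((n,))
    mx_fadd(a, b, c)        # pushed asynchronously, like any other NDArray op
    print(c.asnumpy()[:4])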
tqchen authored Feb 27, 2018
1 parent dec32cb commit 96b344d
Showing 12 changed files with 349 additions and 10 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -234,6 +234,7 @@ include_directories("include")
include_directories("mshadow")
include_directories("3rdparty/cub")
include_directories("nnvm/include")
include_directories("nnvm/tvm/include")
include_directories("dmlc-core/include")
include_directories("dlpack/include")

@@ -696,4 +697,3 @@ endif()
set(LINT_DIRS "include src plugin cpp-package tests")
set(EXCLUDE_PATH "src/operator/contrib/ctc_include")
add_custom_target(mxnet_lint COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DLINT_DIRS=${LINT_DIRS} -DPROJECT_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR} -DPROJECT_NAME=mxnet -DEXCLUDE_PATH=${EXCLUDE_PATH} -P ${CMAKE_CURRENT_SOURCE_DIR}/dmlc-core/cmake/lint.cmake)

10 changes: 5 additions & 5 deletions Jenkinsfile
@@ -38,12 +38,12 @@ def init_git() {
deleteDir()
retry(5) {
try {
// Make sure to wait long enough for the api.github.com request quota. Important: Don't increase the amount of
// retries as this will increase the amount of requests and worsen the throttling
timeout(time: 15, unit: 'MINUTES') {
checkout scm
sh 'git submodule update --init'
sh 'git clean -d -f'
sh 'git submodule update --init --recursive'
sh 'git clean -d -f'
}
} catch (exc) {
deleteDir()
@@ -61,8 +61,8 @@ def init_git_win() {
// retries as this will increase the amount of requests and worsen the throttling
timeout(time: 15, unit: 'MINUTES') {
checkout scm
bat 'git submodule update --init'
bat 'git clean -d -f'
bat 'git submodule update --init --recursive'
bat 'git clean -d -f'
}
} catch (exc) {
deleteDir()
4 changes: 2 additions & 2 deletions Makefile
@@ -91,7 +91,7 @@ ifeq ($(DEBUG), 1)
else
CFLAGS += -O3 -DNDEBUG=1
endif
CFLAGS += -I$(ROOTDIR)/mshadow/ -I$(ROOTDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -Iinclude $(MSHADOW_CFLAGS)
CFLAGS += -I$(ROOTDIR)/mshadow/ -I$(ROOTDIR)/dmlc-core/include -fPIC -I$(NNVM_PATH)/include -I$(DLPACK_PATH)/include -I$(NNVM_PATH)/tvm/include -Iinclude $(MSHADOW_CFLAGS)
LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
ifeq ($(DEBUG), 1)
NVCCFLAGS += -std=c++11 -Xcompiler -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
@@ -356,7 +356,7 @@ ifeq ($(USE_CUDA), 1)
LDFLAGS += -lcuda -lnvrtc
CFLAGS += -DMXNET_ENABLE_CUDA_RTC=1
endif
# Make sure to add stubs as fallback in order to be able to build
# without full CUDA install (especially if run without nvidia-docker)
LDFLAGS += -L/usr/local/cuda/lib64/stubs
SCALA_PKG_PROFILE := $(SCALA_PKG_PROFILE)-gpu
10 changes: 10 additions & 0 deletions include/mxnet/tensor_blob.h
@@ -36,8 +36,18 @@
#include <utility>
#include <algorithm>
#include "./base.h"

namespace mxnet {

// Redefine DLPack enumeration values to stay backward compatible.
constexpr const int kCPU = kDLCPU;
constexpr const int kGPU = kDLGPU;
// Extension type code used for TVM functions.
// NNVM currently reserves type codes 16 to 19 from TVM;
// 16, 17, and 18 are already used by the NNVM compiler,
// so code 19 is picked for MXNet NDArray.
constexpr const int kTVMNDArrayTypeCode = 19;

/* Forward declaration for friend declaration in TBlob */
class NDArray;

2 changes: 1 addition & 1 deletion nnvm
Submodule nnvm updated 73 files
+7 −0 .gitignore
+1 −0 CMakeLists.txt
+2 −2 Makefile
+1 −1 dmlc-core
+6 −1 docs/api/python/frontend.rst
+26 −1 docs/top.rst
+11 −10 include/nnvm/c_api.h
+33 −0 include/nnvm/compiler/util.h
+2 −1 include/nnvm/node.h
+2 −1 include/nnvm/op.h
+1 −1 include/nnvm/symbolic.h
+33 −0 include/nnvm/top/nn.h
+103 −11 include/nnvm/top/tensor.h
+3 −0 make/config.mk
+2 −2 python/nnvm/_base.py
+7 −4 python/nnvm/compiler/build_module.py
+6 −0 python/nnvm/compiler/graph_attr.py
+57 −0 python/nnvm/compiler/graph_util.py
+1 −0 python/nnvm/frontend/__init__.py
+35 −0 python/nnvm/frontend/common.py
+1 −40 python/nnvm/frontend/coreml.py
+498 −0 python/nnvm/frontend/keras.py
+7 −0 python/nnvm/frontend/mxnet.py
+119 −10 python/nnvm/frontend/onnx.py
+4 −15 python/nnvm/testing/resnet.py
+3 −10 python/nnvm/testing/vgg.py
+16 −0 python/nnvm/top/attr_dict.py
+31 −72 python/nnvm/top/nn.py
+0 −3 python/nnvm/top/reduction.py
+1 −1 python/nnvm/top/registry.py
+0 −35 python/nnvm/top/tensor.py
+1 −42 python/nnvm/top/transform.py
+89 −27 src/compiler/compile_engine.cc
+7 −4 src/compiler/compile_engine.h
+1 −1 src/compiler/fold_scale_axis.cc
+9 −3 src/compiler/graph_fuse.cc
+1 −1 src/compiler/graph_hash.cc
+1 −0 src/compiler/packed_func_ext.cc
+9 −0 src/core/symbolic.cc
+8 −3 src/pass/gradient.cc
+63 −0 src/top/elemwise_op_common.h
+38 −3 src/top/nn/convolution.cc
+221 −3 src/top/nn/nn.cc
+21 −7 src/top/nn/nn_common.h
+77 −0 src/top/nn/pooling.cc
+54 −0 src/top/nn/upsampling.cc
+96 −2 src/top/op_common.h
+22 −0 src/top/tensor/broadcast.cc
+515 −19 src/top/tensor/elemwise.cc
+138 −0 src/top/tensor/matrix_op.cc
+101 −14 src/top/tensor/reduce.cc
+188 −5 src/top/tensor/transform.cc
+3 −0 tests/ci_build/Dockerfile.gpu
+1 −0 tests/ci_build/install/ubuntu_install_keras.sh
+1 −1 tests/cpp/tuple_test.cc
+56 −0 tests/python/compiler/test_nhwc_layout.py
+2 −2 tests/python/compiler/test_rpc_exec.py
+179 −112 tests/python/compiler/test_top_level1.py
+20 −0 tests/python/compiler/test_top_level2.py
+159 −0 tests/python/frontend/keras/test_forward.py
+2 −1 tests/python/frontend/onnx/model_zoo/__init__.py
+8 −4 tests/python/frontend/onnx/test_forward.py
+19 −0 tests/python/unittest/test_graph.py
+136 −0 tests/python/unittest/test_graph_gradient.py
+16 −0 tests/python/unittest/test_infer_shape.py
+3 −0 tests/scripts/task_frontend_test.sh
+193 −0 tutorials/define_and_compile_model.py
+235 −0 tutorials/deploy_model_on_mali_gpu.py
+9 −12 tutorials/deploy_model_on_rasp.py
+4 −2 tutorials/from_coreml.py
+114 −0 tutorials/from_keras.py
+218 −0 tutorials/using_external_lib.py
+1 −1 tvm
7 changes: 7 additions & 0 deletions python/mxnet/ndarray/ndarray.py
@@ -174,8 +174,15 @@ class NDArray(NDArrayBase):
__slots__ = []
# make numpy functions return NDArray instead of numpy object array
__array_priority__ = 1000.0
# Extension type code for TVM functions.
# See the C++ definition (kTVMNDArrayTypeCode) in include/mxnet/tensor_blob.h
_tvm_tcode = 19
# pylint: disable= no-member, undefined-variable

@property
def _tvm_handle(self):
return self.handle.value

def __repr__(self):
"""Returns a string representation of the array."""
shape_info = 'x'.join(['%d' % x for x in self.shape])
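The two members added above follow TVM's extension-type convention: a registered class exposes a class-level _tvm_tcode and a per-instance _tvm_handle, so TVM's FFI can pass the object across the boundary as a raw handle tagged with that type code. A minimal sketch of the convention (assumption: tvm.register_extension as provided by the TVM Python package of that era; the class below is hypothetical and only illustrates the protocol):

    import tvm

    @tvm.register_extension            # assumption: extension registration API in TVM
    class MyHandle(object):
        _tvm_tcode = 24                # hypothetical unused extension code, for illustration

        def __init__(self, handle):
            self.handle = handle

        @property
        def _tvm_handle(self):
            # Raw integer handle handed to PackedFunc calls, mirroring NDArray._tvm_handle.
            return self.handle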
180 changes: 180 additions & 0 deletions src/nnvm/tvm_bridge.cc
@@ -0,0 +1,180 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm_bridge.cc
* \brief Bridge to run TVM's PackedFunc in MXNet's async engine.
*
* This bridge is mainly used to expose MXNet's async engine push to
* TVM. It only uses the TVM runtime in header-only mode, which means
* there are no link dependencies.
*
* Support for TVM is optional even though this code
* is always compiled and built with the project.
* We choose this strategy because we do not yet want
* LLVM (which TVM uses) as a dependency. Instead we expose a hook
* to TVM and let users opt into this feature when they have TVM installed.
*
* We do require TVM and MXNet to be built with the same C++ ABI for std::function.
*/
#define TVM_RUNTIME_HEADER_ONLY 1
#include <tvm/runtime/packed_func.h>
#include <mxnet/c_api.h>
#include <mxnet/ndarray.h>
#include <mxnet/engine.h>

#include <memory>

namespace mxnet {

using tvm::runtime::PackedFunc;
using tvm::runtime::TVMArgs;
using tvm::runtime::TVMRetValue;

/*!
* \brief Async functor object that holds the packed function
*  and the calling arguments of the function.
*/
class TVMFunctor {
public:
// constructor
explicit TVMFunctor(PackedFunc func, PackedFunc fset_stream)
: func_(func), fset_stream_(fset_stream) {}

void Init(const TVMArgs& args,
const std::vector<int>& const_loc,
std::vector<Engine::VarHandle>* const_vars,
std::vector<Engine::VarHandle>* mutate_vars) {
values_.clear();
type_codes_.clear();
values_.insert(values_.end(), args.values, args.values + args.size());
type_codes_.insert(
type_codes_.end(), args.type_codes, args.type_codes + args.size());

size_t const_loc_ptr = 0;
for (int i = 0; i < args.size(); ++i) {
if (args.type_codes[i] == kTVMNDArrayTypeCode) {
const NDArray& nd =
static_cast<NDArray*>(args.values[i].v_handle)[0];
// The DLTensor handle cannot be set yet; it is filled in Run() once the data is ready.
type_codes_[i] = kArrayHandle;
array_data_.push_back(nd);
array_loc_.push_back(i);
// check if there is read or mutate
// by default assume we mutate the array.
if (const_loc_ptr < const_loc.size() &&
i == const_loc[const_loc_ptr]) {
const_vars->push_back(nd.var());
++const_loc_ptr;
} else {
mutate_vars->push_back(nd.var());
}
} else {
CHECK_LT(args.type_codes[i], kTVMType)
<< "Only allow POD type in mxnet async call";
}
}
}

Context ctx() {
return array_data_[0].ctx();
}

void Run(const RunContext& rctx) {
// setup DLTensor
for (size_t i = 0; i < array_loc_.size(); ++i) {
values_[array_loc_[i]].v_handle =
const_cast<DLTensor*>(&(array_data_[i].data().dltensor()));
}
// run the packed function
TVMRetValue rv;
TVMArgs args(&values_[0], &type_codes_[0], values_.size());
if (ctx().dev_type == Context::kGPU) {
#if MXNET_USE_CUDA
// pass stream via last argument.
void* strm = static_cast<void*>(rctx.get_stream<gpu>()->stream_);
int dev_type = kDLGPU;
fset_stream_(dev_type, rctx.ctx.dev_id, strm);
func_.CallPacked(args, &rv);
fset_stream_(dev_type, rctx.ctx.dev_id, nullptr);
#else
LOG(FATAL) << "Please compile with CUDA enabled for cuda features";
#endif
} else {
func_.CallPacked(args, &rv);
}
}

private:
/*! \brief The function */
PackedFunc func_;
/*! \brief Set stream */
PackedFunc fset_stream_;
/*! \brief Values field */
std::vector<TVMValue> values_;
/*! \brief type code field */
std::vector<int> type_codes_;
/*! \brief arrays field */
std::vector<NDArray> array_data_;
/*! \brief position of array in arguments */
std::vector<int> array_loc_;
};


// Wrap a TVM function into a function that invokes MXNet's Engine.
// It does two things: push the call to the engine properly, and
// set up the NDArray-to-DLTensor conversion during invocation.
void WrapAsyncCall(TVMArgs wrap_args, TVMRetValue* wrap_rv) {
PackedFunc f = wrap_args[0];
PackedFunc fset_stream = wrap_args[1];
int num_const = wrap_args[2];

// sorted position of constant arguments
std::vector<int> const_loc;
for (int i = 0; i < num_const; ++i) {
const_loc.push_back(wrap_args[i + 3].operator int());
}
std::sort(const_loc.begin(), const_loc.end());
// wrapped function
// This is the function that is called by the user.
auto wrapped = [f, fset_stream, const_loc](TVMArgs args, TVMRetValue* rv) {
std::shared_ptr<TVMFunctor> func =
std::make_shared<TVMFunctor>(f, fset_stream);
std::vector<Engine::VarHandle> const_vars, mutate_vars;
func->Init(args, const_loc, &const_vars, &mutate_vars);
Engine *engine = Engine::Get();
engine->DeduplicateVarHandle(&const_vars, &mutate_vars);
engine->PushSync([func](RunContext ctx) {
func->Run(ctx);
}, func->ctx(), const_vars, mutate_vars);
};
*wrap_rv = PackedFunc(wrapped);
}

} // namespace mxnet

// C callback that can be used by TVM to extract
// the WrapAsyncCall function.
extern "C" MXNET_DLL int MXTVMBridge(TVMFunctionHandle pregister) {
using tvm::runtime::PackedFunc;
const PackedFunc& fregister =
*static_cast<PackedFunc*>(pregister);
fregister("WrapAsyncCall", PackedFunc(mxnet::WrapAsyncCall));
return 0;
}
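To make the handshake concrete: a client (TVM's Python package, in practice) looks up MXTVMBridge in libmxnet, passes it a registration PackedFunc, receives "WrapAsyncCall", and then calls WrapAsyncCall(func, fset_stream, num_const, const_indices...) to obtain a PackedFunc that pushes func onto MXNet's engine. A rough Python sketch of that lookup (assumptions: tvm.convert on a Python callable yields a PackedFunc with a .handle attribute, and mxnet.base._LIB exposes MXTVMBridge; this mirrors, but is not, the actual TVM contrib code):

    import mxnet as mx
    import tvm

    def fetch_wrap_async_call():
        fdict = {}

        def _register(name, func):                 # called back via fregister(name, func)
            fdict[name] = func

        preg = tvm.convert(_register)              # wrap the collector as a PackedFunc
        mx.base._LIB.MXTVMBridge(preg.handle)      # MXNet registers "WrapAsyncCall" into it
        return fdict["WrapAsyncCall"]

    # wrapped = fetch_wrap_async_call()(tvm_func, fset_stream, len(const_loc), *const_loc)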
6 changes: 6 additions & 0 deletions tests/ci_build/Dockerfile.gpu
@@ -12,3 +12,9 @@ COPY install/ubuntu_install_r.sh /install/
RUN /install/ubuntu_install_r.sh
COPY install/ubuntu_install_perl.sh /install/
RUN /install/ubuntu_install_perl.sh

COPY install/ubuntu_install_llvm.sh /install/
RUN /install/ubuntu_install_llvm.sh

COPY install/ubuntu_install_tvm.sh /install/
RUN /install/ubuntu_install_tvm.sh
28 changes: 28 additions & 0 deletions tests/ci_build/install/ubuntu_install_llvm.sh
@@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.



echo deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main\
>> /etc/apt/sources.list.d/llvm.list
echo deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main\
>> /etc/apt/sources.list.d/llvm.list

wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
apt-get update && apt-get install -y --force-yes llvm-5.0
44 changes: 44 additions & 0 deletions tests/ci_build/install/ubuntu_install_tvm.sh
@@ -0,0 +1,44 @@
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Build and install TVM
cd /tmp
git clone https://github.com/dmlc/tvm/ --recursive
cd tvm

# This is a stable commit that supports the MXNet TVM bridge.
# We pin it because bridge support was only just merged
# into master and there is not yet a version tag.
git checkout 30eaf463e34d7c301357c31a010945d11df16537

cp make/config.mk .
echo USE_CUDA=1 >> config.mk
echo LLVM_CONFIG=llvm-config-5.0 >> config.mk
echo USE_RPC=1 >> config.mk
echo USE_GRAPH_RUNTIME=1 >> config.mk
echo CUDA_PATH=/usr/local/cuda >> config.mk
make -j`nproc`

cd python
python setup.py install
cd -

cd topi/python
python setup.py install
cd -
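After the script finishes, a quick sanity check that both packages are importable (illustrative only, not part of the script):

    # run inside the CI image after the install (illustrative)
    import tvm
    import topi
    print(tvm.__version__)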