Add a CUDA arch option for compiling TVM operator kernels.

Summary of what this patch does:
  * make/config.mk: new user knob USE_TVM_OP_CUDA_ARCH (default NONE).
  * Makefile: when USE_CUDA=1, copies the knob into TVM_OP_CUDA_ARCH and, if
    it names an arch, appends "--cuda-arch <arch>" to the options handed to
    contrib/tvmop/compile.py.
  * contrib/tvmop/compile.py: accepts --cuda-arch and hands it to
    set_cuda_target_arch() before tvm.build() when CUDA kernels are present.
  * ci/docker/runtime_functions.sh: defines CI_CUDA_ARCH and passes it to the
    GPU make invocation; also carries two whitespace-only cleanups.

Review notes:
  * The Makefile guard treats an empty TVM_OP_CUDA_ARCH like NONE, so a config
    file that does not define USE_TVM_OP_CUDA_ARCH no longer produces a bare
    "--cuda-arch" with no value.
  * CI passes only "$CI_CUDA_ARCH"; the nvcc "-gencode=..." words are not
    options that compile.py declares.
  * This file was recovered from a copy whose line breaks had been collapsed.
    Indentation of context lines is reconstructed and should be confirmed
    against the tree (or applied with --ignore-whitespace); the "index" blob
    ids are carried over from the original patch.

diff --git a/Makefile b/Makefile
index b3b188a2cf3b..4b95eaf8e848 100644
--- a/Makefile
+++ b/Makefile
@@ -473,11 +473,13 @@ CFLAGS += -I$(TVM_PATH)/include -DMXNET_USE_TVM_OP=1
 LDFLAGS += -L$(ROOTDIR)/lib -ltvm_runtime -Wl,-rpath,'$${ORIGIN}'
 
 TVM_USE_CUDA := OFF
+TVM_OP_CUDA_ARCH := NONE
 ifeq ($(USE_CUDA), 1)
 TVM_USE_CUDA := ON
 ifneq ($(USE_CUDA_PATH), NONE)
 TVM_USE_CUDA := $(USE_CUDA_PATH)
 endif
+  TVM_OP_CUDA_ARCH := $(USE_TVM_OP_CUDA_ARCH)
 endif
 endif
 
@@ -630,11 +632,17 @@ lib/libtvm_runtime.so:
 	ls $(ROOTDIR)/lib; \
 	cd $(ROOTDIR)
 
+TVM_OP_COMPILE_OPTIONS = -o $(ROOTDIR)/lib/libtvmop.so
+# Forward a CUDA arch to compile.py only when one is set; an empty value (for
+# example USE_TVM_OP_CUDA_ARCH not defined by the config) is treated like NONE.
+ifneq ($(filter-out NONE,$(strip $(TVM_OP_CUDA_ARCH))),)
+  TVM_OP_COMPILE_OPTIONS += --cuda-arch $(TVM_OP_CUDA_ARCH)
+endif
 lib/libtvmop.so: lib/libtvm_runtime.so $(wildcard contrib/tvmop/*/*.py contrib/tvmop/*.py)
 	echo "Compile TVM operators"
 	PYTHONPATH=$(TVM_PATH)/python:$(TVM_PATH)/topi/python:$(ROOTDIR)/contrib \
 		LD_LIBRARY_PATH=$(ROOTDIR)/lib \
-		python3 $(ROOTDIR)/contrib/tvmop/compile.py -o $(ROOTDIR)/lib/libtvmop.so
+		python3 $(ROOTDIR)/contrib/tvmop/compile.py $(TVM_OP_COMPILE_OPTIONS)
 
 NNVM_INC = $(wildcard $(NNVM_PATH)/include/*/*.h)
 NNVM_SRC = $(wildcard $(NNVM_PATH)/src/*/*/*.cc $(NNVM_PATH)/src/*/*.cc $(NNVM_PATH)/src/*.cc)
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index b2a50f30af4e..1080dc57eaec 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -26,6 +26,7 @@ NOSE_COVERAGE_ARGUMENTS="--with-coverage --cover-inclusive --cover-xml --cover-b
 NOSE_TIMER_ARGUMENTS="--with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error"
 CI_CUDA_COMPUTE_CAPABILITIES="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_70,code=sm_70"
 CI_CMAKE_CUDA_ARCH_BIN="52,70"
+CI_CUDA_ARCH="sm_70"
 
 clean_repo() {
     set -ex
@@ -228,7 +229,7 @@ build_ubuntu_gpu_mkldnn_release() {
 # $1 -> mxnet_variant: the mxnet variant to build, e.g. cpu, cu100, cu92mkl, etc. 
 build_dynamic_libmxnet() {
     set -ex
-    
+
     local mxnet_variant=${1:?"This function requires a mxnet variant as the first argument"}
 
     # relevant licenses will be placed in the licenses directory
@@ -769,6 +770,7 @@ build_ubuntu_gpu_mkldnn_nocudnn() {
         USE_TVM_OP=1                              \
         CUDA_ARCH="$CI_CUDA_COMPUTE_CAPABILITIES" \
         USE_SIGNAL_HANDLER=1                      \
+        USE_TVM_OP_CUDA_ARCH="$CI_CUDA_ARCH"      \
         -j$(nproc)
 }
 
@@ -948,7 +950,7 @@ cd_unittest_ubuntu() {
     fi
 
     $nose_cmd $NOSE_TIMER_ARGUMENTS --verbose tests/python/unittest
-    $nose_cmd $NOSE_TIMER_ARGUMENTS --verbose tests/python/quantization 
+    $nose_cmd $NOSE_TIMER_ARGUMENTS --verbose tests/python/quantization
 
     # https://github.com/apache/incubator-mxnet/issues/11801
     # if [[ ${mxnet_variant} = "cpu" ]] || [[ ${mxnet_variant} = "mkl" ]]; then
diff --git a/contrib/tvmop/compile.py b/contrib/tvmop/compile.py
index e6af0a276560..0f041c666379 100644
--- a/contrib/tvmop/compile.py
+++ b/contrib/tvmop/compile.py
@@ -22,6 +22,8 @@
 import os
 import argparse
 from tvmop.opdef import __OP_DEF__
+from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
+
 
 def get_target(device):
     if device == "cpu":
@@ -37,6 +39,8 @@ def get_target(device):
     parser = argparse.ArgumentParser(description="Generate tvm operators")
     parser.add_argument("-o", action="store", required=True, dest="target_path",
                         help="Target path which stores compiled library")
+    parser.add_argument('--cuda-arch', type=str, default=None, dest='cuda_arch',
+                        help='The cuda arch for compiling kernels for')
     arguments = parser.parse_args()
 
     func_list_llvm = []
@@ -52,8 +56,9 @@ def get_target(device):
                                        binds=operator_def.get_binds(args))
                 func_list.append(func_lower)
 
-    lowered_funcs = {get_target("cpu") : func_list_llvm}
+    lowered_funcs = {get_target("cpu"): func_list_llvm}
     if len(func_list_cuda) > 0:
         lowered_funcs[get_target("cuda")] = func_list_cuda
+        set_cuda_target_arch(arguments.cuda_arch)
     func_binary = tvm.build(lowered_funcs, name="tvmop")
     func_binary.export_library(arguments.target_path)
diff --git a/make/config.mk b/make/config.mk
index 982d15b19656..7af34bff0a75 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -65,6 +65,10 @@ ADD_CFLAGS =
 # whether to build operators written in TVM
 USE_TVM_OP = 0
 
+# CUDA arch (e.g. sm_70) to compile the operator kernels implemented using TVM
+# for; leave as NONE to not pass any --cuda-arch option to the TVM op compiler
+USE_TVM_OP_CUDA_ARCH = NONE
+
 #---------------------------------------------
 # matrix computation libraries for CPU/GPU
 #---------------------------------------------