Skip to content

Commit

Permalink
USE_NVRTC -> ENABLE_CUDA_RTC to fix maven build. Add compile-guard to…
Browse files Browse the repository at this point in the history
… fusion. (apache#16838)

* Rename USE_NVRTC -> ENABLE_CUDA_RTC to fix maven build.  Compile-guard fusion framework.

* Fix fusion-not-supported warning.

* Fix compile guards

* Fix cmake build so -DMXNET_ENABLE_CUDA_RTC=1 is passed to nvcc

* Minimize side effects of the previous change
  • Loading branch information
DickJC123 authored and ptrendx committed Nov 20, 2019
1 parent 8b58b78 commit 79f9f84
Show file tree
Hide file tree
Showing 15 changed files with 66 additions and 31 deletions.
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -633,14 +633,17 @@ if(USE_CUDA)
else()
list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES})
# define preprocessor macro so that we will not include the generated forcelink header
if(ENABLE_CUDA_RTC)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
endif()
# Create '.cmake' files for cuda compiles given definitions added thus far
mshadow_cuda_compile(cuda_objs ${CUDA})
if(MSVC)
if(ENABLE_CUDA_RTC)
FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
endif()
FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
Expand All @@ -652,7 +655,6 @@ if(USE_CUDA)
list(APPEND mxnet_LINKER_LIBS cufft cusolver)
if(ENABLE_CUDA_RTC)
list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
endif()
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
endif()
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ before_build:
set OpenCV_DIR=%APPVEYOR_BUILD_FOLDER%/%MXNET_OPENCV_DIR%/build
cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"
cmake .. -DOPENCV_DIR=%OpenCV_DIR% -DUSE_PROFILER=1 -DUSE_CUDA=0 -DUSE_CUDNN=0 -DENABLE_CUDA_RTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -G "Visual Studio 12 2013 Win64"
build_script:
- cmd: >-
Expand Down
12 changes: 6 additions & 6 deletions ci/build_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class BuildFlavour(Enum):
'WIN_CPU': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand All @@ -67,7 +67,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKLDNN': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand All @@ -80,7 +80,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKLDNN_MKL': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
Expand All @@ -93,7 +93,7 @@ class BuildFlavour(Enum):
, 'WIN_CPU_MKL': (
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DUSE_NVRTC=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
Expand All @@ -106,7 +106,7 @@ class BuildFlavour(Enum):
, 'WIN_GPU': (
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DUSE_NVRTC=ON '
'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand All @@ -122,7 +122,7 @@ class BuildFlavour(Enum):
, 'WIN_GPU_MKLDNN': (
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DUSE_NVRTC=ON '
'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_darwin_mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether to use CUDA runtime compilation for writing kernels in native language (i.e. Python)
USE_NVRTC = 0
ENABLE_CUDA_RTC = 0

# use openmp for parallelization
USE_OPENMP = 0
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_linux_cu90mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ USE_NCCL = 1

# whether to use CUDA runtime compilation for writing kernels in native language (i.e. Python)
USE_NVTX=1
USE_NVRTC = 1
ENABLE_CUDA_RTC = 1

# use openmp for parallelization
USE_OPENMP = 1
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_linux_cu92mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ USE_NCCL = 1

# whether to use CUDA runtime compilation for writing kernels in native language (i.e. Python)
USE_NVTX=1
USE_NVRTC = 1
ENABLE_CUDA_RTC = 1

# use openmp for parallelization
USE_OPENMP = 1
Expand Down
2 changes: 1 addition & 1 deletion make/maven/maven_linux_mkl.mk
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ USE_CUDNN = 0
# CUDA_ARCH :=

# whether to use CUDA runtime compilation for writing kernels in native language (i.e. Python)
USE_NVRTC = 0
ENABLE_CUDA_RTC = 0

# use openmp for parallelization
USE_OPENMP = 1
Expand Down
5 changes: 5 additions & 0 deletions src/executor/exec_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ Graph FusePointwiseForward(Graph&& g);
*/
Graph FusePointwiseBackward(Graph&& g);

/*!
* \brief Issue a one-time warning that fusion is not possible for this platform or build.
*/
void WarnFusionNotSupported();

/*!
* \brief Infer shapes in the graph given the information.
* \param graph The input graph.
Expand Down
9 changes: 7 additions & 2 deletions src/executor/graph_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ Graph GraphExecutor::InitGraph(nnvm::Symbol symbol,
// setup gradient
nnvm::Graph g = InitFullGraph(symbol, grad_req_types);

#if MXNET_USE_CUDA && !defined(_WIN32)
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
if (default_ctx.dev_mask() == Context::kGPU && dmlc::GetEnv("MXNET_USE_FUSION", true)) {
nnvm::Graph unoptimized_graph;
common::CopyGraph(&unoptimized_graph, g, false);
Expand Down Expand Up @@ -1034,7 +1034,12 @@ Graph GraphExecutor::InitGraph(nnvm::Symbol symbol,
<< "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
}
}
#endif // MXNET_USE_CUDA
#else
// Only warn user if MXNET_USE_FUSION env var is explicitly set
if (default_ctx.dev_mask() == Context::kGPU && dmlc::GetEnv("MXNET_USE_FUSION", false)) {
WarnFusionNotSupported();
}
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)

// create "device" and "context" attrs for the graph
g = AssignContext(g, default_ctx, ctx_map,
Expand Down
22 changes: 19 additions & 3 deletions src/executor/pointwise_fusion_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,26 @@
#include "../operator/fusion/fused_op.h"
#include "../operator/operator_common.h"

#if MXNET_USE_CUDA

namespace mxnet {
namespace exec {

/*!
 * \brief Log a warning that dynamic fused-op creation is being omitted.
 *
 * The warning is emitted on the first call only; a function-local static flag
 * records that it has been issued, and later calls return without logging.
 * The flag is a plain bool with no synchronization visible here.
 * The message text depends on whether the build targets Windows (_WIN32).
 */
void WarnFusionNotSupported() {
  static bool already_warned = false;
  // Guard clause: nothing to do once the warning has been issued.
  if (already_warned) {
    return;
  }
  already_warned = true;

  // Select the platform-specific explanation at compile time.
#if defined(_WIN32)
  static const char kFusionUnavailableMsg[] =
      "Omitting dynamic fused op creation- not enabled on Windows. "
      "Unset env var MXNET_USE_FUSION=1 to quiet this message.";
#else
  static const char kFusionUnavailableMsg[] =
      "Omitting dynamic fused op creation- needs MXNet lib built with "
      "USE_CUDA=1 and ENABLE_CUDA_RTC=1. Unset env var MXNET_USE_FUSION=1 "
      "to quiet this message.";
#endif  // defined(_WIN32)

  LOG(WARNING) << kFusionUnavailableMsg;
}

#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace {
bool IsFusionCompatible(nnvm::Node* n) {
using namespace mxnet::fusion;
Expand Down Expand Up @@ -304,8 +320,8 @@ Graph FusePointwiseBackward(Graph &&g) {
ret.outputs = g.outputs;
return ret;
}
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

} // namespace exec
} // namespace mxnet

#endif // MXNET_USE_CUDA
13 changes: 8 additions & 5 deletions src/imperative/cached_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,8 @@ void SetRefCounts(nnvm::Graph* fwd_graph, const nnvm::Graph& full_graph) {

void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Graph * grad_graph,
const Context& context, size_t num_forward_outputs, const bool inlining) {
#if MXNET_USE_CUDA && !defined(_WIN32)
if (context.dev_mask() == kGPU &&
!inlining &&
dmlc::GetEnv("MXNET_USE_FUSION", true)) {
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
if (context.dev_mask() == kGPU && !inlining && dmlc::GetEnv("MXNET_USE_FUSION", true)) {
nnvm::Graph unoptimized_graph;
common::CopyGraph(&unoptimized_graph, *full_graph, false);

Expand Down Expand Up @@ -202,7 +200,12 @@ void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Grap
<< "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
}
}
#endif // MXNET_USE_CUDA
#else
// Only warn user if MXNET_USE_FUSION env var is explicitly set
if (context.dev_mask() == kGPU && !inlining && dmlc::GetEnv("MXNET_USE_FUSION", false)) {
exec::WarnFusionNotSupported();
}
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)

*fwd_graph = nnvm::Graph();
fwd_graph->outputs = std::vector<nnvm::NodeEntry>(full_graph->outputs.begin(),
Expand Down
4 changes: 2 additions & 2 deletions src/operator/fusion/fused_op-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <map>
#include <vector>

#if MXNET_USE_CUDA
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace mxnet {

Expand Down Expand Up @@ -992,6 +992,6 @@ const char kernel_end[] = R"code(}

} // namespace mxnet

#endif // MXNET_USE_CUDA
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

#endif // MXNET_OPERATOR_FUSION_FUSED_OP_INL_H_
4 changes: 2 additions & 2 deletions src/operator/fusion/fused_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "../operator_common.h"
#include "../../executor/exec_pass.h"

#if MXNET_USE_CUDA
#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace mxnet {

Expand Down Expand Up @@ -302,4 +302,4 @@ NNVM_REGISTER_OP(_FusedOpOutHelper)

} // namespace mxnet

#endif // MXNET_USE_CUDA
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
5 changes: 5 additions & 0 deletions src/operator/fusion/fused_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
* under the License.
*/

// Additional use of MXNET_USE_CUDA is not needed to guard a '.cu' file.
#if MXNET_ENABLE_CUDA_RTC

#include <sys/stat.h>
#include <nvrtc.h>
#include <cuda.h>
Expand Down Expand Up @@ -787,3 +790,5 @@ NNVM_REGISTER_OP(_FusedOp)
.set_attr<FCompute>("FCompute<gpu>", FusedOpForwardGPU);

} // namespace mxnet

#endif // MXNET_ENABLE_CUDA_RTC
7 changes: 3 additions & 4 deletions src/operator/fusion/fused_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#ifndef MXNET_OPERATOR_FUSION_FUSED_OP_H_
#define MXNET_OPERATOR_FUSION_FUSED_OP_H_


#include <mxnet/operator.h>
#include <nnvm/graph.h>
#include <vector>
Expand All @@ -29,8 +28,7 @@
#include <mutex>
#include <tuple>

#if MXNET_USE_CUDA

#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

namespace mxnet {

Expand Down Expand Up @@ -202,5 +200,6 @@ using FusedOpHelperParamPtr = std::shared_ptr<FusedOpHelperParam>;

} // namespace mxnet

#endif // MXNET_USE_CUDA
#endif // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC

#endif // MXNET_OPERATOR_FUSION_FUSED_OP_H_

0 comments on commit 79f9f84

Please sign in to comment.