This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Use RTC for elementwise and broadcast ops #18622

Merged 76 commits on Aug 20, 2020
Commits
98aceba
Reapplying PR #17767
ptrendx Feb 14, 2020
6b235fd
Making RTC required
ptrendx May 18, 2020
38f30f0
Move cuda utils to src/common/cuda and refactor RTC part
ptrendx May 19, 2020
5d01ad2
Unary ops via RTC
ptrendx May 22, 2020
c5aeeef
Support binary_scalar forward
ptrendx May 29, 2020
39f6fee
Backward of binary scalar
ptrendx Jun 1, 2020
fa732a5
Binary forward
ptrendx Jun 1, 2020
a2933f8
Fix for binary_scalar
ptrendx Jun 1, 2020
479c0f8
Moving all binary forward to RTC
ptrendx Jun 2, 2020
18f1c91
Backward of binary ops
ptrendx Jun 8, 2020
2e27675
Support broadcast
ptrendx Jun 9, 2020
3ea932f
RTC for elementwise sum
ptrendx Jun 11, 2020
9da292c
RTC for backward (UseNone) of broadcast
ptrendx Jun 15, 2020
d6074e9
RTC for broadcast backward (UseIn)
ptrendx Jun 18, 2020
8174ca2
Remove non-RTC vectorization support
ptrendx Jun 22, 2020
3a75411
Remove template from ReduceWorkspaceSize
ptrendx Jun 23, 2020
cd87eb2
Fixes from rebase
ptrendx Jun 23, 2020
e7ad72b
Guarding RTC usage behind MXNET_USE_CUDA
ptrendx Jun 26, 2020
d889bdc
More guards
ptrendx Jun 26, 2020
b09090c
C++17 for CUDA code
ptrendx Jun 26, 2020
7415264
MixedUnaryBackwardInOut as RTC
ptrendx Jun 26, 2020
49ee749
Removing unused variable
ptrendx Jun 26, 2020
c23fa6a
Revert "C++17 for CUDA code"
ptrendx Jun 26, 2020
e2065f2
Get rid of CI tests without RTC
ptrendx Jun 26, 2020
784c082
Fix lint
ptrendx Jun 26, 2020
69f0232
Change a few more elemwise functions
ptrendx Jun 26, 2020
762f56c
Fix large tensor build
ptrendx Jun 26, 2020
2ad556e
Another try with DBL_MAX
ptrendx Jun 26, 2020
21d468e
Fix Windows compilation
ptrendx Jun 29, 2020
eb31935
Fix the large int test
ptrendx Jun 29, 2020
fe5cbfb
Add printing of the error code value to CUDA_DRIVER_CALL
ptrendx Jun 29, 2020
157751d
Fix
ptrendx Jun 29, 2020
c1c8071
Fix binary scalar
ptrendx Jun 29, 2020
6d1a46d
Get more information when cuLaunchKernel fails
ptrendx Jun 30, 2020
98e542b
Going easy on Windows compiler
ptrendx Jun 30, 2020
1195a02
Fix lint
ptrendx Jun 30, 2020
d597d83
Reorganization to split strings due to Windows compilation problems
ptrendx Jun 30, 2020
67d115c
Fix error with uninitialized value
ptrendx Jul 2, 2020
edf3a8a
Fix handling of different types for backward of binary scalar
ptrendx Jul 2, 2020
71deaff
Decreasing RTC overhead
ptrendx Jul 2, 2020
ff545f3
Fix lint and remove rest of mentions of ENABLE_RTC
ptrendx Jul 6, 2020
503046b
Jetson with RTC
ptrendx Jul 6, 2020
611276c
Fix the aws s3 command
ptrendx Jul 7, 2020
724571b
Debugging Windows failure
ptrendx Jul 7, 2020
c402a22
More debugging of Windows failure
ptrendx Jul 9, 2020
98acaef
Debug
ptrendx Jul 10, 2020
b4a6794
Fix the issue on Windows (long -> long long for 8B)
ptrendx Jul 16, 2020
7223408
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Jul 16, 2020
59dcbe0
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Jul 17, 2020
5eef300
libcuda.so for Jetson
ptrendx Jul 17, 2020
b05075b
Enable debug information for RTC kernels and clean up the debug PTX dump
ptrendx Jul 17, 2020
3e43e07
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Jul 20, 2020
55337df
Fix lint
ptrendx Jul 20, 2020
50388d8
Try without linking the stub of libcuda.so to a different place on Jetson
ptrendx Jul 20, 2020
3ce8984
Add docstring
ptrendx Jul 20, 2020
0bd007a
Answering review comments
ptrendx Jul 21, 2020
ee82cd6
Unifying vectorization
ptrendx Jul 21, 2020
e1f3d82
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Jul 21, 2020
b13ae5a
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Jul 21, 2020
242aba8
Fix
ptrendx Jul 23, 2020
9fac801
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Jul 28, 2020
57d72a7
Fixes for reduce ops
ptrendx Jul 28, 2020
5e9d582
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Jul 31, 2020
e547594
Fix M=1 case
ptrendx Aug 1, 2020
2758622
Fixes from rebase
ptrendx Aug 3, 2020
2b29a4f
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Aug 3, 2020
a60c483
Fix
ptrendx Aug 3, 2020
f1a2f54
Fix tests
ptrendx Aug 4, 2020
34c9d00
Adding tutorial for RTC
ptrendx Aug 7, 2020
3888cc2
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Aug 13, 2020
f4e039c
Fixes after merge
ptrendx Aug 13, 2020
9acab01
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Aug 14, 2020
12c8356
Fixes from review
ptrendx Aug 14, 2020
ece1943
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Aug 16, 2020
d6b2083
Change env var doc and undo the change to toctree
ptrendx Aug 18, 2020
f0ceed1
Merge branch 'upstream' into pr_rtc_elementwise_ops
ptrendx Aug 18, 2020
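
Taken together, the commits above move MXNet's GPU elementwise and broadcast kernels from ahead-of-time template instantiation to NVRTC runtime compilation: kernel source is assembled as a string, compiled to PTX on first use, loaded through the CUDA driver API, and launched with cuLaunchKernel. The sketch below shows that general NVRTC round trip. It is a minimal illustration of the technique, not MXNet's implementation; the kernel body, names, and architecture flag are placeholders.

```cpp
// Minimal NVRTC round trip (sketch): compile a kernel from a source string,
// load the resulting PTX through the CUDA driver API, and launch it.
// Error checks are elided for brevity except for the compile step.
#include <cuda.h>
#include <nvrtc.h>
#include <cstdio>
#include <string>

const char* kSource = R"(
extern "C" __global__ void add_scalar(float* out, const float* in,
                                      float alpha, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = in[i] + alpha;
})";

int main() {
  cuInit(0);
  CUdevice dev;
  cuDeviceGet(&dev, 0);
  CUcontext ctx;
  cuCtxCreate(&ctx, 0, dev);

  // 1. Compile the source string to PTX.
  nvrtcProgram prog;
  nvrtcCreateProgram(&prog, kSource, "add_scalar.cu", 0, nullptr, nullptr);
  const char* opts[] = {"--gpu-architecture=compute_70"};  // placeholder arch
  if (nvrtcCompileProgram(prog, 1, opts) != NVRTC_SUCCESS) {
    size_t log_size;
    nvrtcGetProgramLogSize(prog, &log_size);
    std::string log(log_size, '\0');
    nvrtcGetProgramLog(prog, &log[0]);
    std::fprintf(stderr, "NVRTC error:\n%s\n", log.c_str());
    return 1;
  }
  size_t ptx_size;
  nvrtcGetPTXSize(prog, &ptx_size);
  std::string ptx(ptx_size, '\0');
  nvrtcGetPTX(prog, &ptx[0]);
  nvrtcDestroyProgram(&prog);

  // 2. Load the PTX and look up the kernel (real code caches the module).
  CUmodule mod;
  cuModuleLoadData(&mod, ptx.c_str());
  CUfunction fn;
  cuModuleGetFunction(&fn, mod, "add_scalar");

  // 3. Launch through the driver API.
  int n = 1024;
  float alpha = 1.0f;
  CUdeviceptr in, out;
  cuMemAlloc(&in, n * sizeof(float));
  cuMemAlloc(&out, n * sizeof(float));
  void* args[] = {&out, &in, &alpha, &n};
  cuLaunchKernel(fn, (n + 127) / 128, 1, 1, 128, 1, 1, 0, nullptr, args, nullptr);
  cuCtxSynchronize();
  return 0;
}
```

Commit 71deaff ("Decreasing RTC overhead") points at the main cost of this approach: compilation happens at first use, so production code caches the compiled module, typically keyed by the kernel source and target architecture.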
48 changes: 0 additions & 48 deletions 3rdparty/mshadow/mshadow/base.h
@@ -272,7 +272,6 @@ extern "C" {
}

#include "./half.h"
#include "./half2.h"
#include "./bfloat.h"
#define MSHADOW_HALF_BF_OPERATOR(RTYPE, OP) \
MSHADOW_XINLINE RTYPE operator OP(mshadow::half::half_t a, mshadow::bfloat::bf16_t b) { \
@@ -387,11 +386,6 @@ struct DataType<half::half_t> {
#endif
};
template<>
struct DataType<half::half2_t> {
static const int kFlag = kFloat16;
static const int kLanes = 2;
};
template<>
struct DataType<bfloat::bf16_t> {
static const int kFlag = kBfloat16;
static const int kLanes = 1;
@@ -1144,48 +1138,6 @@ struct minimum {
}
#endif

#define MSHADOW_TYPE_SWITCH_WITH_HALF2(type, DType, ...) \
switch (type) { \
case mshadow::kFloat32: \
{ \
typedef float DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kFloat64: \
{ \
typedef double DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kFloat16: \
{ \
typedef mshadow::half::half2_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kUint8: \
{ \
typedef uint8_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kInt32: \
{ \
typedef int32_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kInt64: \
{ \
typedef int64_t DType; \
{__VA_ARGS__} \
} \
break; \
default: \
LOG(FATAL) << "Unknown type enum " << type; \
}

#define MSHADOW_SGL_DBL_TYPE_SWITCH(type, DType, ...) \
switch (type) { \
case mshadow::kFloat32: \
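
For context on the hunk above: these MSHADOW_TYPE_SWITCH-style macros expand a code block once per runtime dtype, binding DType to the concrete element type, and the deleted WITH_HALF2 variant bound float16 tensors to the two-lane half2_t. A hypothetical call site follows; the operator and tensor names are illustrative, not taken from this diff.

```cpp
// Hypothetical dispatch through the removed macro: the block below is
// instantiated once for each supported dtype, with float16 data processed
// two lanes at a time as half2_t.
MSHADOW_TYPE_SWITCH_WITH_HALF2(inputs[0].type_flag_, DType, {
  mxnet_op::Kernel<some_elementwise_op, gpu>::Launch(
      s, outputs[0].Size(),
      outputs[0].dptr<DType>(), inputs[0].dptr<DType>());
});
```

With kernels generated at runtime, float16 vectorization no longer needs a dedicated two-lane element type, which is why the macro, the DataType<half2_t> specialization, and half2.h itself can all be removed.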
162 changes: 0 additions & 162 deletions 3rdparty/mshadow/mshadow/half2.h

This file was deleted.
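
What replaces half2_t is generic vectorized memory access inside the runtime-compiled kernels (see commits 8174ca2, "Remove non-RTC vectorization support", and ee82cd6, "Unifying vectorization"). Below is a sketch of the underlying technique, independent of MXNet's actual helper types; the Vec wrapper and the alignment/tail assumptions are simplifications.

```cpp
// Generic vectorized elementwise access: reinterpret the data through an
// aligned wrapper and process nvec elements per thread. With DType = __half
// and nvec = 2 this subsumes what half2_t provided. Assumes n % nvec == 0
// and vector-width-aligned pointers; real code also handles the tail.
template <typename DType, int nvec>
struct alignas(nvec * sizeof(DType)) Vec {
  DType v[nvec];
};

template <typename OP, typename DType, int nvec>
__global__ void vectorized_elementwise(DType* out, const DType* in, size_t n) {
  const size_t i = blockIdx.x * static_cast<size_t>(blockDim.x) + threadIdx.x;
  if (i < n / nvec) {
    auto x = reinterpret_cast<const Vec<DType, nvec>*>(in)[i];  // one wide load
#pragma unroll
    for (int j = 0; j < nvec; ++j) {
      x.v[j] = OP::apply(x.v[j]);
    }
    reinterpret_cast<Vec<DType, nvec>*>(out)[i] = x;  // one wide store
  }
}
```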

16 changes: 4 additions & 12 deletions CMakeLists.txt
@@ -79,7 +79,6 @@ option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
option(USE_GPROF "Compile with gprof (profiling) flag" OFF)
option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
option(USE_TVM_OP "Enable use of TVM operator build system." OFF)
option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support" ON)
option(BUILD_CPP_EXAMPLES "Build cpp examples" ON)
option(INSTALL_EXAMPLES "Install the example source files." OFF)
option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON)
@@ -570,18 +569,11 @@ if(USE_CUDA)

string(REPLACE ";" " " CUDA_ARCH_FLAGS_SPACES "${CUDA_ARCH_FLAGS}")

find_package(CUDAToolkit REQUIRED cublas cufft cusolver curand
OPTIONAL_COMPONENTS nvToolsExt nvrtc)
find_package(CUDAToolkit REQUIRED cublas cufft cusolver curand nvrtc cuda_driver
OPTIONAL_COMPONENTS nvToolsExt)

list(APPEND mxnet_LINKER_LIBS CUDA::cudart CUDA::cublas CUDA::cufft CUDA::cusolver CUDA::curand)
if(ENABLE_CUDA_RTC)
if(CUDA_nvrtc_LIBRARY)
list(APPEND mxnet_LINKER_LIBS CUDA::nvrtc cuda)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
else()
message(FATAL_ERROR "ENABLE_CUDA_RTC=ON, but failed to find NVRTC. CMake will exit." )
endif()
endif()
list(APPEND mxnet_LINKER_LIBS CUDA::cudart CUDA::cublas CUDA::cufft CUDA::cusolver CUDA::curand
CUDA::nvrtc CUDA::cuda_driver)
list(APPEND SOURCE ${CUDA})
add_definitions(-DMXNET_USE_CUDA=1)

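
The new hard dependencies are the point of this hunk: NVRTC (CUDA::nvrtc) performs the compilation and the driver library (CUDA::cuda_driver) performs module loading and kernel launches, so neither can remain optional once RTC is required. Relatedly, commit fe5cbfb extends CUDA_DRIVER_CALL to print the numeric error code. Below is a sketch of what such a checking macro typically looks like, assuming dmlc-style LOG(FATAL); the exact MXNet definition may differ.

```cpp
// Sketch of a driver-API checking macro that reports the error name,
// its description, and the raw enum value (useful when cuLaunchKernel
// fails with a code whose name alone is uninformative).
#define CUDA_DRIVER_CALL(func)                                           \
  {                                                                      \
    CUresult e = (func);                                                 \
    if (e != CUDA_SUCCESS) {                                             \
      const char* name = nullptr;                                        \
      const char* msg = nullptr;                                         \
      cuGetErrorName(e, &name);                                          \
      cuGetErrorString(e, &msg);                                         \
      LOG(FATAL) << "CUDA Driver error " << static_cast<int>(e) << ": "  \
                 << (name ? name : "unknown") << " - "                   \
                 << (msg ? msg : "no description");                      \
    }                                                                    \
  }
```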
6 changes: 0 additions & 6 deletions ci/build_windows.py
@@ -61,7 +61,6 @@ class BuildFlavour(Enum):
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
@@ -76,7 +75,6 @@ class BuildFlavour(Enum):
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
@@ -91,7 +89,6 @@ class BuildFlavour(Enum):
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
@@ -106,7 +103,6 @@ class BuildFlavour(Enum):
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=mkl '
@@ -121,7 +117,6 @@ class BuildFlavour(Enum):
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
@@ -136,7 +131,6 @@ class BuildFlavour(Enum):
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DENABLE_CUDA_RTC=ON '
'-DUSE_OPENCV=ON '
'-DUSE_OPENMP=ON '
'-DUSE_BLAS=open '
22 changes: 0 additions & 22 deletions ci/docker/runtime_functions.sh
@@ -142,7 +142,6 @@ build_jetson() {
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \
-DUSE_CUDA=ON \
-DMXNET_CUDA_ARCH="5.2" \
-DENABLE_CUDA_RTC=OFF \
-DUSE_OPENCV=OFF \
-DUSE_OPENMP=ON \
-DUSE_LAPACK=OFF \
@@ -669,27 +668,6 @@ build_ubuntu_gpu_cmake() {
ninja
}

build_ubuntu_gpu_cmake_no_rtc() {
set -ex
cd /work/build
CC=gcc-7 CXX=g++-7 cmake \
-DUSE_SIGNAL_HANDLER=ON \
-DUSE_CUDA=ON \
-DUSE_CUDNN=ON \
-DUSE_MKL_IF_AVAILABLE=OFF \
-DUSE_MKLML_MKL=OFF \
-DUSE_MKLDNN=ON \
-DUSE_DIST_KVSTORE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DBUILD_CYTHON_MODULES=1 \
-DENABLE_CUDA_RTC=OFF \
-G Ninja \
/work/mxnet

ninja
}

build_ubuntu_cpu_large_tensor() {
set -ex
cd /work/build
14 changes: 0 additions & 14 deletions ci/jenkins/Jenkins_steps.groovy
@@ -258,20 +258,6 @@ def compile_unix_cmake_gpu(lib_name) {
}]
}

def compile_unix_cmake_gpu_no_rtc(lib_name) {
return ['GPU: CMake CUDA RTC OFF': {
node(NODE_LINUX_CPU) {
ws('workspace/build-cmake-gpu-no-rtc') {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
utils.pack_lib(lib_name, mx_cmake_lib)
}
}
}
}]
}

def compile_unix_tensorrt_gpu(lib_name) {
return ['TensorRT': {
node(NODE_LINUX_CPU) {
1 change: 0 additions & 1 deletion ci/jenkins/Jenkinsfile_unix_gpu
@@ -41,7 +41,6 @@ core_logic: {
custom_steps.compile_unix_cmake_gpu('cmake_gpu'),
custom_steps.compile_unix_tensorrt_gpu('tensorrt'),
custom_steps.compile_unix_int64_gpu('gpu_int64'),
custom_steps.compile_unix_cmake_gpu_no_rtc('gpu_no_rtc'),
])

utils.parallel_stage('Tests', [
1 change: 0 additions & 1 deletion config/darwin.cmake
@@ -126,6 +126,5 @@ set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total num
# Other GPU features
set(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
set(NCCL_ROOT "" CACHE BOOL "NCCL install path. Supports autodetection.")
set(ENABLE_CUDA_RTC ON CACHE BOOL "Build with CUDA runtime compilation support")
set(USE_NVTX ON CACHE BOOL "Build with NVTX support")
set(USE_CXX11_ABI ON CACHE BOOL "Build with GLIBCXX_USE_CXX11_ABI")
1 change: 0 additions & 1 deletion config/linux.cmake
@@ -125,6 +125,5 @@ set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total num
# Other GPU features
set(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
set(NCCL_ROOT "" CACHE BOOL "NCCL install path. Supports autodetection.")
set(ENABLE_CUDA_RTC ON CACHE BOOL "Build with CUDA runtime compilation support")
set(USE_NVTX ON CACHE BOOL "Build with NVTX support")
set(USE_CXX11_ABI ON CACHE BOOL "Build with GLIBCXX_USE_CXX11_ABI")
1 change: 0 additions & 1 deletion config/linux_gpu.cmake
@@ -125,6 +125,5 @@ set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total num
# Other GPU features
set(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
set(NCCL_ROOT "" CACHE BOOL "NCCL install path. Supports autodetection.")
set(ENABLE_CUDA_RTC ON CACHE BOOL "Build with CUDA runtime compilation support")
set(USE_NVTX ON CACHE BOOL "Build with NVTX support")
set(USE_CXX11_ABI ON CACHE BOOL "Build with GLIBCXX_USE_CXX11_ABI")