TensorRT Open Source Release/6.0

NVIDIA · Sep 16, 2019 · 639d11a · 639d11a
1 parent 443e495
commit 639d11a
Show file tree

Hide file tree

Showing 235 changed files with 29,622 additions and 4,951 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -1,7 +1,7 @@
 [submodule "parsers/onnx"]
 	path = parsers/onnx
 	url = https://github.com/onnx/onnx-tensorrt.git
-	branch = 5.1
+	branch = 6.0
 [submodule "third_party/protobuf"]
 	path = third_party/protobuf
 	url = https://github.com/protocolbuffers/protobuf.git

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -21,7 +21,7 @@ include(cmake/modules/find_library_create_target.cmake)
 set_ifndef(TRT_LIB_DIR ${CMAKE_BINARY_DIR})
 set_ifndef(TRT_BIN_DIR ${CMAKE_BINARY_DIR})
 
-file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/NvInfer.h" VERSION_STRINGS REGEX "#define NV_TENSORRT_.*")
+file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/NvInferVersion.h" VERSION_STRINGS REGEX "#define NV_TENSORRT_.*")
 
 foreach(TYPE MAJOR MINOR PATCH BUILD)
     string(REGEX MATCH "NV_TENSORRT_${TYPE} [0-9]" TRT_TYPE_STRING ${VERSION_STRINGS})
@@ -37,15 +37,14 @@ set(TRT_VERSION "${TRT_MAJOR}.${TRT_MINOR}.${TRT_PATCH}.${TRT_BUILD}" CACHE STRI
 set(TRT_SOVERSION "${TRT_SO_MAJOR}.${TRT_SO_MINOR}.${TRT_SO_PATCH}" CACHE STRING "TRT library so version")
 message("Building for TensorRT version: ${TRT_VERSION}, library version: ${TRT_SOVERSION}")
 
-set(FIND_CUDA "")
 if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
     find_program(CMAKE_CXX_COMPILER NAMES $ENV{CXX} g++)
-    set(FIND_CUDA "CUDA")
 endif()
 
-message("CHECK for ${FIND_CUDA}")
+set(CMAKE_SKIP_BUILD_RPATH True)
+
 project(TensorRT
-        LANGUAGES CXX ${FIND_CUDA}
+        LANGUAGES CXX CUDA
         VERSION ${TRT_VERSION}
         DESCRIPTION "TensorRT is a C++ library that facilitates high performance inference on NVIDIA GPUs and deep learning accelerators."
         HOMEPAGE_URL "https://github.com/NVIDIA/TensorRT")
@@ -78,6 +77,8 @@ endif()
 
 set(CMAKE_CXX_FLAGS "-Wno-deprecated-declarations ${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss")
 
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-deprecated-declarations")
+
 ################################### DEPENDENCIES ##########################################
 set(DEFAULT_CUDA_VERSION 10.1)
 set(DEFAULT_CUDNN_VERSION 7.5)
@@ -151,15 +152,18 @@ else()
         set(CUB_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/cub CACHE STRING "directory of CUB installation")
     endif()
 
-    find_package(CUDA ${CUDA_VERSION} REQUIRED)
+    ## find_package(CUDA) is broken for cross-compilation. Enable CUDA language instead.
+    if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
+        find_package(CUDA ${CUDA_VERSION} REQUIRED)
+    endif()
 
     include_directories(
         ${CUDA_INCLUDE_DIRS}
     )
     find_library(CUDNN_LIB cudnn HINTS
-        ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDNN_ROOT_DIR}/lib64)
+        ${CUDA_TOOLKIT_ROOT_DIR} ${CUDNN_ROOT_DIR} PATH_SUFFIXES lib64 lib)
     find_library(CUBLAS_LIB cublas HINTS
-        ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
+        ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib64 lib lib/stubs)
 
     if(BUILD_PARSERS)
         configure_protobuf(${PROTOBUF_VERSION})
@@ -173,8 +177,10 @@ if (NOT (NVINTERNAL OR NVPARTNER))
     find_library_create_target(nvuffparser nvparsers SHARED ${TRT_LIB_DIR})
 endif()
 
-find_library(CUDART_LIB cudart HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
+find_library(CUDART_LIB cudart HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib64)
 find_library(RT_LIB rt)
+
+set(CUDA_LIBRARIES ${CUDART_LIB})
 ############################################################################################
 # TensorRT
 

diff --git a/README.md b/README.md
@@ -15,9 +15,9 @@ To build the TensorRT OSS components, ensure you meet the following package requ
 
 * [CUDA](https://developer.nvidia.com/cuda-toolkit)
   * Recommended versions:
-  * [cuda-10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) + cuDNN-7.5
-  * [cuda-10.0](https://developer.nvidia.com/cuda-10.0-download-archive) + cuDNN-7.5
-  * [cuda-9.0](https://developer.nvidia.com/cuda-90-download-archive) + cuDNN 7.3
+  * [cuda-10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) + cuDNN-7.6
+  * [cuda-10.0](https://developer.nvidia.com/cuda-10.0-download-archive) + cuDNN-7.6
+  * [cuda-9.0](https://developer.nvidia.com/cuda-90-download-archive) + cuDNN-7.6
 
 * [GNU Make](https://ftp.gnu.org/gnu/make/) >= v4.1
 
@@ -45,12 +45,11 @@ To build the TensorRT OSS components, ensure you meet the following package requ
 
 **TensorRT Release**
 
-* [TensorRT](https://developer.nvidia.com/nvidia-tensorrt-5x-download) v5.1.5
-
+* [TensorRT](https://developer.nvidia.com/nvidia-tensorrt-download) v6.0.1
 
 NOTE: Along with the TensorRT OSS components, the following source packages will also be downloaded, and they are not required to be installed on the system.
 
-- [ONNX-TensorRT](https://github.com/onnx/onnx-tensorrt) v5.1
+- [ONNX-TensorRT](https://github.com/onnx/onnx-tensorrt) v6.0
 - [CUB](http://nvlabs.github.io/cub/) v1.8.0
 - [Protobuf](https://github.com/protocolbuffers/protobuf.git) v3.8.x
 
@@ -60,34 +59,34 @@ NOTE: Along with the TensorRT OSS components, the following source packages will
 1. #### Download TensorRT OSS sources.
 
 	```bash
-	git clone -b release/5.1 https://github.com/nvidia/TensorRT TensorRT
+	git clone -b master https://github.com/nvidia/TensorRT TensorRT
 	cd TensorRT
 	git submodule update --init --recursive
 	export TRT_SOURCE=`pwd`
 	```
 
 2. #### Download the TensorRT binary release.
 
-	To build the TensorRT OSS, obtain the corresponding TensorRT 5.1.5 binary release from [NVidia Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-5x-download). For a list of key features, known and fixed issues, see the [TensorRT 5.1.5 Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/tensorrt-5.html#rel_5-1-5).
+	To build the TensorRT OSS, obtain the corresponding TensorRT 6.0.1 binary release from [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download). For a list of key features, known and fixed issues, see the [TensorRT 6.0.1 Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/index.html).
 
 	**Example: Ubuntu 18.04 with cuda-10.1**
 
-	Download and extract the *TensorRT 5.1.5.0 GA for Ubuntu 18.04 and CUDA 10.1 tar package*
+	Download and extract the *TensorRT 6.0.1.5 GA for Ubuntu 18.04 and CUDA 10.1 tar package*
 	```bash
 	cd ~/Downloads
-	# Download TensorRT-5.1.5.0.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.5.tar.gz
-	tar -xvzf TensorRT-5.1.5.0.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.5.tar.gz
-	export TRT_RELEASE=`pwd`/TensorRT-5.1.5.0
+	# Download TensorRT-6.0.1.5.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz
+	tar -xvzf TensorRT-6.0.1.5.Ubuntu-18.04.2.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz
+	export TRT_RELEASE=`pwd`/TensorRT-6.0.1.5
 	```
 
 	**Example: CentOS/RedHat 7 with cuda-9.0**
 
-	Download and extract the *TensorRT 5.1.5.0 GA for CentOS/RedHat 7 and CUDA 9.0 tar package*
+	Download and extract the *TensorRT 6.0.1.5 GA for CentOS/RedHat 7 and CUDA 9.0 tar package*
 	```bash
 	cd ~/Downloads
-	# Download TensorRT-5.1.5.0.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.5.tar.gz
-	tar -xvzf TensorRT-5.1.5.0.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.5.tar.gz
-	export TRT_RELEASE=~/Downloads/TensorRT-5.1.5.0
+	# Download TensorRT-6.0.1.5.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.6.tar.gz
+	tar -xvzf TensorRT-6.0.1.5.Red-Hat.x86_64-gnu.cuda-9.0.cudnn7.6.tar.gz
+	export TRT_RELEASE=~/Downloads/TensorRT-6.0.1.5
 	```
 
 ## Setting Up The Build Environment
@@ -134,20 +133,9 @@ NOTE: Along with the TensorRT OSS components, the following source packages will
 
 	> NOTE:
 	> 1. The default CUDA version used by CMake is 10.1. To override this, for example to 9.0, append `-DCUDA_VERSION=9.0` to the cmake command.
-	> 2. If linking against the plugin and parser libraries obtained from TensorRT release (default behavior) is causing compatibility issues with TensorRT OSS, try building the OSS components separately in the following dependency order:
+	> 2. Samples may fail to link on CentOS 7. To work around this, create the following symbolic link:
 	> ```bash
-	> # 1. Build Plugins
-	> cmake .. -DTRT_LIB_DIR=$TRT_RELEASE/lib -DTRT_BIN_DIR=`pwd`/out \
-	>          -DBUILD_PLUGINS=ON -DBUILD_PARSERS=OFF -DBUILD_SAMPLES=OFF
-	> make -j$(nproc)
-	> # 2. Build Parsers
-	> cmake .. -DTRT_LIB_DIR=$TRT_RELEASE/lib -DTRT_BIN_DIR=`pwd`/out \
-	>          -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=ON -DBUILD_SAMPLES=OFF
-	> make -j$(nproc)
-	> # 3. Build Samples
-	> cmake .. -DTRT_LIB_DIR=$TRT_RELEASE/lib -DTRT_BIN_DIR=`pwd`/out \
-	>          -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF -DBUILD_SAMPLES=ON
-	> make -j$(nproc)
+	> ln -s $TRT_BIN_DIR/libnvinfer_plugin.so $TRT_BIN_DIR/libnvinfer_plugin.so.6
 	> ```
 
 	The required CMake arguments are:
@@ -176,6 +164,10 @@ NOTE: Along with the TensorRT OSS components, the following source packages will
 
 	Other build options with limited applicability:
 
+	- `NVINTERNAL`: Used by the TensorRT team for internal builds. Values consist of [`OFF`] | `ON`.
+
+	- `PROTOBUF_INTERNAL_VERSION`: The version of protobuf to use, for example [`10.0`].  Only applicable if `NVINTERNAL` is also enabled.
+
 	- `NVPARTNER`: For use by NVIDIA partners with exclusive source access.  Values consists of [`OFF`] | `ON`.
 
 	- `CUB_VERSION`: The version of CUB to use, for example [`1.8.0`].
@@ -191,6 +183,7 @@ NOTE: Along with the TensorRT OSS components, the following source packages will
 * Copy the build artifacts into the TensorRT installation directory, updating the installation.
   * TensorRT installation directory is determined as `$TRT_LIB_DIR/..`
   * Installation might require superuser privileges depending on the path and permissions of files being replaced.
+  * Installation is not supported in cross-compilation scenarios. Please copy the resulting files from the `build/out` folder onto the target device.
 
 	```bash
 	sudo make install
@@ -208,6 +201,5 @@ NOTE: Along with the TensorRT OSS components, the following source packages will
 
 ## Known Issues
 
-#### TensorRT 5.1.5
-* FP16/INT8 modes have been disabled in SampleSSD (Caffe version). Please see the [SampleSSD README](samples/opensource/sampleSSD/README.md#known-issues) for details.
-* Additionally, see the TensorRT [Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/tensorrt-5.html#rel_5-1-5).
+#### TensorRT 6.0.1
+* See [Release Notes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-release-notes/index.html).
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-5.1.5.0
+6.0.1.5
diff --git a/cmake/toolchains/cmake_aarch64.toolchain b/cmake/toolchains/cmake_aarch64.toolchain
@@ -19,11 +19,11 @@ set(CMAKE_SYSTEM_PROCESSOR aarch64)
 set(TRT_PLATFORM_ID "aarch64")
 set(CUDA_PLATFORM_ID "aarch64-linux")
 
-set(CMAKE_C_COMPILER $ENV{AARCH64_CC})
-set(CMAKE_CXX_COMPILER $ENV{AARCH64_CC})
+set(CMAKE_C_COMPILER /usr/bin/aarch64-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER /usr/bin/aarch64-linux-gnu-g++)
 
-set(CMAKE_C_FLAGS "$ENV{AARCH64_CFLAGS}" CACHE STRING "" FORCE)
-set(CMAKE_CXX_FLAGS "$ENV{AARCH64_CFLAGS}" CACHE STRING "" FORCE)
+set(CMAKE_C_FLAGS "" CACHE STRING "" FORCE)
+set(CMAKE_CXX_FLAGS "" CACHE STRING "" FORCE)
 
 set(CMAKE_C_COMPILER_TARGET aarch64)
 set(CMAKE_CXX_COMPILER_TARGET aarch64)
@@ -35,14 +35,16 @@ if(NVINTERNAL)
     set(EXT_PATH ${PROJECT_SOURCE_DIR}/../externals)
     set(CUDA_ROOT ${EXT_PATH}/cuda-${CUDA_VERSION}-${TRT_PLATFORM_ID}/${CUDA_PLATFORM_ID})
 else()
-    set(CUDA_ROOT /usr/local/cuda-${CUDA_VERSION}/targets/${CUDA_PLATFORM_ID})
+    set(CUDA_ROOT /usr/local/cuda-${CUDA_VERSION}/targets/${CUDA_PLATFORM_ID} CACHE STRING "CUDA ROOT dir")
 endif()
 
 set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT})
 set(CUDA_INCLUDE_DIRS ${CUDA_ROOT}/include)
 
+set(RT_LIB /usr/aarch64-linux-gnu/lib/librt.so)
+
 set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "" FORCE)
-set(CMAKE_CUDA_FLAGS "-I${CUDA_INCLUDE_DIRS} -Xcompiler=\"-fPIC ${CMAKE_CXX_FLAGS}\"" CACHE STRING "" FORCE)
+set(CMAKE_CUDA_FLAGS "-cudart none -I${CUDA_INCLUDE_DIRS} -Xcompiler=\"-fPIC ${CMAKE_CXX_FLAGS}\"" CACHE STRING "" FORCE)
 set(CMAKE_CUDA_COMPILER_FORCED TRUE)
 
 if(DEFINED ENV{VULCAN} AND NOT $ENV{VULCAN} STREQUAL "")

diff --git a/demo/BERT/CMakeLists.txt b/demo/BERT/CMakeLists.txt
@@ -20,62 +20,72 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}  \
 --expt-extended-lambda \
 -gencode arch=compute_70,code=sm_70 \
 -gencode arch=compute_75,code=sm_75 \
--O3")
+-Wno-deprecated-declarations")
 
-set(BERT_LIBS 
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
+
+set(BERT_LIBS
     cudart
     cublas
     nvinfer
+    nvinfer_plugin
     pthread
     z
-    )
+)
 
 include_directories(
-    ../../include
-    ../../samples/common
-    /usr/local/cuda-10.1/targets/x86_64-linux/include
     ./
-    ./plugins
+    ./bert
     ./layers
+    ./plugins
     ./util
+    ../../include/
+    ../../samples/common/
     ../../third_party/cub/
+    /usr/include/x86_64-linux-gnu
+    /usr/local/cuda-10.1/targets/x86_64-linux/include
+    /workspace/tensorrt/include
+    /workspace/tensorrt/samples/common
     /workspace/cub/
     /workspace/cutlass/
-    )
+)
 
 link_directories(
+    /usr/lib/x86_64-linux-gnu
     /usr/local/cuda-10.1/targets/x86_64-linux/lib
-    /tensorrt/lib
-    )
+    /workspace/tensorrt/lib
+)
 
 add_library(common SHARED
     ../../samples/common/logger.cpp
-    util/dataUtils.cpp
-    )
+)
 
 add_library(bert_plugins SHARED
+    plugins/embLayerNormPlugin.cu
     plugins/geluPlugin.cu
-    plugins/skipLayerNormPlugin.cu
     plugins/qkvToContextPlugin.cu
-    plugins/embLayerNormPlugin.cu
-    )
+    plugins/skipLayerNormPlugin.cu
+)
 
-target_link_libraries(bert_plugins 
+target_link_libraries(bert_plugins
+    common
     ${BERT_LIBS}
-    )
+)
 
-target_link_libraries(common 
+target_link_libraries(common
     ${BERT_LIBS}
-    )
+)
 
 add_executable(sample_bert
+    bert/bert.cpp
+    bert/driver.cpp
+    util/dataUtils.cpp
     sampleBERT.cpp
-    )
+)
 
 target_compile_features(sample_bert PUBLIC cxx_std_11)
 
-target_link_libraries(sample_bert 
+target_link_libraries(sample_bert
     common
     bert_plugins
-    )
-
+)
diff --git a/demo/BERT/Dockerfile b/demo/BERT/Dockerfile
@@ -12,25 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nvcr.io/nvidia/tensorrt:19.05-py3
+FROM nvcr.io/nvidia/tensorrt:19.09-py3
 ARG myuid
 ARG mygid
 
-RUN  echo $myuid
-RUN  echo $mygid
+RUN echo $myuid
+RUN echo $mygid
 
-# TODO: Depending on the docker version, this might work without mapping the user for home dir access
 RUN groupadd -r -g ${mygid} nb && useradd -r -u ${myuid} -g ${mygid} -ms /bin/bash nb
 
 RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository ppa:ubuntu-toolchain-r/test
-RUN apt-get update && apt-get install -y pbzip2 pv bzip2  sudo gcc-7 g++-7  zlib1g-dev g++-4.9
+RUN apt-get update && apt-get install -y pbzip2 pv bzip2 sudo gcc-7 g++-7 zlib1g-dev g++-4.8
 RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 60 \
                                  --slave /usr/bin/g++ g++ /usr/bin/g++-7  && \
                                  update-alternatives --config gcc
 
 RUN wget https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh && \
     sh cmake-3.14.0-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir
 RUN pip install tensorflow==1.13.1 && pip install horovod
+RUN pip install jupyter
 
 RUN echo 'nb:abc123' | chpasswd
 
@@ -42,4 +42,3 @@ WORKDIR /workspace
 
 RUN git clone https://github.com/NVlabs/cub.git
 RUN git clone https://github.com/NVIDIA/cutlass.git
-