Skip to content

Commit

Permalink
Adding support for building BERT plugins with GPU_ARCHS specified (#255)
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinch-nv authored Dec 5, 2019
1 parent fb305de commit 98ab844
Show file tree
Hide file tree
Showing 33 changed files with 102 additions and 93 deletions.
27 changes: 27 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,33 @@ endif()

set(CMAKE_CXX_FLAGS "-Wno-deprecated-declarations ${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss")

# Resolve the SM architectures to build for and derive the nvcc -gencode flag
# strings that are later attached to CUDA sources as per-file compile flags:
#   GENCODES      - SASS for every entry in GPU_ARCHS, plus PTX for the last entry.
#   BERT_GENCODES - the same, restricted to entries >= 70; stays unset when no
#                   entry qualifies.
# GPU_ARCHS may be passed as a quoted, space-separated string
# (e.g. -DGPU_ARCHS="70 75"); when it is not defined a default set is used.
if (DEFINED GPU_ARCHS)
  message(STATUS "GPU_ARCHS defined as ${GPU_ARCHS}. Generating CUDA code for SM ${GPU_ARCHS}")
  # Split the space-separated string into a CMake list.
  separate_arguments(GPU_ARCHS)
else()
  list(APPEND GPU_ARCHS
      35
      53
      61
      70
      75
    )
  message(STATUS "GPU_ARCHS is not defined. Generating CUDA code for default SMs: ${GPU_ARCHS}")
endif()

# An empty GPU_ARCHS (e.g. -DGPU_ARCHS="") would otherwise only surface as an
# unrelated-looking error from list(GET) / if() further down.
list(LENGTH GPU_ARCHS GPU_ARCHS_COUNT)
if (GPU_ARCHS_COUNT EQUAL 0)
  message(FATAL_ERROR "GPU_ARCHS is empty. Specify at least one SM version, e.g. -DGPU_ARCHS=\"70 75\"")
endif()

# NOTE(review): GENCODES is appended to without being reset here, unlike
# BERT_GENCODES; confirm nothing earlier in the file is expected to seed it.
set(BERT_GENCODES)
# Generate SASS for each architecture
foreach(arch ${GPU_ARCHS})
  # A non-numeric entry (for example a comma-separated "70,75") never satisfies
  # the numeric comparison below, so make that visible instead of silently
  # leaving BERT_GENCODES without it.
  if (NOT "${arch}" MATCHES "^[0-9]+$")
    message(WARNING "GPU_ARCHS entry '${arch}' is not a plain SM number; it is passed to nvcc as-is and is not considered for BERT_GENCODES. Use a space-separated list such as \"70 75\".")
  endif()
  # Quoted so the comparison stays well-formed whatever the entry contains.
  if ("${arch}" GREATER_EQUAL 70)
    string(APPEND BERT_GENCODES " -gencode arch=compute_${arch},code=sm_${arch}")
  endif()
  string(APPEND GENCODES " -gencode arch=compute_${arch},code=sm_${arch}")
endforeach()
# Generate PTX for the last architecture in the list.
list(GET GPU_ARCHS -1 LATEST_SM)
string(APPEND GENCODES " -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")
if ("${LATEST_SM}" GREATER_EQUAL 70)
  string(APPEND BERT_GENCODES " -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-deprecated-declarations")

################################### DEPENDENCIES ##########################################
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,12 @@ NOTE: Along with the TensorRT OSS components, the following source packages will

- `CUB_VERSION`: The version of CUB to use, for example [`1.8.0`].

- `GPU_ARCHS`: GPU (SM) architectures to target. By default we generate CUDA code for the latest SM version. If lower SM versions are desired, they can be specified here as a comma separated list. Table of compute capabilities of NVIDIA GPUs can be found [here](https://developer.nvidia.com/cuda-gpus). Examples:
- `GPU_ARCHS`: GPU (SM) architectures to target. By default we generate CUDA code for all major SMs. Specific SM versions can be given here as a quoted, space-separated list to reduce compilation time and binary size. A table of the compute capabilities of NVIDIA GPUs can be found [here](https://developer.nvidia.com/cuda-gpus). Examples:
- Titan V: `-DGPU_ARCHS="70"`
- Tesla V100: `-DGPU_ARCHS="70"`
- GeForce RTX 2080: `-DGPU_ARCHS="75"`
- Tesla T4: `-DGPU_ARCHS="75"`
- Multiple SMs: `-DGPU_ARCHS="70 75"`

## Install the TensorRT OSS Components [Optional]

Expand Down
30 changes: 22 additions & 8 deletions plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,10 @@ if(${CMAKE_BUILD_TYPE} MATCHES "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
endif()

# Start every source-accumulator variable from an unset state; set(<var>) with
# no value unsets the variable in the current scope.
# NOTE(review): this span comes from a rendered diff, so it presumably mixes
# names removed by the commit with the names that replace them - confirm
# against the real file.
set(PLUGIN_SRCS)
set(PLUGIN_SOURCES)
set(CUDA_SRCS)
set(COMMON_SRCS)
set(PLUGIN_CU_SOURCES)

set(PLUGIN_LISTS
embLayerNormPlugin
fcPlugin
geluPlugin
bertQKVToContextPlugin
skipLayerNormPlugin
nmsPlugin
normalizePlugin
priorBoxPlugin
Expand All @@ -57,6 +50,19 @@ set(PLUGIN_LISTS
instanceNormalizationPlugin
)

# The BERT plugins are registered only when BERT_GENCODES evaluates to true,
# i.e. when it was populated with at least one -gencode entry.
if(BERT_GENCODES)
  # Begin with BERT_CU_SOURCES unset; the BERT plugin CMakeLists files append
  # their *.cu files to it and publish it back to this scope.
  set(BERT_CU_SOURCES)
  # Register the BERT plugin directories after the always-built plugins.
  list(APPEND PLUGIN_LISTS
    embLayerNormPlugin
    fcPlugin
    geluPlugin
    bertQKVToContextPlugin
    skipLayerNormPlugin
  )
endif()

include_directories(common common/kernels ../samples/common)

foreach(PLUGIN_ITER ${PLUGIN_LISTS})
Expand All @@ -67,6 +73,14 @@ endforeach(PLUGIN_ITER)
# Add common
add_subdirectory(common)

# Set gencodes
# Attach the -gencode flag strings to the CUDA sources as per-file
# COMPILE_FLAGS, then fold those sources into PLUGIN_SOURCES.
# The flag values are quoted so that an empty, undefined or semicolon-containing
# value always stays a single property value and cannot break the PROPERTIES
# key/value pairing; each call is skipped when there are no files to apply it to.
if (PLUGIN_CU_SOURCES)
  set_source_files_properties(${PLUGIN_CU_SOURCES} PROPERTIES COMPILE_FLAGS "${GENCODES}")
  list(APPEND PLUGIN_SOURCES "${PLUGIN_CU_SOURCES}")
endif()
# BERT sources get their own flag string (BERT_GENCODES).
if (BERT_CU_SOURCES)
  set_source_files_properties(${BERT_CU_SOURCES} PROPERTIES COMPILE_FLAGS "${BERT_GENCODES}")
  list(APPEND PLUGIN_SOURCES "${BERT_CU_SOURCES}")
endif()

# Sources that are always part of the plugin list.
list(APPEND PLUGIN_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/InferPlugin.cpp")
list(APPEND PLUGIN_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/../samples/common/logger.cpp")

Expand Down
2 changes: 1 addition & 1 deletion plugin/batchTilePlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
5 changes: 4 additions & 1 deletion plugin/batchedNMSPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's C++ sources into SRCS. The second file(GLOB)
# overwrites the result of the first, so SRCS holds only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
# CUDA sources are tracked separately in PLUGIN_CU_SOURCES and published to the
# parent scope the same way.
file(GLOB CU_SRCS *.cu)
set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} ${CU_SRCS})
set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} PARENT_SCOPE)


15 changes: 3 additions & 12 deletions plugin/bertQKVToContextPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Search CMAKE_CUDA_FLAGS for the substrings "sm_7" and "compute_7";
# string(FIND) stores the match position, or -1 when the substring is absent.
string(FIND ${CMAKE_CUDA_FLAGS} "sm_7" POS_SM)
string(FIND ${CMAKE_CUDA_FLAGS} "compute_7" POS_COMPUTE)

# Add this plugin's sources to PLUGIN_SOURCES only when one of the substrings was found.
if(${POS_SM} GREATER_EQUAL 0 OR ${POS_COMPUTE} GREATER_EQUAL 0)
file(GLOB SRCS *.cpp *.cu)

set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
endif()


# Collect the CUDA sources into BERT_CU_SOURCES and publish the list to the parent scope.
# NOTE(review): this span is a rendered diff hunk; the string(FIND)/if() section
# above is presumably the code this commit removes - confirm against the real file.
file(GLOB CU_SRCS *.cu)
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} ${CU_SRCS})
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/bertQKVToContextPlugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ This is the first release of this `README.md` file.

## Known issues

There are no known issues in this plugin.
This plugin only supports GPUs with compute capability >= 7.0. For more information see the [CUDA GPU Compute Capability Support Matrix](https://developer.nvidia.com/cuda-gpus#compute).
8 changes: 5 additions & 3 deletions plugin/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# NOTE(review): this span is a rendered diff hunk; this first glob is presumably
# the pre-change line. Read literally, its result is overwritten by the *.cpp
# glob further down.
file(GLOB SRCS *.cpp *.cu)

# Process the kernels subdirectory before this directory's own sources are appended.
add_subdirectory(kernels)

# Append this directory's *.cpp files to PLUGIN_SOURCES and publish the list to
# the parent scope.
file(GLOB SRCS *.cpp)
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
# CUDA sources are tracked separately in PLUGIN_CU_SOURCES and published the same way.
file(GLOB CU_SRCS *.cu)
set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} ${CU_SRCS})
set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} PARENT_SCOPE)


2 changes: 2 additions & 0 deletions plugin/common/bertCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ extern LogStreamConsumer gLogFatal;

void setReportableSeverity(Logger::Severity severity);

#define TRT_UNUSED (void)

#include <numeric>
#include <vector>

Expand Down
5 changes: 4 additions & 1 deletion plugin/common/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's C++ sources into SRCS. The second file(GLOB)
# overwrites the result of the first, so SRCS holds only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
# CUDA sources are tracked separately in PLUGIN_CU_SOURCES and published to the
# parent scope the same way.
file(GLOB CU_SRCS *.cu)
set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} ${CU_SRCS})
set(PLUGIN_CU_SOURCES ${PLUGIN_CU_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/cropAndResizePlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/detectionLayerPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
13 changes: 3 additions & 10 deletions plugin/embLayerNormPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Search CMAKE_CUDA_FLAGS for the substrings "sm_7" and "compute_7";
# string(FIND) stores the match position, or -1 when the substring is absent.
string(FIND ${CMAKE_CUDA_FLAGS} "sm_7" POS_SM)
string(FIND ${CMAKE_CUDA_FLAGS} "compute_7" POS_COMPUTE)

# Add this plugin's sources to PLUGIN_SOURCES only when one of the substrings was found.
if(${POS_SM} GREATER_EQUAL 0 OR ${POS_COMPUTE} GREATER_EQUAL 0)
file(GLOB SRCS *.cpp *.cu)

set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
endif()
# Collect the CUDA sources into BERT_CU_SOURCES and publish the list to the parent scope.
# NOTE(review): this span is a rendered diff hunk; the string(FIND)/if() section
# above is presumably the code this commit removes - confirm against the real file.
file(GLOB CU_SRCS *.cu)
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} ${CU_SRCS})
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} PARENT_SCOPE)


2 changes: 1 addition & 1 deletion plugin/embLayerNormPlugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ This is the first release of this `README.md` file.

## Known issues

There are no known issues in this plugin.
This plugin only supports GPUs with compute capability >= 7.0. For more information see the [CUDA GPU Compute Capability Support Matrix](https://developer.nvidia.com/cuda-gpus#compute).
15 changes: 3 additions & 12 deletions plugin/fcPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Search CMAKE_CUDA_FLAGS for the substrings "sm_7" and "compute_7";
# string(FIND) stores the match position, or -1 when the substring is absent.
string(FIND ${CMAKE_CUDA_FLAGS} "sm_7" POS_SM)
string(FIND ${CMAKE_CUDA_FLAGS} "compute_7" POS_COMPUTE)

# Add this plugin's sources to PLUGIN_SOURCES only when one of the substrings was found.
if(${POS_SM} GREATER_EQUAL 0 OR ${POS_COMPUTE} GREATER_EQUAL 0)
file(GLOB SRCS *.cpp *.cu)

set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
endif()


# Collect the CUDA sources into BERT_CU_SOURCES and publish the list to the parent scope.
# NOTE(review): this span is a rendered diff hunk; the string(FIND)/if() section
# above is presumably the code this commit removes - confirm against the real file.
file(GLOB CU_SRCS *.cu)
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} ${CU_SRCS})
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/fcPlugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@ This is the first release of this `README.md` file.

## Known issues

There are no known issues in this plugin.
This plugin only supports GPUs with compute capability >= 7.0. For more information see the [CUDA GPU Compute Capability Support Matrix](https://developer.nvidia.com/cuda-gpus#compute).
2 changes: 1 addition & 1 deletion plugin/flattenConcat/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
15 changes: 3 additions & 12 deletions plugin/geluPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Search CMAKE_CUDA_FLAGS for the substrings "sm_7" and "compute_7";
# string(FIND) stores the match position, or -1 when the substring is absent.
string(FIND ${CMAKE_CUDA_FLAGS} "sm_7" POS_SM)
string(FIND ${CMAKE_CUDA_FLAGS} "compute_7" POS_COMPUTE)

# Add this plugin's sources to PLUGIN_SOURCES only when one of the substrings was found.
if(${POS_SM} GREATER_EQUAL 0 OR ${POS_COMPUTE} GREATER_EQUAL 0)
file(GLOB SRCS *.cpp *.cu)

set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
endif()


# Collect the CUDA sources into BERT_CU_SOURCES and publish the list to the parent scope.
# NOTE(review): this span is a rendered diff hunk; the string(FIND)/if() section
# above is presumably the code this commit removes - confirm against the real file.
file(GLOB CU_SRCS *.cu)
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} ${CU_SRCS})
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/geluPlugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,4 @@ This is the first release of this `README.md` file.

## Known issues

There are no known issues in this plugin.
This plugin only supports GPUs with compute capability >= 7.0. For more information see the [CUDA GPU Compute Capability Support Matrix](https://developer.nvidia.com/cuda-gpus#compute).
2 changes: 1 addition & 1 deletion plugin/gridAnchorPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/nmsPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/normalizePlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/nvFasterRCNN/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/priorBoxPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/proposalLayerPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/proposalPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/pyramidROIAlignPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/regionPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/reorgPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/resizeNearestPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Gather this directory's sources into SRCS. The second file(GLOB) overwrites
# the result of the first, so SRCS ends up holding only the *.cpp matches.
# NOTE(review): this span is a rendered diff hunk; the first glob is presumably
# the pre-change line. Confirm the directory has no *.cu files, since they are
# no longer collected here.
file(GLOB SRCS *.cpp *.cu)
file(GLOB SRCS *.cpp)
# Append SRCS to PLUGIN_SOURCES, then publish the updated list to the parent scope.
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
15 changes: 3 additions & 12 deletions plugin/skipLayerNormPlugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Search CMAKE_CUDA_FLAGS for the substrings "sm_7" and "compute_7";
# string(FIND) stores the match position, or -1 when the substring is absent.
string(FIND ${CMAKE_CUDA_FLAGS} "sm_7" POS_SM)
string(FIND ${CMAKE_CUDA_FLAGS} "compute_7" POS_COMPUTE)

# Add this plugin's sources to PLUGIN_SOURCES only when one of the substrings was found.
if(${POS_SM} GREATER_EQUAL 0 OR ${POS_COMPUTE} GREATER_EQUAL 0)
file(GLOB SRCS *.cpp *.cu)

set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)
endif()


# Collect the CUDA sources into BERT_CU_SOURCES and publish the list to the parent scope.
# NOTE(review): this span is a rendered diff hunk; the string(FIND)/if() section
# above is presumably the code this commit removes - confirm against the real file.
file(GLOB CU_SRCS *.cu)
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} ${CU_SRCS})
set(BERT_CU_SOURCES ${BERT_CU_SOURCES} PARENT_SCOPE)
2 changes: 1 addition & 1 deletion plugin/skipLayerNormPlugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@ This is the first release of this `README.md` file.

## Known issues

There are no known issues in this plugin.
This plugin only supports GPUs with compute capability >= 7.0. For more information see the [CUDA GPU Compute Capability Support Matrix](https://developer.nvidia.com/cuda-gpus#compute).
Loading

0 comments on commit 98ab844

Please sign in to comment.