Skip to content

Commit 97ab168

Browse files
committed
Merge remote-tracking branch 'origin' into merge-cute-dsl-blackwell-step-0
Signed-off-by: Mindy Li <[email protected]>
2 parents eddf7ea + 780d750 commit 97ab168

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1868
-554
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ tensorrt_llm/bindings/**/*.pyi
4343
tensorrt_llm/deep_ep/
4444
tensorrt_llm/deep_ep_cpp_tllm.*.so
4545
tensorrt_llm/deep_ep_cpp_tllm.pyi
46+
tensorrt_llm/deep_gemm/
47+
tensorrt_llm/deep_gemm_cpp_tllm.*.so
48+
tensorrt_llm/deep_gemm_cpp_tllm.pyi
4649
*docs/cpp_docs*
4750
*docs/source/_cpp_gen*
4851
docs/source/**/*.rst

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,6 @@
2626
[submodule "3rdparty/cppzmq"]
2727
path = 3rdparty/cppzmq
2828
url = https://github.com/zeromq/cppzmq.git
29+
[submodule "3rdparty/DeepGEMM"]
30+
path = 3rdparty/DeepGEMM
31+
url = https://github.com/deepseek-ai/DeepGEMM.git

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ repos:
2727
args: [--allow-multiple-documents]
2828
exclude: ".*/gitlab/.*.yml"
2929
- id: trailing-whitespace
30-
exclude: '\.patch$'
30+
exclude: '\.(patch|md)$'
3131
- id: check-toml
3232
- id: mixed-line-ending
3333
args: [--fix=lf]

3rdparty/DeepGEMM

Submodule DeepGEMM added at 7b6b556

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,15 @@ TensorRT-LLM
1818
<div align="left">
1919

2020
## Tech Blogs
21+
* [08/06] Running a High Performance GPT-OSS-120B Inference Server with TensorRT-LLM
22+
[➡️ link](./docs/source/blogs/tech_blog/blog9_Deploying_GPT_OSS_on_TRTLLM.md)
23+
2124

2225
* [08/01] Scaling Expert Parallelism in TensorRT-LLM (Part 2: Performance Status and Optimization)
2326
[➡️ link](./docs/source/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md)
2427

2528
* [07/26] N-Gram Speculative Decoding in TensorRT‑LLM
26-
[➡️ link](./docs/source/blogs/tech_blog/blog_7_NGram_performance_Analysis_And_Auto_Enablement.md)
29+
[➡️ link](./docs/source/blogs/tech_blog/blog7_NGram_performance_Analysis_And_Auto_Enablement.md)
2730

2831
* [06/19] Disaggregated Serving in TensorRT-LLM
2932
[➡️ link](./docs/source/blogs/tech_blog/blog5_Disaggregated_Serving_in_TensorRT-LLM.md)

cpp/CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ option(BUILD_PYT "Build in PyTorch TorchScript class mode" ON)
3131
option(BUILD_TESTS "Build Google tests" ON)
3232
option(BUILD_BENCHMARKS "Build benchmarks" ON)
3333
option(BUILD_DEEP_EP "Build the Deep EP module" ON)
34+
option(BUILD_DEEP_GEMM "Build the DeepGEMM module" ON)
3435
option(BUILD_MICRO_BENCHMARKS "Build C++ micro benchmarks" OFF)
3536
option(NVTX_DISABLE "Disable all NVTX features" ON)
3637
option(WARNING_IS_ERROR "Treat all warnings as errors" OFF)
@@ -199,7 +200,9 @@ set(TRT_LIB TensorRT::NvInfer)
199200
get_filename_component(TRT_LLM_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} PATH)
200201

201202
set(3RDPARTY_DIR ${TRT_LLM_ROOT_DIR}/3rdparty)
202-
if(BINDING_TYPE STREQUAL "pybind" OR BUILD_DEEP_EP)
203+
if(BINDING_TYPE STREQUAL "pybind"
204+
OR BUILD_DEEP_EP
205+
OR BUILD_DEEP_GEMM)
203206
add_subdirectory(${3RDPARTY_DIR}/pybind11
204207
${CMAKE_CURRENT_BINARY_DIR}/pybind11)
205208
endif()
@@ -218,7 +221,9 @@ include_directories(
218221
${3RDPARTY_DIR}/cutlass/tools/util/include
219222
${3RDPARTY_DIR}/NVTX/include
220223
${3RDPARTY_DIR}/json/include)
221-
if(BINDING_TYPE STREQUAL "pybind" OR BUILD_DEEP_EP)
224+
if(BINDING_TYPE STREQUAL "pybind"
225+
OR BUILD_DEEP_EP
226+
OR BUILD_DEEP_GEMM)
222227
include_directories(${3RDPARTY_DIR}/pybind11/include)
223228
endif()
224229
if(BINDING_TYPE STREQUAL "nanobind")

cpp/tensorrt_llm/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,4 +314,8 @@ if(BUILD_DEEP_EP)
314314
add_subdirectory(deep_ep)
315315
endif()
316316

317+
if(BUILD_DEEP_GEMM)
318+
add_subdirectory(deep_gemm)
319+
endif()
320+
317321
add_subdirectory(plugins)
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
add_custom_target(deep_gemm)
2+
3+
if(WIN32)
4+
return()
5+
endif()
6+
7+
# Prepare files
8+
# =============
9+
10+
# Use DeepGEMM submodule
11+
set(DEEP_GEMM_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../3rdparty/DeepGEMM)
12+
get_filename_component(DEEP_GEMM_SOURCE_DIR ${DEEP_GEMM_SOURCE_DIR} ABSOLUTE)
13+
14+
if(NOT EXISTS ${DEEP_GEMM_SOURCE_DIR})
15+
message(
16+
FATAL_ERROR
17+
"DeepGEMM submodule not found at ${DEEP_GEMM_SOURCE_DIR}. Please run: git submodule update --init --recursive"
18+
)
19+
endif()
20+
21+
# Check that DeepGEMM's own nested submodules (e.g. third-party/cutlass) are initialized
22+
if(NOT EXISTS ${DEEP_GEMM_SOURCE_DIR}/third-party/cutlass/include)
23+
message(
24+
FATAL_ERROR
25+
"DeepGEMM submodules not initialized. Please run: git submodule update --init --recursive"
26+
)
27+
endif()
28+
29+
# Copy the DeepGEMM Python package into the build tree, rewriting imports in .py files
30+
set(DEEP_GEMM_PYTHON_DEST ${CMAKE_CURRENT_BINARY_DIR}/python/deep_gemm)
31+
file(REMOVE_RECURSE ${DEEP_GEMM_PYTHON_DEST})
32+
file(MAKE_DIRECTORY ${DEEP_GEMM_PYTHON_DEST})
33+
34+
# Copy all files from deep_gemm directory
35+
file(GLOB_RECURSE DEEP_GEMM_ALL_FILES ${DEEP_GEMM_SOURCE_DIR}/deep_gemm/*)
36+
configure_file(${DEEP_GEMM_SOURCE_DIR}/LICENSE ${DEEP_GEMM_PYTHON_DEST}/LICENSE
37+
COPYONLY)
38+
foreach(SOURCE_FILE ${DEEP_GEMM_ALL_FILES})
39+
file(RELATIVE_PATH REL_PATH ${DEEP_GEMM_SOURCE_DIR}/deep_gemm ${SOURCE_FILE})
40+
get_filename_component(REL_DIR ${REL_PATH} DIRECTORY)
41+
file(MAKE_DIRECTORY ${DEEP_GEMM_PYTHON_DEST}/${REL_DIR})
42+
43+
# Check if it's a Python file that needs import renaming
44+
get_filename_component(FILE_EXT ${SOURCE_FILE} EXT)
45+
if(FILE_EXT STREQUAL ".py")
46+
# Read file content and replace module imports for Python files
47+
file(READ ${SOURCE_FILE} _content)
48+
string(REPLACE "deep_gemm_cpp" "tensorrt_llm.deep_gemm_cpp_tllm" _content
49+
"${_content}")
50+
51+
# Add adaptation header
52+
string(
53+
PREPEND
54+
_content
55+
"# Adapted from https://github.com/deepseek-ai/DeepGEMM/blob/main/deep_gemm/${REL_PATH}\n"
56+
)
57+
58+
# Write modified content
59+
set(_dst "${DEEP_GEMM_PYTHON_DEST}/${REL_PATH}")
60+
file(WRITE ${_dst} "${_content}")
61+
else()
62+
# Copy non-Python files as-is
63+
set(_dst "${DEEP_GEMM_PYTHON_DEST}/${REL_PATH}")
64+
file(COPY ${SOURCE_FILE} DESTINATION ${DEEP_GEMM_PYTHON_DEST}/${REL_DIR})
65+
endif()
66+
67+
# Re-run the CMake configure step whenever this source file changes
68+
set_property(
69+
DIRECTORY
70+
APPEND
71+
PROPERTY CMAKE_CONFIGURE_DEPENDS ${SOURCE_FILE})
72+
endforeach()
73+
74+
# Copy the CUTLASS headers (cute and cutlass) to the include directory
75+
set(DEEP_GEMM_INCLUDE_DEST ${DEEP_GEMM_PYTHON_DEST}/include)
76+
file(MAKE_DIRECTORY ${DEEP_GEMM_INCLUDE_DEST})
77+
file(COPY ${DEEP_GEMM_SOURCE_DIR}/third-party/cutlass/include/cute
78+
DESTINATION ${DEEP_GEMM_INCLUDE_DEST})
79+
file(COPY ${DEEP_GEMM_SOURCE_DIR}/third-party/cutlass/include/cutlass
80+
DESTINATION ${DEEP_GEMM_INCLUDE_DEST})
81+
82+
# Find torch_python
83+
find_library(TORCH_PYTHON_LIB torch_python REQUIRED
84+
HINTS ${TORCH_INSTALL_PREFIX}/lib)
85+
86+
# Build deep_gemm_cpp_tllm extension (matching deep_gemm's setup.py)
87+
set(DEEP_GEMM_SOURCES ${DEEP_GEMM_SOURCE_DIR}/csrc/python_api.cpp)
88+
89+
pybind11_add_module(deep_gemm_cpp_tllm ${DEEP_GEMM_SOURCES})
90+
set_target_properties(
91+
deep_gemm_cpp_tllm
92+
PROPERTIES CXX_STANDARD_REQUIRED ON
93+
CXX_STANDARD 17
94+
CXX_SCAN_FOR_MODULES OFF
95+
CUDA_SEPARABLE_COMPILATION ON
96+
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/deep_gemm_cpp_tllm.version
97+
INSTALL_RPATH "${TORCH_INSTALL_PREFIX}/lib"
98+
BUILD_WITH_INSTALL_RPATH TRUE)
99+
100+
target_compile_options(deep_gemm_cpp_tllm PRIVATE ${TORCH_CXX_FLAGS} -std=c++17
101+
-O3 -fPIC -Wno-psabi)
102+
103+
# Extension name definition
104+
target_compile_definitions(deep_gemm_cpp_tllm
105+
PRIVATE TORCH_EXTENSION_NAME=deep_gemm_cpp_tllm)
106+
107+
# Include directories matching deep_gemm setup.py
108+
target_include_directories(
109+
deep_gemm_cpp_tllm
110+
PRIVATE ${CUDA_INCLUDE_DIRS} ${DEEP_GEMM_SOURCE_DIR}/deep_gemm/include
111+
${DEEP_GEMM_SOURCE_DIR}/third-party/cutlass/include
112+
${DEEP_GEMM_SOURCE_DIR}/third-party/fmt/include)
113+
114+
# Link libraries (matching deep_gemm setup.py: cuda, cudart + torch)
115+
target_link_libraries(
116+
deep_gemm_cpp_tllm PRIVATE ${TORCH_LIBRARIES} ${TORCH_PYTHON_LIB}
117+
CUDA::cuda_driver CUDA::cudart)
118+
119+
# Link directories
120+
target_link_directories(
121+
deep_gemm_cpp_tllm PRIVATE ${CUDA_TOOLKIT_ROOT_DIR}/lib64
122+
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs)
123+
124+
# Set targets
125+
# ===========
126+
add_dependencies(deep_gemm deep_gemm_cpp_tllm)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
global: PyInit_deep_gemm_cpp_tllm;
3+
local: *;
4+
};

0 commit comments

Comments
 (0)