Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1240,16 +1240,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
CUDA_ARCHS "${DSV3_ROUTER_GEMM_ARCHS}")
list(APPEND VLLM_MOE_EXT_SRC "${DSV3_ROUTER_GEMM_SRC}")
message(STATUS "Building DSV3 router GEMM kernel for archs: ${DSV3_ROUTER_GEMM_ARCHS}")

# DeepSeek V4 fused RMSNorm + router GEMV - same arch gating as DSV3.
set(DSV4_NORM_ROUTER_GEMM_SRC
"csrc/moe/dsv4_norm_router_gemm_entry.cu"
"csrc/moe/dsv4_norm_router_gemm_kernel.cu")
set_gencode_flags_for_srcs(
SRCS "${DSV4_NORM_ROUTER_GEMM_SRC}"
CUDA_ARCHS "${DSV3_ROUTER_GEMM_ARCHS}")
list(APPEND VLLM_MOE_EXT_SRC "${DSV4_NORM_ROUTER_GEMM_SRC}")
message(STATUS "Building DSV4 norm+router GEMV kernel for archs: ${DSV3_ROUTER_GEMM_ARCHS}")
else()
message(STATUS "Not building DSV3 router GEMM kernel as no compatible archs found"
" (requires SM90+ and CUDA >= 12.0)")
Expand Down
183 changes: 0 additions & 183 deletions benchmarks/kernels/benchmark_norm_router_gemm.py

This file was deleted.

2 changes: 1 addition & 1 deletion csrc/moe/dsv3_router_gemm_bf16_out.cu
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ void invokeRouterGemmBf16Output(__nv_bfloat16* output, T const* mat_a,
config.stream = stream;
cudaLaunchAttribute attrs[1];
attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
attrs[0].val.programmaticStreamSerializationAllowed = getEnvEnablePDL();
attrs[0].val.programmaticStreamSerializationAllowed = 1;
config.numAttrs = 1;
config.attrs = attrs;
cudaLaunchKernelEx(
Expand Down
2 changes: 1 addition & 1 deletion csrc/moe/dsv3_router_gemm_float_out.cu
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ void invokeRouterGemmFloatOutput(float* output, T const* mat_a, T const* mat_b,
config.stream = stream;
cudaLaunchAttribute attrs[1];
attrs[0].id = cudaLaunchAttributeProgrammaticStreamSerialization;
attrs[0].val.programmaticStreamSerializationAllowed = getEnvEnablePDL();
attrs[0].val.programmaticStreamSerializationAllowed = 1;
config.numAttrs = 1;
config.attrs = attrs;
cudaLaunchKernelEx(
Expand Down
12 changes: 0 additions & 12 deletions csrc/moe/dsv3_router_gemm_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,3 @@ inline int getSMVersion() {
auto* props = at::cuda::getCurrentDeviceProperties();
return props->major * 10 + props->minor;
}

// Returns whether PDL has been switched on through the environment.
//
// The decision is computed once, on the first call, and the cached value is
// returned on every later call. It is true only when both hold:
//   * getSMVersion() reports 90 or higher, and
//   * the environment variable TRTLLM_ENABLE_PDL is set to exactly "1".
// On a device below SM 90 the environment variable is never consulted.
// NOTE(review): "PDL" presumably stands for programmatic dependent launch -
// confirm against the launch sites that consume this flag.
inline bool getEnvEnablePDL() {
  // Function-local static: the initializer runs a single time, the first time
  // control passes through this declaration.
  static const bool pdlEnabled = []() -> bool {
    if (getSMVersion() < 90) {
      return false;
    }
    const char* value = std::getenv("TRTLLM_ENABLE_PDL");
    if (value == nullptr) {
      return false;
    }
    // Accept only the one-character string "1"; "10", "1 ", "true", etc. do
    // not enable the feature.
    return value[0] == '1' && value[1] == '\0';
  }();
  return pdlEnabled;
}
30 changes: 0 additions & 30 deletions csrc/moe/dsv4_norm_router_gemm.h

This file was deleted.

130 changes: 0 additions & 130 deletions csrc/moe/dsv4_norm_router_gemm_entry.cu

This file was deleted.

Loading
Loading