Skip to content

Commit 22b97b0

Browse files
committed
fix H100 unit test error
1 parent b09efb0 commit 22b97b0

File tree

3 files changed

+15
-6
lines changed

3 files changed

+15
-6
lines changed

csrc/fused_moe/cutlass_backend/cutlass_fused_moe_instantiation.cu

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,12 @@ template class CutlassMoeFCRunner<__nv_fp8_e4m3, __nv_fp4_e2m1, __nv_bfloat16, _
54 54
template class CutlassMoeFCRunner<__nv_bfloat16, __nv_fp4_e2m1>;
55 55
#endif
56 56
#endif
57+
58+
// Explicit instantiations for finalizeMoeRoutingKernelLauncher to ensure
59+
// symbols are emitted in the JIT library for common data types.
60+
INSTANTIATE_FINALIZE_MOE_ROUTING(half, half, half);
61+
INSTANTIATE_FINALIZE_MOE_ROUTING(float, float, float);
62+
#ifdef ENABLE_BF16
63+
INSTANTIATE_FINALIZE_MOE_ROUTING(__nv_bfloat16, __nv_bfloat16, __nv_bfloat16);
64+
#endif
57 65
} // namespace tensorrt_llm::kernels::cutlass_kernels

csrc/fused_moe/cutlass_backend/cutlass_fused_moe_kernels.cuh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2923,8 +2923,9 @@ void CutlassMoeFCRunner<T, WeightType, OutputType, InputType, ScaleBiasType, Ena
2923 2923
finalizeMoeRoutingKernelLauncher<OutputType, UnfusedGemmOutputType>(
2924 2924
static_cast<UnfusedGemmOutputType const*>(gemm_output), final_output, fc2_expert_biases,
2925 2925
unpermuted_final_scales, unpermuted_row_to_permuted_row, permuted_row_to_unpermuted_row,
2926-
token_selected_experts, expert_first_token_offset, num_rows, hidden_size, k,
2927-
num_experts_per_node, parallelism_config, enable_alltoall, enable_pdl, stream);
2926+
token_selected_experts, expert_first_token_offset, num_rows, hidden_size,
2927+
unpadded_hidden_size, k, num_experts_per_node, parallelism_config, enable_alltoall,
2928+
enable_pdl, stream);
2928 2929
}
2929 2930

2930 2931
template <class T, class WeightType, class OutputType, class InputType, class ScaleBiasType,

csrc/nv_internal/tensorrt_llm/kernels/cutlass_kernels/include/moe_util_kernels.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,10 @@ void finalizeMoeRoutingKernelLauncher(
72 72
GemmOutputType const* expanded_permuted_rows, OutputType* reduced_unpermuted_output,
73 73
ScaleBiasType const* bias, float const* final_scales, int const* unpermuted_row_to_permuted_row,
74 74
int const* permuted_row_to_unpermuted_row, int const* token_selected_experts,
75-
int64_t const* expert_first_token_offset, int64_t const num_rows, int64_t const cols,
76-
int64_t const experts_per_token, int64_t const num_experts_per_node,
77-
MOEParallelismConfig parallelism_config, bool const enable_alltoall, bool enable_pdl,
78-
cudaStream_t stream);
75+
int64_t const* expert_first_token_offset, int64_t const num_rows, int64_t const padded_cols,
76+
int64_t const unpadded_cols, int64_t const experts_per_token,
77+
int64_t const num_experts_per_node, MOEParallelismConfig parallelism_config,
78+
bool const enable_alltoall, bool enable_pdl, cudaStream_t stream);
79 79

80 80
} // namespace cutlass_kernels
81 81
} // namespace tensorrt_llm::kernels

0 commit comments

Comments
 (0)