Skip to content

Commit 270087d

Browse files
committed
apply suggestion
Signed-off-by: Siyuan Fu <[email protected]>
1 parent c210044 commit 270087d

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

csrc/trtllm_fmha_kernel_launcher.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ void trtllm_paged_attention_launcher(
120120
runner_params.vStrideBatch = kv_stride_batch;
121121
runner_params.mNumPagesInMemPool = num_pages_in_mem_pool;
122122
runner_params.stream = stream;
123+
// The scaleSoftmaxLog2Ptr and outputScalePtr take priority over scaleSoftmaxLog2 and
124+
// outputScale: if the pointers are not nullptr, scaleSoftmaxLog2 and outputScale are ignored.
123125
runner_params.outputScale = bmm2_scale;
124126
runner_params.outputScalePtr = bmm2_scale_ptr;
125127
runner_params.scaleSoftmaxLog2 = bmm1_scale * M_LOG2E;
@@ -405,6 +407,8 @@ void trtllm_ragged_attention_launcher(
405407
runner_params.mQkvLayout = QkvLayout::SeparateQkv;
406408
runner_params.mMultiProcessorCount = sm_count;
407409
runner_params.stream = stream;
410+
// The scaleSoftmaxLog2Ptr and outputScalePtr take priority over scaleSoftmaxLog2 and
411+
// outputScale: if the pointers are not nullptr, scaleSoftmaxLog2 and outputScale are ignored.
408412
runner_params.outputScale = bmm2_scale;
409413
runner_params.outputScalePtr = bmm2_scale_ptr;
410414
runner_params.scaleSoftmaxLog2 = bmm1_scale * M_LOG2E;

0 commit comments

Comments
 (0)