Skip to content

Commit 9285325

Browse files
authored
CUDA: fix bug in topk-moe softmax (#16711)
1 parent 03792ad commit 9285325

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/topk-moe.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float *
141 141     wt_sum = warp_reduce_sum(wt_sum);
142 142     const float inv_sum = 1.0f / wt_sum;
143 143
144     -   for (int i = threadIdx.x; i < n_expert_used; i += WARP_SIZE) {
    144 +   for (int i = 0; i < experts_per_thread; i++) {
145 145     output_weights[i] *= inv_sum;
146 146     }
147 147     }

0 commit comments

Comments
 (0)