Review: format + micro-optimizations

am17an · am17an · commit 7a258bfa81dd · 2025-09-23T09:51:26.000+08:00
diff --git a/ggml/src/ggml-cuda/topk-moe.cu b/ggml/src/ggml-cuda/topk-moe.cu
@@ -32,13 +32,13 @@ __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float *
 #pragma unroll
     for (int i = 0; i < n_experts; i += WARP_SIZE) {
         const int expert        = i + threadIdx.x;
-        logits_r[i / WARP_SIZE] = expert < n_experts ? logits[expert] : -INFINITY;
+        logits_r[i / WARP_SIZE] = n_experts % WARP_SIZE == 0 || expert < n_experts ? logits[expert] : -INFINITY;
     }
 
-    float max_val = -INFINITY;
+    float max_val = logits_r[0];
 
 #pragma unroll
-    for (int i = 0; i < experts_per_thread; i++) {
+    for (int i = 1; i < experts_per_thread; i++) {
         const float val = logits_r[i];
         max_val         = max(val, max_val);
     }
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
@@ -4408,12 +4408,10 @@ struct test_argsort : public test_case {
 };
 
 struct test_topk_moe: public test_case {
-
     const std::array<int64_t, 4> ne;
     const int n_expert_used;
     test_topk_moe(std::array<int64_t, 4> ne = {10, 5, 1, 1}, int n_expert_used = 1)
-    : ne(ne),
-    n_expert_used(n_expert_used) {
+    : ne(ne), n_expert_used(n_expert_used) {
         GGML_ASSERT(n_expert_used <= ne[0]);
     }