
Commit e0efad8

fix conflict

Signed-off-by: jiahanc <[email protected]>

Merge commit, 2 parents: 1b12e88 + c9e42ca

File tree

3 files changed: 20 additions, 4 deletions


csrc/trtllm_fused_moe_kernel_launcher.cu

Lines changed: 15 additions & 0 deletions
@@ -355,6 +355,7 @@ void trtllm_fp8_block_scale_moe_launcher(
         << "routing_bias has incorrect shape.";
   }
 
+<<<<<<< HEAD
   if (n_group.has_value() && n_group.value() != 0) {
     TVM_FFI_ICHECK(static_cast<RoutingMethodType>(routing_method_type) ==
                    RoutingMethodType::DeepSeekV3)
@@ -382,6 +383,20 @@ void trtllm_fp8_block_scale_moe_launcher(
     TVM_FFI_ICHECK_EQ(top_k, 1)
         << "Current routing kernel (no groups, Llama4) only supports top_k=1.";
   }
+=======
+  // if (n_group <= 0 || topk_group <= 0) {
+  //   TVM_FFI_ICHECK_EQ(top_k, 1) << "Current routing kernel (no groups) only supports top_k=1.";
+  // } else {
+  //   TVM_FFI_ICHECK_LE(top_k, 8) << "Current routing kernel (with groups) only supports top_k<=8.";
+  //   TVM_FFI_ICHECK_LE(topk_group, 4)
+  //       << "Current routing kernel (with groups) only supports topk_group<=4.";
+  //   TVM_FFI_ICHECK_LE(topk_group, n_group) << "n_group must not be smaller than topk_group.";
+  //   TVM_FFI_ICHECK_EQ(num_experts % n_group, 0) << "num_experts must be divisible by n_group";
+  //   // This check ensures we have enough experts in the selected groups to handle the top_k routing
+  //   TVM_FFI_ICHECK_LT(top_k, (topk_group * num_experts / n_group))
+  //       << "top_k must be less than total number of experts in selected groups";
+  // }
+>>>>>>> c9e42cacef0506c57777d8d8efcf859219529951
   TVM_FFI_ICHECK_EQ(num_experts % 4, 0)
       << "Routing kernel expects that num_experts must be divisible by 4";
   TVM_FFI_ICHECK_GT(num_experts, top_k) << "num_experts must be greater than top_k";
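
For reference, the commented-out block above encodes a set of grouped-routing invariants. The following is a minimal standalone sketch, not flashinfer's API: the function name, the plain-C++ checks, and the example values in main() are hypothetical and only restate the relationships between num_experts, top_k, n_group, and topk_group.

#include <cstdio>

// Hypothetical standalone predicate restating the commented-out checks; it is
// not part of the launcher and uses plain returns instead of TVM_FFI_ICHECK_*.
bool grouped_routing_config_is_valid(int num_experts, int top_k, int n_group, int topk_group) {
  if (n_group <= 0 || topk_group <= 0) {
    // No expert groups: this routing path only supports top_k == 1.
    return top_k == 1;
  }
  if (top_k > 8) return false;                   // with groups, top_k <= 8
  if (topk_group > 4) return false;              // with groups, topk_group <= 4
  if (topk_group > n_group) return false;        // cannot select more groups than exist
  if (num_experts % n_group != 0) return false;  // experts must split evenly across groups
  // Enough experts in the selected groups to cover the requested top_k.
  return top_k < topk_group * num_experts / n_group;
}

int main() {
  // Illustrative values only: 256 experts in 8 groups, selecting 4 groups with top_k = 8.
  std::printf("%d\n", grouped_routing_config_is_valid(256, 8, 8, 4));   // prints 1
  // Without groups, any top_k other than 1 is rejected by this sketch.
  std::printf("%d\n", grouped_routing_config_is_valid(256, 10, 0, 0));  // prints 0
  return 0;
}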

csrc/trtllm_fused_moe_routing_renormalize.cu

Lines changed: 1 addition & 0 deletions
@@ -464,6 +464,7 @@ void run(Data const& data, void* stream) {
   }
   cudaDeviceSynchronize();
   cudaError_t result = cudaGetLastError();
+  std::cout << "cudaGetLastError: " << cudaGetErrorString(result) << std::endl;
 }
 
 // void run(Data const& data, void* stream) {
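
The added line follows the standard CUDA pattern of synchronizing before querying the error state: cudaDeviceSynchronize() drains outstanding device work so errors from asynchronous launches become visible, and cudaGetLastError() then returns (and clears) the most recent error. A minimal sketch of the same pattern, with a stand-in kernel rather than the routing kernel, might look like this:

#include <cuda_runtime.h>
#include <iostream>

// Stand-in kernel; the real code launches the renormalize routing kernels.
__global__ void dummy_kernel() {}

int main() {
  dummy_kernel<<<1, 1>>>();  // kernel launches are asynchronous; errors can surface later
  cudaDeviceSynchronize();   // wait for the device so deferred launch/runtime errors are visible
  cudaError_t result = cudaGetLastError();  // fetch (and clear) the most recent error
  std::cout << "cudaGetLastError: " << cudaGetErrorString(result) << std::endl;
  return result == cudaSuccess ? 0 : 1;
}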

tests/moe/test_trtllm_gen_fused_moe.py

Lines changed: 4 additions & 4 deletions
@@ -1836,9 +1836,9 @@ def cache_permute_indices():
     return _cache_permute_indices
 
 
-@pytest.mark.parametrize("num_tokens", [1, 8, 1024,512])
+@pytest.mark.parametrize("num_tokens", [1, 8, 1024])
 @pytest.mark.parametrize("hidden_size", [1024, 2048, 8192])
-@pytest.mark.parametrize("intermediate_size", [2048, 1024, 768, 384, 512])
+@pytest.mark.parametrize("intermediate_size", [2048, 1024, 5120, 768, 384])
 @pytest.mark.parametrize(
     "moe_impl",
     [
@@ -1905,8 +1905,8 @@ def cache_permute_indices():
         ),
         pytest.param(
             {
-                "num_experts": 256,
-                "top_k": 8,
+                "num_experts": 512,
+                "top_k": 10,
                 "padding": 8,
                 "n_groups": None,
                 "top_k_groups": None,

0 commit comments
