Skip to content

Commit 6a42960

Browse files
authored
Add a kernel-config tuning mechanism to achieve better performance. (#681)
1 parent e3eea7d commit 6a42960

18 files changed

+2297
-260
lines changed

lightllm/common/all_kernel_configs/__init__.py

Whitespace-only changes.

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,10 @@ def experts(self, input_tensor, router_logits, top_k, renormalize, use_grouped_t
5050
w1, w1_scale = self.w1
5151
w2, w2_scale = self.w2
5252
use_fp8_w8a8 = self.quant_method is not None
53-
fused_experts(
53+
54+
from lightllm.common.fused_moe.grouped_fused_moe import fused_experts_impl
55+
56+
fused_experts_impl(
5457
hidden_states=input_tensor,
5558
w1=w1,
5659
w2=w2,
@@ -61,6 +64,7 @@ def experts(self, input_tensor, router_logits, top_k, renormalize, use_grouped_t
6164
w1_scale=w1_scale,
6265
w2_scale=w2_scale,
6366
)
67+
return
6468

6569
def _fuse(self):
6670
with self.lock:

0 commit comments

Comments
 (0)