From 55beac2e011df688a21680a342c60362a75f235e Mon Sep 17 00:00:00 2001 From: gaoziyuan <88373061+gzy19990617@users.noreply.github.com> Date: Thu, 16 Oct 2025 16:05:07 +0800 Subject: [PATCH 1/2] fix --- .../model_executor/layers/moe/fused_moe_backend_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py b/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py index 9078021fdf..2529f35807 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py @@ -168,8 +168,8 @@ def apply( Paddle Cutlass compute Fused MoE. """ if layer.ep_size > 1: - if layer.fd_config.model_config.moe_phase.phase == "prefill" and layer.layer_idx == 0: - if layer.fd_config.scheduler_config.splitwise_role == "mixed": + if layer.fd_config.model_config.moe_phase.phase == "prefill": + if layer.fd_config.scheduler_config.splitwise_role == "mixed" and layer.layer_idx == 0: self.ep_prefill_runner.clean_low_latency_buffer() return self.apply_ep_prefill(layer, x, gate) else: From cb4561aa6029294ee8e5d05a65be3621b509b955 Mon Sep 17 00:00:00 2001 From: gaoziyuan <88373061+gzy19990617@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:50:19 +0800 Subject: [PATCH 2/2] fix --- .../model_executor/layers/moe/fused_moe_backend_base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py b/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py index 2529f35807..2fcc787f3d 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_backend_base.py @@ -168,12 +168,13 @@ def apply( Paddle Cutlass compute Fused MoE. """ if layer.ep_size > 1: + is_moe_start_layer = layer.layer_idx == layer.fd_config.model_config.moe_layer_start_index if layer.fd_config.model_config.moe_phase.phase == "prefill": - if layer.fd_config.scheduler_config.splitwise_role == "mixed" and layer.layer_idx == 0: + if layer.fd_config.scheduler_config.splitwise_role == "mixed" and is_moe_start_layer: self.ep_prefill_runner.clean_low_latency_buffer() return self.apply_ep_prefill(layer, x, gate) else: - if layer.fd_config.scheduler_config.splitwise_role == "mixed" and layer.layer_idx == 0: + if layer.fd_config.scheduler_config.splitwise_role == "mixed" and is_moe_start_layer: self.ep_decoder_runner.clean_low_latency_buffer() return self.apply_ep_decode(layer, x, gate) else: