diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py index ef8cc7d17b21..b41700d9d82e 100644 --- a/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py +++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/flashinfer_nvlink_one_sided.py @@ -114,6 +114,7 @@ def prepare( payloads.append(a1q) if a1q_scale is not None: payloads.append(a1q_scale) + topk_ids_payload_index = len(payloads) payloads.append(topk_ids) payloads.append(topk_weights) @@ -122,6 +123,8 @@ def prepare( token_selected_experts=topk_ids, input_payloads=payloads, runtime_max_tokens_per_rank=self.runtime_max_tokens_per_rank, + invalid_token_expert_id=num_experts, + expert_id_payload_index=topk_ids_payload_index, ) if a1q_scale is not None: a1q_recv, a1q_scale_recv, topk_ids_recv, topk_weights_recv = recv_payloads