We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0dcdd48, commit 60c880b (Copy full SHA for 60c880b)
vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
@@ -731,6 +731,7 @@ def apply(
731
if self.quant_config.use_int4_w4a16
732
else scalar_types.float4_e2m1f.id
733
), # works only for w4a16
734
+ apply_router_weight_on_input=apply_router_weight_on_input,
735
global_num_experts=global_num_experts,
736
activation=activation,
737
expert_map=expert_map,
0 commit comments