Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions vllm_gaudi/v1/worker/hpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4814,6 +4814,15 @@ def _sync_moe_kernel_flags(module: torch.nn.Module):
runner = getattr(experts, "runner", None)
if runner is not None and hasattr(runner, "gate"):
runner.gate = None
# Refresh the cached gate ref captured at
# FusedMoE.__init__ so that it points to the post-INC
# block-level gate. The dp_size==1 fast path
# (patched_fused_moe_forward) falls back to
# runner._hpu_gate_ref when runner.gate is None; without
# this refresh, the pre-INC reference still points at the
# now-replaced module, which produces shape/dtype
# mismatches under fp8.
if block_gate is not None:
object.__setattr__(runner, "_hpu_gate_ref", block_gate)
Comment on lines +4824 to +4825

if id(experts) in self._detached_moe_gates:
self._detached_moe_gates.remove(id(experts))
Expand Down
Loading