From 69ebb8ce92474fe7c955fd92788318db36c311cd Mon Sep 17 00:00:00 2001
From: Amir Klein <203507526+amirkl94@users.noreply.github.com>
Date: Tue, 20 Jan 2026 13:21:30 +0200
Subject: [PATCH] Bugfix: Pass router logits dtype in nemotron shared experts

Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
---
 vllm/model_executor/models/nemotron_h.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py
index aff1d5fd4107..4808934eba97 100644
--- a/vllm/model_executor/models/nemotron_h.py
+++ b/vllm/model_executor/models/nemotron_h.py
@@ -143,11 +143,12 @@ def __init__(
 
         self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe
 
+        router_logits_dtype = torch.float32
         self.gate = ReplicatedLinear(
             config.hidden_size,
             config.n_routed_experts,
             bias=False,
-            params_dtype=torch.float32,
+            params_dtype=router_logits_dtype,
             quant_config=None,
             prefix=f"{prefix}.gate",
         )
@@ -207,6 +208,7 @@ def __init__(
             enable_eplb=self.enable_eplb,
             num_redundant_experts=self.n_redundant_experts,
             is_sequence_parallel=self.is_sequence_parallel,
+            router_logits_dtype=router_logits_dtype,
         )
 
         if self.use_latent_moe:
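
For reviewers, a minimal sketch of the failure mode this patch addresses. The names below (`Gate`, `FusedMoEStub`) and the shapes are illustrative assumptions, not the actual vLLM API: the point is that the gate keeps its parameters, and therefore its output logits, in float32 while activations stay in the model dtype, so the MoE layer consuming the logits must be told that dtype explicitly instead of assuming the model dtype.

import torch

class Gate(torch.nn.Module):
    def __init__(self, hidden_size, n_experts, params_dtype=torch.float32):
        super().__init__()
        # Router weights held in float32 for a numerically stable softmax/top-k.
        self.weight = torch.nn.Parameter(
            torch.randn(n_experts, hidden_size, dtype=params_dtype))

    def forward(self, hidden_states):
        # Upcast activations to the router dtype before the matmul, so the
        # logits come out in params_dtype regardless of the model dtype.
        return torch.nn.functional.linear(
            hidden_states.to(self.weight.dtype), self.weight)

class FusedMoEStub(torch.nn.Module):
    """Stand-in for the fused MoE layer; only the dtype check matters here."""
    def __init__(self, router_logits_dtype):
        super().__init__()
        self.router_logits_dtype = router_logits_dtype

    def forward(self, hidden_states, router_logits):
        # Before the fix, nothing told this layer the logits were float32,
        # so dtype-sensitive code paths could assume the model dtype instead.
        assert router_logits.dtype == self.router_logits_dtype
        return hidden_states  # expert dispatch elided

router_logits_dtype = torch.float32  # single source of truth, as in the patch
gate = Gate(hidden_size=64, n_experts=8, params_dtype=router_logits_dtype)
moe = FusedMoEStub(router_logits_dtype=router_logits_dtype)

x = torch.randn(4, 64, dtype=torch.bfloat16)  # model dtype
out = moe(x, gate(x))  # logits arrive as float32 and are handled consistently

Defining `router_logits_dtype` once and passing it both to the gate's `params_dtype` and to the MoE layer, as the patch does, keeps the two in sync by construction.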