From ef00f304dc323a6d2f8096998b7c07771df81fa2 Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Mon, 20 Oct 2025 20:28:43 +0800 Subject: [PATCH] add shared_head to prefix of SharedHead Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/models/deepseek_mtp.py | 4 +++- vllm/model_executor/models/glm4_moe_mtp.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py index 576977b00e61..2d2027b9a824 100644 --- a/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm/model_executor/models/deepseek_mtp.py @@ -75,7 +75,9 @@ def __init__(self, vllm_config: VllmConfig, prefix: str) -> None: topk_indices_buffer = None self.shared_head = SharedHead( - config=config, prefix=prefix, quant_config=quant_config + config=config, + prefix=maybe_prefix(prefix, "shared_head"), + quant_config=quant_config, ) self.mtp_block = DeepseekV2DecoderLayer( vllm_config, diff --git a/vllm/model_executor/models/glm4_moe_mtp.py b/vllm/model_executor/models/glm4_moe_mtp.py index 9fb1be7ba45c..cb88c946013e 100644 --- a/vllm/model_executor/models/glm4_moe_mtp.py +++ b/vllm/model_executor/models/glm4_moe_mtp.py @@ -79,7 +79,9 @@ def __init__( self.hnorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.eh_proj = nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False) self.shared_head = SharedHead( - config=config, prefix=prefix, quant_config=quant_config + config=config, + prefix=maybe_prefix(prefix, "shared_head"), + quant_config=quant_config, ) self.mtp_block = Glm4MoeDecoderLayer( config=config,