diff --git a/python/sglang/srt/models/qwen2_eagle.py b/python/sglang/srt/models/qwen2_eagle.py index 4b4c0ec41cdc..6c2b982c1994 100644 --- a/python/sglang/srt/models/qwen2_eagle.py +++ b/python/sglang/srt/models/qwen2_eagle.py @@ -129,7 +129,7 @@ def __init__( self.lm_head = self.model.embed_tokens else: self.lm_head = ParallelLMHead( - config.vocab_size, + getattr(config, "hot_vocab_size", config.vocab_size), config.hidden_size, quant_config=quant_config, prefix=add_prefix("lm_head", prefix),