From 1b1c84bfa32d3548945c5eb4d40bfb11f978adff Mon Sep 17 00:00:00 2001
From: elvischenv <219235043+elvischenv@users.noreply.github.com>
Date: Wed, 3 Dec 2025 06:25:48 -0800
Subject: [PATCH] clean up eagle

---
 python/sglang/srt/configs/model_config.py    |   1 -
 .../srt/models/mistral_large_3_eagle.py      | 104 ------------------
 python/sglang/srt/utils/mistral_utils.py     |   8 --
 3 files changed, 113 deletions(-)
 delete mode 100644 python/sglang/srt/models/mistral_large_3_eagle.py

diff --git a/python/sglang/srt/configs/model_config.py b/python/sglang/srt/configs/model_config.py
index dd04b9295c2d..87fc66b63c6f 100644
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -338,7 +338,6 @@ def _derive_model_shapes(self):
             or "DotsVLMForCausalLM" in self.hf_config.architectures
             or "MistralLarge3ForCausalLM" in self.hf_config.architectures
             or "PixtralForConditionalGeneration" in self.hf_config.architectures
-            or "MistralLarge3ForCausalLMEagle" in self.hf_config.architectures
         ):
             self.head_dim = 256
             self.attention_arch = AttentionArch.MLA
diff --git a/python/sglang/srt/models/mistral_large_3_eagle.py b/python/sglang/srt/models/mistral_large_3_eagle.py
deleted file mode 100644
index e85a629e76f0..000000000000
--- a/python/sglang/srt/models/mistral_large_3_eagle.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from typing import Optional
-
-import torch
-from torch import nn
-from transformers import PretrainedConfig
-
-from python.sglang.srt.layers.attention.nsa.utils import is_nsa_enable_prefill_cp
-from sglang.srt.distributed import get_pp_group
-from sglang.srt.layers.layernorm import RMSNorm
-from sglang.srt.layers.linear import RowParallelLinear
-from sglang.srt.layers.quantization.base_config import QuantizationConfig
-from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding
-from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
-from sglang.srt.models.deepseek_v2 import DeepseekV2DecoderLayer, DeepseekV2Model
-from sglang.srt.models.mistral_large_3 import MistralLarge3ForCausalLM
-from sglang.srt.utils import add_prefix
-
-
-class MistralLarge3Model(DeepseekV2Model):
-    def __init__(
-        self,
-        config: PretrainedConfig,
-        quant_config: Optional[QuantizationConfig] = None,
-        prefix: str = "",
-    ):
-        nn.Module.__init__(self)
-
-        self.config = config
-        self.vocab_size = config.vocab_size
-        assert get_pp_group().world_size == 1
-        self.pp_group = get_pp_group()
-        self.nsa_enable_prefill_cp = is_nsa_enable_prefill_cp()
-
-        self.embed_tokens = VocabParallelEmbedding(
-            config.vocab_size,
-            config.hidden_size,
-            prefix=add_prefix("embed_tokens", prefix),
-        )
-
-        self.layers = nn.ModuleList(
-            [
-                DeepseekV2DecoderLayer(
-                    config=config,
-                    prefix=add_prefix(prefix, f"layers.{i}"),
-                    quant_config=quant_config,
-                    layer_id=i,
-                )
-                for i in range(self.config.num_hidden_layers)
-            ]
-        )
-        self.start_layer = 0
-        self.end_layer = self.config.num_hidden_layers
-
-        self.fc = RowParallelLinear(
-            self.config.hidden_size * 2,
-            self.config.hidden_size,
-            bias=False,
-            quant_config=quant_config,
-            prefix=add_prefix(prefix, "fc"),
-            input_is_parallel=False,
-        )
-        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
-        self.layers_to_capture = []
-
-    def forward(
-        self,
-        input_ids: torch.Tensor,
-        positions: torch.Tensor,
-        forward_batch: ForwardBatch,
-        input_embeds: torch.Tensor = None,
-        pp_proxy_tensors: Optional[PPProxyTensors] = None,
-    ) -> torch.Tensor:
-        if input_embeds is None:
-            input_embeds = self.embed_tokens(input_ids)
-        input_embeds, _ = self.fc(
-            torch.cat((input_embeds, forward_batch.spec_info.hidden_states), dim=-1)
-        )
-        output = super().forward(
-            input_ids, positions, forward_batch, input_embeds, pp_proxy_tensors
-        )
-        assert isinstance(output, torch.Tensor)
-        return output
-
-
-class MistralLarge3ForCausalLMEagle(MistralLarge3ForCausalLM):
-    remapping = MistralLarge3ForCausalLM.remapping | {
-        r"eagle_linear\.weight": r"model.fc.weight",
-        r"eagle_linear\.qscale_act": r"model.fc.input_scale",
-        r"eagle_linear\.qscale_weight": r"model.fc.weight_scale",
-    }
-
-    def __init__(
-        self,
-        *,
-        config: PretrainedConfig,
-        quant_config: Optional[QuantizationConfig] = None,
-        prefix: str = "",
-    ):
-        config.quant_config = quant_config
-        self.model_cls = MistralLarge3Model
-        super().__init__(config=config, quant_config=quant_config, prefix=prefix)
-
-
-EntryClass = [MistralLarge3ForCausalLMEagle]
diff --git a/python/sglang/srt/utils/mistral_utils.py b/python/sglang/srt/utils/mistral_utils.py
index 34372fb68d4e..e23abb53c5fd 100644
--- a/python/sglang/srt/utils/mistral_utils.py
+++ b/python/sglang/srt/utils/mistral_utils.py
@@ -70,7 +70,6 @@ def adapt_config_dict(
             "encoder_args"
        )
    )
-    is_eagle = "eagle" in model
 
     assert not (is_vision and is_audio), "Vision and audio are mutually exclusive"
 
@@ -78,8 +77,6 @@
         config_dict = _remap_mistral_vision_args(config_dict)
     if is_audio:
         config_dict = _remap_mistral_audio_args(config_dict)
-    if is_eagle:
-        config_dict = _remap_mistral_eagle_args(config_dict)
 
     config = PretrainedConfig.from_dict(config_dict)
 
@@ -211,11 +208,6 @@ def _remap_mistral_audio_args(config: dict) -> dict:
     return config
 
 
-def _remap_mistral_eagle_args(config: dict) -> dict:
-    config["architectures"] = ["MistralLarge3ForCausalLMEagle"]
-    return config
-
-
 def _remap_moe_args(config: dict) -> dict:
     moe_config_map = {
         "route_every_n": "moe_layer_freq",