def forward(self, x):
    """Run the gate/up projection, optional clamping, activation and down projection.

    The input is passed through ``self.gate_up_proj``; only the first element
    of the returned pair is used. When ``self.swiglu_limit`` is not ``None``,
    the projected tensor is split into two chunks along its last dimension:
    the first chunk is clamped from above at ``limit`` (no lower bound) and
    the second chunk is clamped to ``[-limit, limit]``. The chunks are then
    concatenated back along the last dimension before ``self.act_fn`` is
    applied. The result goes through ``self.down_proj``, whose first
    returned element is the output.

    Args:
        x: Input passed to ``self.gate_up_proj``.

    Returns:
        The first element of the pair returned by ``self.down_proj``.
    """
    projected, _ = self.gate_up_proj(x)

    limit = self.swiglu_limit
    if limit is not None:
        limit = float(limit)
        gate, up = torch.chunk(projected, 2, dim=-1)
        # The first chunk is bounded only from above; the second is bounded
        # symmetrically around zero.
        gate = torch.clamp(gate, max=limit)
        up = torch.clamp(up, min=-limit, max=limit)
        projected = torch.cat((gate, up), dim=-1)

    activated = self.act_fn(projected)
    output, _ = self.down_proj(activated)
    return output