From be7fb158851a1d766df416c7670a100ae6103140 Mon Sep 17 00:00:00 2001
From: qingjun
Date: Fri, 13 Jun 2025 10:26:33 +0800
Subject: [PATCH 1/2] fix fp

Signed-off-by: qingjun
---
 vllm/model_executor/models/minimax_text_01.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py
index 02800449bda3..d82ff2971b14 100644
--- a/vllm/model_executor/models/minimax_text_01.py
+++ b/vllm/model_executor/models/minimax_text_01.py
@@ -856,7 +856,7 @@ def layer_fn(prefix):
         self._dtype = _dummy.dtype
         del _dummy
 
-        self.minimax_cache = MinimaxCacheManager(dtype=self._dtype,
+        self.minimax_cache = MinimaxCacheManager(dtype=torch.float32,
                                                  cache_shape=self.cache_shape)
 
         rope_theta = getattr(config, "rope_theta", 10000)

From 05306ebdcfe0fecee620cbcfe922ad8ef4b7f8df Mon Sep 17 00:00:00 2001
From: qingjun
Date: Fri, 13 Jun 2025 10:40:07 +0800
Subject: [PATCH 2/2] fix lm_head float

Signed-off-by: qingjun
---
 vllm/model_executor/models/minimax_text_01.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py
index d82ff2971b14..87480796ae98 100644
--- a/vllm/model_executor/models/minimax_text_01.py
+++ b/vllm/model_executor/models/minimax_text_01.py
@@ -1021,7 +1021,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
         else:
             self.lm_head = PPMissingLayer()
-
+        self.lm_head.float()
         flash_layer_count = sum(1 for attn_type in self.config.attn_type_list
                                 if attn_type == 1)
 
         self.kv_cache = [torch.tensor([]) for _ in range(flash_layer_count)]
@@ -1054,7 +1054,7 @@ def forward(self,
 
     def compute_logits(self, hidden_states: torch.Tensor,
                        sampling_metadata: SamplingMetadata) -> torch.Tensor:
-        logits = self.logits_processor(self.lm_head, hidden_states,
+        logits = self.logits_processor(self.lm_head, hidden_states.float(),
                                        sampling_metadata)
         return logits