diff --git a/vllm/model_executor/layers/deepseek_v4_attention.py b/vllm/model_executor/layers/deepseek_v4_attention.py index 847c3eee55a8..759578f6eb84 100644 --- a/vllm/model_executor/layers/deepseek_v4_attention.py +++ b/vllm/model_executor/layers/deepseek_v4_attention.py @@ -352,7 +352,6 @@ def compressor_kv_score() -> torch.Tensor: return torch.mm( hidden_states, compressor.fused_wkv_wgate.weight.T, - out_dtype=torch.float32, ) aux_fns[0] = compressor_kv_score @@ -369,7 +368,6 @@ def indexer_compressor_kv_score() -> torch.Tensor: return torch.mm( hidden_states, indexer.compressor.fused_wkv_wgate.weight.T, - out_dtype=torch.float32, ) aux_fns[1] = indexer_weights_proj