Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,13 +965,17 @@ def does_token_look_special(self, token: str | bytes) -> bool:

return seems_special

def override_tokenizer_settings(self, tokenizer):
    """Hook for subclasses to tweak the HF tokenizer before vocab extraction.

    Called from get_vocab_base() right after AutoTokenizer.from_pretrained(),
    so a model class can patch tokenizer attributes (e.g. add_bos_token)
    before the vocabulary is read.

    The base implementation is a no-op and returns the tokenizer unchanged.

    :param tokenizer: the tokenizer instance loaded from the model directory
    :return: the (possibly modified) tokenizer to use for vocab extraction
    """
    return tokenizer

# used for GPT-2 BPE and WordPiece vocabs
def get_vocab_base(self) -> tuple[list[str], list[int], str]:
tokens: list[str] = []
toktypes: list[int] = []

from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
tokenizer = self.override_tokenizer_settings(tokenizer)
vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
assert max(tokenizer.vocab.values()) < vocab_size

Expand Down Expand Up @@ -7456,16 +7460,28 @@ def prepare_tensors(self):
@ModelBase.register(
"DeepseekV2ForCausalLM",
"DeepseekV3ForCausalLM",
"DeepseekV32ForCausalLM",
"KimiVLForConditionalGeneration",
"YoutuForCausalLM",
"YoutuVLForConditionalGeneration"
)
class DeepseekV2Model(TextModel):
model_arch = gguf.MODEL_ARCH.DEEPSEEK2

def override_tokenizer_settings(self, tokenizer):
    """Enable add_bos_token for DeepSeek V3.2 so the pre-tokenizer is recognized."""
    is_deepseek_v32 = self.hparams.get("model_type") == "deepseek_v32"
    if is_deepseek_v32:
        # V3.2 configs ship with BOS insertion disabled; turn it back on here
        tokenizer.add_bos_token = True
    return tokenizer

def set_vocab(self):
try:
self._set_vocab_gpt2()
# in DeepSeek V3.2 adding BOS token is disabled in tokenizer configuration
# instead the BOS token is added in encode_messages() Python code
# therefore we have to override this setting
if self.hparams.get("model_type") == "deepseek_v32":
self.gguf_writer.add_add_bos_token(True)
return
except Exception:
pass
Expand Down Expand Up @@ -7576,7 +7592,7 @@ def set_gguf_parameters(self):

def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
# skip vision tensors and remove "language_model." for Kimi-VL
if "vision_tower" in name or "multi_modal_projector" in name:
if "vision_tower" in name or "multi_modal_projector" in name or "self_attn.indexer" in name:
return []
if name.startswith("siglip2.") or name.startswith("merger."):
return []
Expand Down