
Commit

Add load_model
lerela committed Oct 2, 2023
1 parent bde1118 · commit 56e1d4c
Showing 2 changed files with 6 additions and 2 deletions.
1 change: 1 addition & 0 deletions fastchat/conversation.py
@@ -842,6 +842,7 @@ def get_conv_template(name: str) -> Conversation:
 
 # Mistral template
 register_conv_template(
+    # source: https://docs.mistral.ai/llm/mistral-instruct-v0.1#chat-template
     Conversation(
         name="mistral",
         system_template="",
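
For context, the template registered above is consumed through FastChat's usual conversation API. A minimal sketch, assuming the standard get_conv_template / append_message / get_prompt interface of fastchat.conversation; the user message is illustrative:

    from fastchat.conversation import get_conv_template

    # Fetch the newly registered "mistral" template and build a prompt from it.
    conv = get_conv_template("mistral")
    conv.append_message(conv.roles[0], "What is the capital of France?")  # user turn (illustrative)
    conv.append_message(conv.roles[1], None)  # leave the assistant slot open for generation
    prompt = conv.get_prompt()
    print(prompt)  # expected to follow the [INST] ... [/INST] format referenced in the source comment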
7 changes: 5 additions & 2 deletions fastchat/model/model_adapter.py
@@ -1257,13 +1257,16 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
 
 
 class MistralAdapter(BaseModelAdapter):
-    """The model adapter for Mistral models"""
+    """The model adapter for Mistral AI models"""
 
     def match(self, model_path: str):
         return "mistral" in model_path.lower()
 
     def load_model(self, model_path: str, from_pretrained_kwargs: dict):
-        raise NotImplementedError()
+        model, tokenizer = super().load_model(model_path, from_pretrained_kwargs)
+        model.config.eos_token_id = tokenizer.eos_token_id
+        model.config.pad_token_id = tokenizer.pad_token_id
+        return model, tokenizer
 
     def get_default_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("mistral")
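
For context, the new load_model override is normally reached through FastChat's adapter dispatch rather than called by hand. A minimal sketch of exercising the adapter directly; the model path and from_pretrained kwargs are illustrative, and it assumes BaseModelAdapter.load_model wraps transformers' AutoTokenizer / AutoModelForCausalLM.from_pretrained as elsewhere in FastChat:

    import torch
    from fastchat.model.model_adapter import MistralAdapter

    adapter = MistralAdapter()
    assert adapter.match("mistralai/Mistral-7B-Instruct-v0.1")  # matched on the "mistral" substring

    # from_pretrained_kwargs is forwarded to the underlying from_pretrained calls.
    model, tokenizer = adapter.load_model(
        "mistralai/Mistral-7B-Instruct-v0.1",
        {"torch_dtype": torch.float16},
    )

    # The override mirrors the tokenizer's special tokens onto the model config,
    # so generation stops at the tokenizer's EOS token and padding is well defined.
    assert model.config.eos_token_id == tokenizer.eos_token_id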
