@@ -57,6 +57,7 @@ def __init__(
         batch_size: Optional[Union[int, str]] = 1,
         trust_remote_code: Optional[bool] = False,
         revision=None,
+        model_name=None,
         attn_implementation=best_fit_attn_implementation,
         use_flash_attention_2=True,
         device_map="auto",
@@ -83,8 +84,20 @@ def __init__(
         llava_model_args["attn_implementation"] = attn_implementation
         if customized_config:
             llava_model_args["customized_config"] = customized_config
-        llava_model_args["use_flash_attention_2"] = False
-        self._tokenizer, self._model, self._image_processor, self._max_length = load_pretrained_model(pretrained, None, get_model_name_from_path(pretrained), device_map=self.device_map, **llava_model_args)
+        if attn_implementation is not None:
+            llava_model_args["attn_implementation"] = attn_implementation
+        if "use_flash_attention_2" in kwargs:
+            llava_model_args["use_flash_attention_2"] = kwargs["use_flash_attention_2"]
+
+        model_name = model_name if model_name is not None else get_model_name_from_path(pretrained)
+        try:
+            # Try to load the model with the multimodal argument
+            self._tokenizer, self._model, self._image_processor, self._max_length = load_pretrained_model(pretrained, None, model_name, device_map=self.device_map, **llava_model_args)
+        except TypeError:
+            # for older versions of LLaVA that don't have multimodal argument
+            llava_model_args.pop("multimodal", None)
+            self._tokenizer, self._model, self._image_processor, self._max_length = load_pretrained_model(pretrained, None, model_name, device_map=self.device_map, **llava_model_args)
+
         self._config = self._model.config
         self.model.eval()
         self.model.tie_weights()
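For reviewers, a minimal self-contained sketch of the fallback pattern this hunk introduces: call the loader with the newer `multimodal` keyword first, and retry without it when an older LLaVA build rejects it with a `TypeError`. `old_loader` and `load_with_fallback` are illustrative stand-ins, not part of this PR or of `llava.model.builder`.

```python
def old_loader(model_path, model_base, model_name, device_map="auto"):
    # Stand-in for an older LLaVA build whose load_pretrained_model
    # does not accept a "multimodal" keyword argument.
    return ("tokenizer", "model", "image_processor", 2048)


def load_with_fallback(load_fn, pretrained, model_name, device_map, **llava_model_args):
    try:
        # Newer builds accept the extra kwargs (e.g. multimodal=True).
        return load_fn(pretrained, None, model_name, device_map=device_map, **llava_model_args)
    except TypeError:
        # Older builds raise TypeError on the unknown kwarg; drop it and retry.
        llava_model_args.pop("multimodal", None)
        return load_fn(pretrained, None, model_name, device_map=device_map, **llava_model_args)


tokenizer, model, image_processor, max_length = load_with_fallback(
    old_loader, "liuhaotian/llava-v1.5-7b", "llava-v1.5-7b", "auto", multimodal=True
)
```

Note that `except TypeError` also catches a `TypeError` raised inside the loader itself, not only a signature mismatch, so keeping the retried call identical apart from the dropped kwarg keeps the fallback easy to reason about.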