[BugFix] Fix Granite model configuration (vllm-project#8216)

njhill · web-flow · commit baa5467547a7 · 2024-09-06T11:39:29.000+08:00
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
@@ -10,12 +10,16 @@
 
 from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              EAGLEConfig, ExaoneConfig,
-                                             InternVLChatConfig, JAISConfig,
-                                             MedusaConfig, MLPSpeculatorConfig,
-                                             MPTConfig, NemotronConfig,
-                                             RWConfig, UltravoxConfig)
+                                             GraniteConfig, InternVLChatConfig,
+                                             JAISConfig, MedusaConfig,
+                                             MLPSpeculatorConfig, MPTConfig,
+                                             NemotronConfig, RWConfig,
+                                             UltravoxConfig)
+# yapf: enable
 from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
@@ -39,6 +43,9 @@
     "internvl_chat": InternVLChatConfig,
     "nemotron": NemotronConfig,
     "ultravox": UltravoxConfig,
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "granite": GraniteConfig,
 }
 
 for name, cls in _CONFIG_REGISTRY.items():
@@ -62,29 +69,36 @@ def get_config(
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
 
-    try:
-        config = AutoConfig.from_pretrained(
-            model,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            code_revision=code_revision,
-            **kwargs)
-    except ValueError as e:
-        if (not trust_remote_code and
-                "requires you to execute the configuration file" in str(e)):
-            err_msg = (
-                "Failed to load the model config. If the model is a custom "
-                "model not yet available in the HuggingFace transformers "
-                "library, consider setting `trust_remote_code=True` in LLM "
-                "or using the `--trust-remote-code` flag in the CLI.")
-            raise RuntimeError(err_msg) from e
-        else:
-            raise e
-    if config.model_type in _CONFIG_REGISTRY:
-        config_class = _CONFIG_REGISTRY[config.model_type]
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model, revision=revision, code_revision=code_revision, **kwargs)
+
+    # Use custom model class if it's in our registry
+    model_type = config_dict.get("model_type")
+    if model_type in _CONFIG_REGISTRY:
+        config_class = _CONFIG_REGISTRY[model_type]
         config = config_class.from_pretrained(model,
                                               revision=revision,
                                               code_revision=code_revision)
+    else:
+        try:
+            config = AutoConfig.from_pretrained(
+                model,
+                trust_remote_code=trust_remote_code,
+                revision=revision,
+                code_revision=code_revision,
+                **kwargs)
+        except ValueError as e:
+            if (not trust_remote_code
+                    and "requires you to execute the configuration file"
+                    in str(e)):
+                err_msg = (
+                    "Failed to load the model config. If the model is a custom "
+                    "model not yet available in the HuggingFace transformers "
+                    "library, consider setting `trust_remote_code=True` in LLM "
+                    "or using the `--trust-remote-code` flag in the CLI.")
+                raise RuntimeError(err_msg) from e
+            else:
+                raise e
 
     # Special architecture mapping check for GGUF models
     if is_gguf:
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
@@ -6,6 +6,7 @@
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
+from vllm.transformers_utils.configs.granite import GraniteConfig
 from vllm.transformers_utils.configs.internvl import InternVLChatConfig
 from vllm.transformers_utils.configs.jais import JAISConfig
 from vllm.transformers_utils.configs.medusa import MedusaConfig
@@ -27,4 +28,7 @@
     "MLPSpeculatorConfig",
     "NemotronConfig",
     "UltravoxConfig",
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "GraniteConfig",
 ]