diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d98f57bc2d353..8dbe6504d21bd 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -16,6 +16,7 @@
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import FlexibleArgumentParser
 
 if TYPE_CHECKING:
@@ -753,7 +754,7 @@ def from_cli_args(cls, args: argparse.Namespace):
 
     def create_engine_config(self) -> EngineConfig:
         # gguf file needs a specific model loader and doesn't use hf_repo
-        if self.model.endswith(".gguf"):
+        if check_gguf_file(self.model):
             self.quantization = self.load_format = "gguf"
 
         # bitsandbytes quantization needs a specific model loader
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f3ac8d3178d4e..dfe83ddb731d4 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -16,6 +16,7 @@
                                              MedusaConfig, MLPSpeculatorConfig,
                                              MPTConfig, NemotronConfig,
                                              RWConfig, UltravoxConfig)
+from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
 ) -> PretrainedConfig:
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
+    is_gguf = check_gguf_file(model)
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
     if VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
-    if Path(model).is_file() and Path(model).suffix == ".gguf":
+    if check_gguf_file(model):
         model = Path(model).parent
     return get_image_processor_config(model, revision=revision, **kwargs)
 
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 2866975850db3..f9fb8d1e103b7 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -12,6 +12,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
                                                 MistralTokenizer)
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import make_async
 
 logger = init_logger(__name__)
@@ -96,8 +97,7 @@ def get_tokenizer(
         kwargs["truncation_side"] = "left"
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(tokenizer_name).is_file() and Path(
-        tokenizer_name).suffix == ".gguf"
+    is_gguf = check_gguf_file(tokenizer_name)
    if is_gguf:
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
new file mode 100644
index 0000000000000..7a9041b04fbb9
--- /dev/null
+++ b/vllm/transformers_utils/utils.py
@@ -0,0 +1,16 @@
+from os import PathLike
+from pathlib import Path
+from typing import Union
+
+
+def check_gguf_file(model: Union[str, PathLike]) -> bool:
+    """Check if the file is a GGUF model."""
+    model = Path(model)
+    if not model.is_file():
+        return False
+    elif model.suffix == ".gguf":
+        return True
+
+    with open(model, "rb") as f:
+        header = f.read(4)
+    return header == b"GGUF"
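
A minimal usage sketch (not part of the diff itself) of the new check_gguf_file helper. It assumes only the behavior visible above: a path counts as a GGUF model either because it carries the ".gguf" suffix or because its first four bytes are the b"GGUF" magic. The temporary file names are illustrative only.

    import tempfile
    from pathlib import Path

    from vllm.transformers_utils.utils import check_gguf_file

    with tempfile.TemporaryDirectory() as tmp:
        # An extensionless file is now detected via the 4-byte magic header,
        # which the old suffix-only check (model.endswith(".gguf")) missed.
        magic_only = Path(tmp) / "model"
        magic_only.write_bytes(b"GGUF" + b"\x00" * 4)
        assert check_gguf_file(magic_only)

        # A ".gguf" suffix short-circuits before the header is read.
        by_suffix = Path(tmp) / "model.gguf"
        by_suffix.write_bytes(b"")
        assert check_gguf_file(by_suffix)

        # Anything that is not a regular file is rejected outright,
        # so HF repo IDs and model directories fall through unchanged.
        assert not check_gguf_file(tmp)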