Skip to content

Commit 4ca65a9

Browse files
authored
[Core][Bugfix] Accept GGUF model without .gguf extension (vllm-project#8056)
1 parent e2b2aa5 commit 4ca65a9

File tree

4 files changed

+23
-5
lines changed

4 files changed

+23
-5
lines changed

vllm/engine/arg_utils.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from vllm.executor.executor_base import ExecutorBase
1717
from vllm.logger import init_logger
1818
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
19+
from vllm.transformers_utils.utils import check_gguf_file
1920
from vllm.utils import FlexibleArgumentParser
2021

2122
if TYPE_CHECKING:
@@ -753,7 +754,7 @@ def from_cli_args(cls, args: argparse.Namespace):
753754

754755
def create_engine_config(self) -> EngineConfig:
755756
# gguf file needs a specific model loader and doesn't use hf_repo
756-
if self.model.endswith(".gguf"):
757+
if check_gguf_file(self.model):
757758
self.quantization = self.load_format = "gguf"
758759

759760
# bitsandbytes quantization needs a specific model loader

vllm/transformers_utils/config.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
MedusaConfig, MLPSpeculatorConfig,
1717
MPTConfig, NemotronConfig,
1818
RWConfig, UltravoxConfig)
19+
from vllm.transformers_utils.utils import check_gguf_file
1920

2021
if VLLM_USE_MODELSCOPE:
2122
from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
5657
) -> PretrainedConfig:
5758

5859
# Separate model folder from file path for GGUF models
59-
is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
60+
is_gguf = check_gguf_file(model)
6061
if is_gguf:
6162
kwargs["gguf_file"] = Path(model).name
6263
model = Path(model).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
112113
if VLLM_USE_MODELSCOPE:
113114
return dict()
114115
# Separate model folder from file path for GGUF models
115-
if Path(model).is_file() and Path(model).suffix == ".gguf":
116+
if check_gguf_file(model):
116117
model = Path(model).parent
117118
return get_image_processor_config(model, revision=revision, **kwargs)
118119

vllm/transformers_utils/tokenizer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from vllm.lora.request import LoRARequest
1313
from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
1414
MistralTokenizer)
15+
from vllm.transformers_utils.utils import check_gguf_file
1516
from vllm.utils import make_async
1617

1718
logger = init_logger(__name__)
@@ -96,8 +97,7 @@ def get_tokenizer(
9697
kwargs["truncation_side"] = "left"
9798

9899
# Separate model folder from file path for GGUF models
99-
is_gguf = Path(tokenizer_name).is_file() and Path(
100-
tokenizer_name).suffix == ".gguf"
100+
is_gguf = check_gguf_file(tokenizer_name)
101101
if is_gguf:
102102
kwargs["gguf_file"] = Path(tokenizer_name).name
103103
tokenizer_name = Path(tokenizer_name).parent

vllm/transformers_utils/utils.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from os import PathLike
from pathlib import Path
from typing import Union

# Every valid GGUF file begins with these four magic bytes.
_GGUF_MAGIC = b"GGUF"


def check_gguf_file(model: Union[str, PathLike]) -> bool:
    """Return True if *model* points to an existing GGUF model file.

    A path qualifies when it is a regular file and either carries the
    ``.gguf`` extension or, failing that, starts with the GGUF magic
    bytes — so models without the conventional extension are accepted.
    """
    path = Path(model)
    if not path.is_file():
        # Directories, dangling paths, etc. can never be GGUF files.
        return False
    if path.suffix == ".gguf":
        # Fast path: trust the conventional extension without reading.
        return True
    # No .gguf extension — sniff the first four bytes for the magic.
    with path.open("rb") as stream:
        return stream.read(4) == _GGUF_MAGIC

0 commit comments

Comments (0)