File tree 4 files changed +23
-5
lines changed
4 files changed +23
-5
lines changed Original file line number Diff line number Diff line change 16
16
from vllm .executor .executor_base import ExecutorBase
17
17
from vllm .logger import init_logger
18
18
from vllm .model_executor .layers .quantization import QUANTIZATION_METHODS
19
+ from vllm .transformers_utils .utils import check_gguf_file
19
20
from vllm .utils import FlexibleArgumentParser
20
21
21
22
if TYPE_CHECKING :
@@ -753,7 +754,7 @@ def from_cli_args(cls, args: argparse.Namespace):
753
754
754
755
def create_engine_config (self ) -> EngineConfig :
755
756
# gguf file needs a specific model loader and doesn't use hf_repo
756
- if self .model . endswith ( ".gguf" ):
757
+ if check_gguf_file ( self .model ):
757
758
self .quantization = self .load_format = "gguf"
758
759
759
760
# bitsandbytes quantization needs a specific model loader
Original file line number Diff line number Diff line change 16
16
MedusaConfig , MLPSpeculatorConfig ,
17
17
MPTConfig , NemotronConfig ,
18
18
RWConfig , UltravoxConfig )
19
+ from vllm .transformers_utils .utils import check_gguf_file
19
20
20
21
if VLLM_USE_MODELSCOPE :
21
22
from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
56
57
) -> PretrainedConfig :
57
58
58
59
# Separate model folder from file path for GGUF models
59
- is_gguf = Path (model ). is_file () and Path ( model ). suffix == ".gguf"
60
+ is_gguf = check_gguf_file (model )
60
61
if is_gguf :
61
62
kwargs ["gguf_file" ] = Path (model ).name
62
63
model = Path (model ).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
112
113
if VLLM_USE_MODELSCOPE :
113
114
return dict ()
114
115
# Separate model folder from file path for GGUF models
115
- if Path (model ). is_file () and Path ( model ). suffix == ".gguf" :
116
+ if check_gguf_file (model ):
116
117
model = Path (model ).parent
117
118
return get_image_processor_config (model , revision = revision , ** kwargs )
118
119
Original file line number Diff line number Diff line change 12
12
from vllm .lora .request import LoRARequest
13
13
from vllm .transformers_utils .tokenizers import (BaichuanTokenizer ,
14
14
MistralTokenizer )
15
+ from vllm .transformers_utils .utils import check_gguf_file
15
16
from vllm .utils import make_async
16
17
17
18
logger = init_logger (__name__ )
@@ -96,8 +97,7 @@ def get_tokenizer(
96
97
kwargs ["truncation_side" ] = "left"
97
98
98
99
# Separate model folder from file path for GGUF models
99
- is_gguf = Path (tokenizer_name ).is_file () and Path (
100
- tokenizer_name ).suffix == ".gguf"
100
+ is_gguf = check_gguf_file (tokenizer_name )
101
101
if is_gguf :
102
102
kwargs ["gguf_file" ] = Path (tokenizer_name ).name
103
103
tokenizer_name = Path (tokenizer_name ).parent
Original file line number Diff line number Diff line change
from os import PathLike
from pathlib import Path
from typing import Union


def check_gguf_file(model: Union[str, PathLike]) -> bool:
    """Check if the file is a GGUF model.

    A path counts as a GGUF model when it is an existing regular file and
    either carries the ``.gguf`` extension or begins with the GGUF magic
    bytes.
    """
    path = Path(model)
    if not path.is_file():
        return False
    if path.suffix == ".gguf":
        return True

    # No telltale extension: sniff the 4-byte magic number at the start
    # of the file instead.
    with path.open("rb") as stream:
        return stream.read(4) == b"GGUF"
You can’t perform that action at this time.
0 commit comments