13
13
from vllm .model_executor .models import ModelRegistry
14
14
from vllm .platforms import current_platform
15
15
from vllm .tracing import is_otel_available , otel_import_error_traceback
16
- from vllm .transformers_utils .config import (get_config ,
16
+ from vllm .transformers_utils .config import (ConfigFormat , get_config ,
17
17
get_hf_image_processor_config ,
18
18
get_hf_text_config )
19
19
from vllm .utils import (STR_NOT_IMPL_ENC_DEC_CUDAGRAPH , GiB_bytes ,
@@ -121,35 +121,37 @@ class ModelConfig:
121
121
override default neuron config that are specific to Neuron devices,
122
122
this argument will be used to configure the neuron config that
123
123
can not be gathered from the vllm arguments.
124
+ config_format: The config format to load.
125
+ Defaults to 'auto', which in turn defaults to 'hf'.
124
126
"""
125
127
126
- def __init__ (
127
- self ,
128
- model : str ,
129
- tokenizer : str ,
130
- tokenizer_mode : str ,
131
- trust_remote_code : bool ,
132
- dtype : Union [ str , torch . dtype ] ,
133
- seed : int ,
134
- revision : Optional [str ] = None ,
135
- code_revision : Optional [str ] = None ,
136
- rope_scaling : Optional [dict ] = None ,
137
- rope_theta : Optional [float ] = None ,
138
- tokenizer_revision : Optional [str ] = None ,
139
- max_model_len : Optional [int ] = None ,
140
- spec_target_max_model_len : Optional [int ] = None ,
141
- quantization : Optional [str ] = None ,
142
- quantization_param_path : Optional [str ] = None ,
143
- enforce_eager : Optional [bool ] = None ,
144
- max_context_len_to_capture : Optional [int ] = None ,
145
- max_seq_len_to_capture : Optional [ int ] = None ,
146
- max_logprobs : int = 20 ,
147
- disable_sliding_window : bool = False ,
148
- skip_tokenizer_init : bool = False ,
149
- served_model_name : Optional [Union [str , List [ str ] ]] = None ,
150
- limit_mm_per_prompt : Optional [ Mapping [ str , int ]] = None ,
151
- use_async_output_proc : bool = True ,
152
- override_neuron_config : Optional [ Dict [ str , Any ]] = None ) -> None :
128
+ def __init__ (self ,
129
+ model : str ,
130
+ tokenizer : str ,
131
+ tokenizer_mode : str ,
132
+ trust_remote_code : bool ,
133
+ dtype : Union [ str , torch . dtype ] ,
134
+ seed : int ,
135
+ revision : Optional [ str ] = None ,
136
+ code_revision : Optional [str ] = None ,
137
+ rope_scaling : Optional [dict ] = None ,
138
+ rope_theta : Optional [float ] = None ,
139
+ tokenizer_revision : Optional [str ] = None ,
140
+ max_model_len : Optional [int ] = None ,
141
+ spec_target_max_model_len : Optional [int ] = None ,
142
+ quantization : Optional [str ] = None ,
143
+ quantization_param_path : Optional [str ] = None ,
144
+ enforce_eager : Optional [bool ] = None ,
145
+ max_context_len_to_capture : Optional [int ] = None ,
146
+ max_seq_len_to_capture : Optional [int ] = None ,
147
+ max_logprobs : int = 20 ,
148
+ disable_sliding_window : bool = False ,
149
+ skip_tokenizer_init : bool = False ,
150
+ served_model_name : Optional [ Union [ str , List [ str ]]] = None ,
151
+ limit_mm_per_prompt : Optional [Mapping [str , int ]] = None ,
152
+ use_async_output_proc : bool = True ,
153
+ override_neuron_config : Optional [ Dict [ str , Any ]] = None ,
154
+ config_format : ConfigFormat = ConfigFormat . AUTO ) -> None :
153
155
self .model = model
154
156
self .tokenizer = tokenizer
155
157
self .tokenizer_mode = tokenizer_mode
@@ -176,7 +178,8 @@ def __init__(
176
178
self .skip_tokenizer_init = skip_tokenizer_init
177
179
178
180
self .hf_config = get_config (self .model , trust_remote_code , revision ,
179
- code_revision , rope_scaling , rope_theta )
181
+ code_revision , rope_scaling , rope_theta ,
182
+ config_format )
180
183
self .hf_text_config = get_hf_text_config (self .hf_config )
181
184
self .hf_image_processor_config = get_hf_image_processor_config (
182
185
self .model , revision )
@@ -746,6 +749,7 @@ class LoadFormat(str, enum.Enum):
746
749
SHARDED_STATE = "sharded_state"
747
750
GGUF = "gguf"
748
751
BITSANDBYTES = "bitsandbytes"
752
+ MISTRAL = "mistral"
749
753
750
754
751
755
@dataclass
0 commit comments