[Model] Allow loading from original Mistral format #8168
Changes from 17 commits
File: vllm/config.py

```diff
@@ -119,35 +119,37 @@ class ModelConfig:
         override default neuron config that are specific to Neuron devices,
         this argument will be used to configure the neuron config that
         can not be gathered from the vllm arguments.
+        load_params_config: Load the config from mistral format
+            (params.json) instead of config.json.
```
Review comment: I think you made some good points on supporting checkpoint formats other than HF. Given that assumption, I think it would be best to future-proof the interface of config overrides. I would like to propose changing this parameter to something like …

Reply: Makes a lot of sense!
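The reviewer's concrete proposal is cut off above, so the following is only a hypothetical sketch of a future-proofed, format-based override; `ConfigFormat`, `config_format`, and `get_config_sketch` are assumed names for illustration, not code from this PR.

```python
# Hypothetical sketch only: ConfigFormat / config_format are assumed names
# illustrating a format selector rather than a params.json-specific boolean.
import enum
from typing import Any, Dict


class ConfigFormat(str, enum.Enum):
    AUTO = "auto"        # detect from whichever config file the checkpoint ships
    HF = "hf"            # config.json via transformers AutoConfig
    MISTRAL = "mistral"  # params.json in the original Mistral layout


def get_config_sketch(model: str,
                      config_format: ConfigFormat = ConfigFormat.AUTO,
                      **kwargs: Any) -> Dict[str, Any]:
    """Dispatch on a format selector so a new format only adds an enum value."""
    if config_format == ConfigFormat.MISTRAL:
        return {"source": "params.json", "model": model}
    # AUTO / HF fall through to the usual config.json path.
    return {"source": "config.json", "model": model}


print(get_config_sketch("some/model", ConfigFormat.MISTRAL))
```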
```diff
     """

-    def __init__(
-        self,
-        model: str,
-        tokenizer: str,
-        tokenizer_mode: str,
-        trust_remote_code: bool,
-        dtype: Union[str, torch.dtype],
-        seed: int,
-        revision: Optional[str] = None,
-        code_revision: Optional[str] = None,
-        rope_scaling: Optional[dict] = None,
-        rope_theta: Optional[float] = None,
-        tokenizer_revision: Optional[str] = None,
-        max_model_len: Optional[int] = None,
-        spec_target_max_model_len: Optional[int] = None,
-        quantization: Optional[str] = None,
-        quantization_param_path: Optional[str] = None,
-        enforce_eager: Optional[bool] = None,
-        max_context_len_to_capture: Optional[int] = None,
-        max_seq_len_to_capture: Optional[int] = None,
-        max_logprobs: int = 20,
-        disable_sliding_window: bool = False,
-        skip_tokenizer_init: bool = False,
-        served_model_name: Optional[Union[str, List[str]]] = None,
-        limit_mm_per_prompt: Optional[Mapping[str, int]] = None,
-        use_async_output_proc: bool = True,
-        override_neuron_config: Optional[Dict[str, Any]] = None) -> None:
+    def __init__(self,
+                 model: str,
+                 tokenizer: str,
+                 tokenizer_mode: str,
+                 trust_remote_code: bool,
+                 dtype: Union[str, torch.dtype],
+                 seed: int,
+                 revision: Optional[str] = None,
+                 code_revision: Optional[str] = None,
+                 rope_scaling: Optional[dict] = None,
+                 rope_theta: Optional[float] = None,
+                 tokenizer_revision: Optional[str] = None,
+                 max_model_len: Optional[int] = None,
+                 spec_target_max_model_len: Optional[int] = None,
+                 quantization: Optional[str] = None,
+                 quantization_param_path: Optional[str] = None,
+                 enforce_eager: Optional[bool] = None,
+                 max_context_len_to_capture: Optional[int] = None,
+                 max_seq_len_to_capture: Optional[int] = None,
+                 max_logprobs: int = 20,
+                 disable_sliding_window: bool = False,
+                 skip_tokenizer_init: bool = False,
+                 served_model_name: Optional[Union[str, List[str]]] = None,
+                 limit_mm_per_prompt: Optional[Mapping[str, int]] = None,
+                 use_async_output_proc: bool = True,
+                 override_neuron_config: Optional[Dict[str, Any]] = None,
+                 load_params_config: bool = False) -> None:
         self.model = model
         self.tokenizer = tokenizer
         self.tokenizer_mode = tokenizer_mode
@@ -174,7 +176,8 @@ def __init__(
         self.skip_tokenizer_init = skip_tokenizer_init

         self.hf_config = get_config(self.model, trust_remote_code, revision,
-                                    code_revision, rope_scaling, rope_theta)
+                                    code_revision, rope_scaling, rope_theta,
+                                    load_params_config)
         self.hf_text_config = get_hf_text_config(self.hf_config)
         self.hf_image_processor_config = get_hf_image_processor_config(
             self.model, revision)
@@ -744,6 +747,7 @@ class LoadFormat(str, enum.Enum):
     SHARDED_STATE = "sharded_state"
     GGUF = "gguf"
     BITSANDBYTES = "bitsandbytes"
+    MISTRAL = "mistral"


 @dataclass
```
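With the new `LoadFormat.MISTRAL` value, selecting the format from the Python API should look roughly like the sketch below; whether `load_format="mistral"` alone is enough to trigger the params.json path is an assumption based on how the existing `LoadFormat` values are forwarded through `EngineArgs`, so treat this as illustrative rather than the PR's documented usage.

```python
# Sketch: assumes load_format="mistral" is forwarded to EngineArgs the same
# way existing values ("gguf", "bitsandbytes", ...) are; not taken from this PR.
from vllm import LLM, SamplingParams

llm = LLM(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    load_format="mistral",  # consolidated*.safetensors + params.json checkpoint
)

params = SamplingParams(temperature=0.0, max_tokens=32)
print(llm.generate(["[INST] Say hello. [/INST]"], params)[0].outputs[0].text)
```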
File: vllm/transformers_utils/config.py

```diff
@@ -1,7 +1,9 @@
 import contextlib
+import json
 from pathlib import Path
 from typing import Any, Dict, Optional, Type, Union

+from huggingface_hub import hf_hub_download
 from transformers import GenerationConfig, PretrainedConfig
 from transformers.models.auto.image_processing_auto import (
     get_image_processor_config)
@@ -53,22 +55,26 @@ def get_config(
     code_revision: Optional[str] = None,
     rope_scaling: Optional[dict] = None,
     rope_theta: Optional[float] = None,
+    load_from_params: bool = False,
     **kwargs,
 ) -> PretrainedConfig:

     # Separate model folder from file path for GGUF models
     is_gguf = check_gguf_file(model)
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent

     try:
-        config = AutoConfig.from_pretrained(
-            model,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            code_revision=code_revision,
-            **kwargs)
+        if load_from_params:
+            config = load_params_config(model, revision)
+        else:
+            config = AutoConfig.from_pretrained(
+                model,
+                trust_remote_code=trust_remote_code,
+                revision=revision,
+                code_revision=code_revision,
+                **kwargs)
     except ValueError as e:
         if (not trust_remote_code and
                 "requires you to execute the configuration file" in str(e)):
```
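As a rough usage sketch of the new branch (argument names follow the signature shown above; the repo id is just an example, and running this would actually download params.json from the Hub):

```python
# Sketch: exercising the new load_from_params branch of get_config().
from vllm.transformers_utils.config import get_config

config = get_config(
    "mistralai/Mistral-7B-Instruct-v0.3",  # repo that ships params.json
    trust_remote_code=False,
    load_from_params=True,
)
# Attributes use the HF-style names produced by the remapping defined below.
print(config.architectures, config.hidden_size, config.num_key_value_heads)
```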
```diff
@@ -104,6 +110,54 @@ def get_config(
     return config


+def load_params_config(model, revision) -> PretrainedConfig:
```
Review comment: Is there a reason why you have this new config? It seems to have the same information you would have in the config.json, just named differently.

Reply: The main reason is that the original format is always stored in params.json, which accompanies the consolidated.safetensors checkpoints. Guess there are two problems with config.json …
```diff
+    # This function loads a params.json config which
+    # should be used when loading models in mistral format
+
+    config_file_name = "params.json"
+
+    config_path = Path(model) / config_file_name
+
+    if not config_path.is_file():
+        config_path = Path(
+            hf_hub_download(model, config_file_name, revision=revision))
+
+    with open(config_path, 'r') as file:
+        config_dict = json.load(file)
+
+    config_mapping = {
+        "dim": "hidden_size",
+        "norm_eps": "rms_norm_eps",
+        "n_kv_heads": "num_key_value_heads",
+        "n_layers": "num_hidden_layers",
+        "n_heads": "num_attention_heads",
+        "hidden_dim": "intermediate_size",
+    }
+
+    def recurse_elems(elem: Any):
+        if isinstance(elem, dict):
+            config_dict = {}
+            for key, value in elem.items():
+                key = config_mapping.get(key, key)
+                config_dict[key] = recurse_elems(value)
+            return PretrainedConfig(**config_dict)
+        else:
+            return elem
+
+    config_dict["model_type"] = config_dict.get("model_type", "transformer")
+    config_dict["hidden_act"] = config_dict.get("activation", "silu")
+    config_dict["tie_word_embeddings"] = config_dict.get(
+        "tie_embeddings", False)
+
+    if config_dict["model_type"] == "transformer":
+        if "moe" in config_dict:
+            config_dict["architectures"] = ["MixtralForCausalLM"]
+        else:
+            config_dict["architectures"] = ["MistralForCausalLM"]
+
+    return recurse_elems(config_dict)
+
+
 def get_hf_image_processor_config(
     model: Union[str, Path],
     revision: Optional[str] = None,
```
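To make the key remapping concrete, here is a small standalone sketch of the same params.json to HF-style renaming; the numeric values are illustrative, not read from a real checkpoint.

```python
# Standalone sketch of the params.json -> HF-style key renaming shown above.
params = {
    "dim": 4096,          # -> hidden_size
    "n_layers": 32,       # -> num_hidden_layers
    "n_heads": 32,        # -> num_attention_heads
    "n_kv_heads": 8,      # -> num_key_value_heads
    "hidden_dim": 14336,  # -> intermediate_size
    "norm_eps": 1e-5,     # -> rms_norm_eps
    "vocab_size": 32768,  # unmapped keys pass through unchanged
}

config_mapping = {
    "dim": "hidden_size",
    "norm_eps": "rms_norm_eps",
    "n_kv_heads": "num_key_value_heads",
    "n_layers": "num_hidden_layers",
    "n_heads": "num_attention_heads",
    "hidden_dim": "intermediate_size",
}

renamed = {config_mapping.get(k, k): v for k, v in params.items()}
print(renamed["hidden_size"], renamed["num_key_value_heads"], renamed["vocab_size"])
```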
Review comment: I think it would be best if you specifically made a test for "mistralai/Mistral-7B-Instruct-v0.3" where you test the model load with both "safetensors" and "mistral" (and HF, just to ensure a reference), since both checkpoint formats are present on that model card.

Reply: That makes sense! Thanks - will take care of it in a bit.
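A rough sketch of what such a comparison test could look like; the structure, the `load_format` values, and the exact-match assertion are assumptions rather than the test that eventually landed in the PR.

```python
# Sketch of the suggested comparison test; parameters and tolerances are
# assumptions, not the test added to the PR.
import pytest
from vllm import LLM, SamplingParams

MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
PROMPTS = ["The capital of France is"]
GREEDY = SamplingParams(temperature=0.0, max_tokens=16)


@pytest.mark.parametrize("load_format", ["safetensors", "mistral"])
def test_mistral_format_matches_hf(load_format):
    # Reference: the default Hugging Face loading path.
    ref = LLM(model=MODEL).generate(PROMPTS, GREEDY)[0].outputs[0].text

    # Same model loaded through the checkpoint format under test.
    out = LLM(model=MODEL, load_format=load_format).generate(
        PROMPTS, GREEDY)[0].outputs[0].text

    # Greedy decoding over identical weights should produce identical text.
    assert out == ref
```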