-
Notifications
You must be signed in to change notification settings - Fork 33.7k
Fix vllm cis #45139
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix vllm cis #45139
Changes from 22 commits
f36ea56
013f76a
7a6f698
04fb689
1b53a3f
4895e1f
b3febee
411eb60
55809c8
01cdc60
0d8628b
d3484b7
60fd4a1
98ae737
814c3a8
0889375
8ea59cb
1852250
9a77627
db0529e
1d81931
a104d58
30f6d73
12bf667
907402a
f924677
4cd3b43
ae4860e
c8985c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import inspect | ||
| import math | ||
| import warnings | ||
| from collections.abc import Callable | ||
|
|
@@ -626,10 +627,47 @@ def _compute_llama3_parameters( | |
| return inv_freq_llama, attention_factor | ||
|
|
||
|
|
||
| def _compute_default_rope_parameters( | ||
| config: Optional["PreTrainedConfig"] = None, | ||
| device: Optional["torch.device"] = None, | ||
| seq_len: int | None = None, | ||
| layer_type: str | None = None, | ||
| ) -> tuple["torch.Tensor", float]: | ||
| """ | ||
| Computes the inverse frequencies for the default RoPE implementation (no scaling). | ||
|
|
||
| Args: | ||
| config ([`~transformers.PreTrainedConfig`]): | ||
| The model configuration. | ||
| device (`torch.device`): | ||
| The device to use for initialization of the inverse frequencies. | ||
| seq_len (`int`, *optional*): | ||
| The current sequence length. Unused for this type of RoPE. | ||
| layer_type (`str`, *optional*): | ||
| The layer type for per-layer rope configs. | ||
|
|
||
| Returns: | ||
| Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the | ||
| post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE). | ||
| """ | ||
| config.standardize_rope_params() | ||
| rope_parameters_dict = config.rope_parameters[layer_type] if layer_type is not None else config.rope_parameters | ||
|
|
||
| base = rope_parameters_dict.get("rope_theta", getattr(config, "rope_theta", config.default_theta)) | ||
| partial_rotary_factor = rope_parameters_dict.get("partial_rotary_factor", 1.0) | ||
| head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads | ||
| dim = int(head_dim * partial_rotary_factor) | ||
| attention_factor = 1.0 # Unused in this type of RoPE | ||
|
|
||
| inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.int64).to(device=device, dtype=torch.float) / dim)) | ||
| return inv_freq, attention_factor | ||
|
|
||
|
|
||
| # This maps the "rope_type" string field in rope config to the corresponding function to compute the RoPE parameters | ||
| # from the model config. You can append new {'rope_type': callable} pairs to this rope_parameters to enable custom RoPE | ||
| # parameterizations, as long as the callable has the same signature. | ||
| ROPE_INIT_FUNCTIONS: dict[str, Callable[..., tuple["torch.Tensor", float]]] = { | ||
| "default": _compute_default_rope_parameters, | ||
| "linear": _compute_linear_scaling_rope_parameters, | ||
| "dynamic": _compute_dynamic_ntk_parameters, | ||
| "yarn": _compute_yarn_parameters, | ||
|
|
@@ -771,10 +809,25 @@ def standardize_rope_params(self): | |
|
|
||
| self.rope_parameters = rope_parameters | ||
|
|
||
| def validate_rope(self: "PreTrainedConfig"): | ||
| def validate_rope(self: "PreTrainedConfig", **kwargs): | ||
| """ | ||
| Validate the RoPE config arguments, given a `"PreTrainedConfig"` object | ||
|
|
||
| Note: the `ignore_keys` keyword argument is accepted for backward compatibility with external libraries | ||
| (e.g. vllm) but is deprecated. Set `config.ignore_keys_at_rope_validation` directly instead. | ||
| """ | ||
| if kwargs: | ||
| import warnings | ||
|
|
||
| warnings.warn( | ||
| "Passing keyword arguments to `validate_rope()` is deprecated. " | ||
| "Set `config.ignore_keys_at_rope_validation` directly instead.", | ||
| FutureWarning, | ||
| stacklevel=2, | ||
| ) | ||
| ignore_keys = kwargs.pop("ignore_keys", None) | ||
| if ignore_keys is not None: | ||
| self.ignore_keys_at_rope_validation = self.ignore_keys_at_rope_validation | ignore_keys | ||
| # Don't validate if no rope_parameters found (`None`) or if it's an empty dict | ||
| # Note that validation runs every time a new config is created, even if config is non-RoPE | ||
| rope_parameters_dict = getattr(self, "rope_parameters", None) | ||
|
|
@@ -800,11 +853,20 @@ def validate_rope(self: "PreTrainedConfig"): | |
| f"Missing validation function in 'RotaryEmbeddingConfigMixin' for 'rope_type'='{rope_type}'" | ||
| ) | ||
|
|
||
| # Override __signature__ so that @strict dataclass validation (huggingface_hub) sees only `self`. | ||
| # The method still accepts **kwargs for backward compatibility with external callers (e.g. vllm). | ||
| validate_rope.__signature__ = inspect.Signature( | ||
| [inspect.Parameter("self", inspect.Parameter.POSITIONAL_OR_KEYWORD)] | ||
| ) | ||
|
|
||
| def _validate_default_rope_parameters(self, rope_parameters: dict, ignore_keys: set | None = None): | ||
| required_keys = {"rope_type", "rope_theta"} | ||
| required_keys = {"rope_type"} | ||
| optional_keys = {"rope_theta"} | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This likely needs to be done elsewhere too, then? I think they all need rope theta?
Comment on lines
+805
to
+806
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, this kind of defeats the point of validation, because a RoPE dict with no theta isn't valid for our modules.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, but we always default to `default_theta` if it's not there, no?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Validation always happens after the defaults are set, so ideally it shouldn't raise an error. Do we know why the theta was missing? |
||
| received_keys = set(rope_parameters.keys()) | ||
| rope_type = rope_parameters["rope_type"] | ||
| self._check_received_keys(rope_type, received_keys, required_keys, ignore_keys=ignore_keys) | ||
| self._check_received_keys( | ||
| rope_type, received_keys, required_keys, optional_keys=optional_keys, ignore_keys=ignore_keys | ||
| ) | ||
|
|
||
| def _validate_linear_rope_parameters(self, rope_parameters: dict, ignore_keys: set | None = None): | ||
| required_keys = {"rope_type", "factor", "rope_theta"} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3618,7 +3618,10 @@ def get_init_context( | |
| elif is_quantized: | ||
| init_contexts.extend([torch.device("meta"), set_quantized_state()]) | ||
| else: | ||
| init_contexts.append(torch.device("meta")) | ||
| # meta_device_safe_creation_ops patches torch.linspace to default to CPU | ||
| # so that custom models calling .item() during __init__ (e.g. drop-path | ||
| # schedules) don't crash on meta tensors. | ||
| init_contexts.extend([torch.device("meta"), init.meta_device_safe_creation_ops()]) | ||
|
|
||
| return init_contexts | ||
|
|
||
|
|
@@ -4612,7 +4615,7 @@ def mark_tied_weights_as_initialized(self, loading_info): | |
| later as they will be tied (overwritten) anyway. | ||
| This is very important as most embeddings are tied, and they are huge params (vocabularies are often 256k), so | ||
| running inits on them is very costly.""" | ||
| for tied_param in self.all_tied_weights_keys.keys(): | ||
| for tied_param in getattr(self, "all_tied_weights_keys", {}).keys(): | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. fix remote code |
||
| param = self.get_parameter(tied_param) | ||
| param._is_hf_initialized = True | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remote code BC?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc @zucchini-nlp
It is true that remote code won't have it, but we would likely also need to refactor a lot of models, which seems risky, especially for models that have a different default init; we would need to check whether some code exists first and then use it as a fallback.