-
Notifications
You must be signed in to change notification settings - Fork 33.8k
Fix vllm cis #45139
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix vllm cis #45139
Changes from 26 commits
f36ea56
013f76a
7a6f698
04fb689
1b53a3f
4895e1f
b3febee
411eb60
55809c8
01cdc60
0d8628b
d3484b7
60fd4a1
98ae737
814c3a8
0889375
8ea59cb
1852250
9a77627
db0529e
1d81931
a104d58
30f6d73
12bf667
907402a
f924677
4cd3b43
ae4860e
c8985c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -801,34 +801,44 @@ def validate_rope(self: "PreTrainedConfig"): | |
| ) | ||
|
|
||
| def _validate_default_rope_parameters(self, rope_parameters: dict, ignore_keys: set | None = None): | ||
| required_keys = {"rope_type", "rope_theta"} | ||
| required_keys = {"rope_type"} | ||
| optional_keys = {"rope_theta"} | ||
|
Comment on lines
+805
to
+806
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, this kinda defeats the point of validation, because a RoPE dict with no theta isn't valid for our modules.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, but we always default to default_theta if it's not there, no?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Validation always happens after the defaults are set, so ideally it shouldn't raise an error. Do we know why the theta was missing? |
||
| received_keys = set(rope_parameters.keys()) | ||
| rope_type = rope_parameters["rope_type"] | ||
| self._check_received_keys(rope_type, received_keys, required_keys, ignore_keys=ignore_keys) | ||
| self._check_received_keys( | ||
| rope_type, received_keys, required_keys, optional_keys=optional_keys, ignore_keys=ignore_keys | ||
| ) | ||
|
|
||
| def _validate_linear_rope_parameters(self, rope_parameters: dict, ignore_keys: set | None = None): | ||
| required_keys = {"rope_type", "factor", "rope_theta"} | ||
| required_keys = {"rope_type", "factor"} | ||
| optional_keys = {"rope_theta"} | ||
| received_keys = set(rope_parameters.keys()) | ||
| rope_type = rope_parameters["rope_type"] | ||
| self._check_received_keys(rope_type, received_keys, required_keys, ignore_keys=ignore_keys) | ||
| self._check_received_keys( | ||
| rope_type, received_keys, required_keys, optional_keys=optional_keys, ignore_keys=ignore_keys | ||
| ) | ||
|
|
||
| factor = rope_parameters["factor"] | ||
| if factor is None or not isinstance(factor, float) or factor < 1.0: | ||
| logger.warning(f"`rope_parameters`'s factor field must be a float >= 1, got {factor}") | ||
|
|
||
| def _validate_dynamic_rope_parameters(self, rope_parameters: dict, ignore_keys: set | None = None): | ||
| required_keys = {"rope_type", "factor", "rope_theta"} | ||
| required_keys = {"rope_type", "factor"} | ||
| optional_keys = {"rope_theta"} | ||
| received_keys = set(rope_parameters.keys()) | ||
| rope_type = rope_parameters["rope_type"] | ||
| self._check_received_keys(rope_type, received_keys, required_keys, ignore_keys=ignore_keys) | ||
| self._check_received_keys( | ||
| rope_type, received_keys, required_keys, optional_keys=optional_keys, ignore_keys=ignore_keys | ||
| ) | ||
|
|
||
| factor = rope_parameters["factor"] | ||
| if factor is None or not isinstance(factor, float) or factor < 1.0: | ||
| logger.warning(f"`rope_parameters`'s factor field must be a float >= 1, got {factor}") | ||
|
|
||
| def _validate_yarn_rope_parameters(self, rope_parameters: dict, ignore_keys: set | None = None): | ||
| required_keys = {"rope_type", "factor", "rope_theta", "original_max_position_embeddings"} | ||
| required_keys = {"rope_type", "factor", "original_max_position_embeddings"} | ||
| optional_keys = { | ||
| "rope_theta", | ||
| "attention_factor", | ||
| "beta_fast", | ||
| "beta_slow", | ||
|
|
@@ -878,8 +888,8 @@ def _validate_yarn_rope_parameters(self, rope_parameters: dict, ignore_keys: set | |
| ) | ||
|
|
||
| def _validate_longrope_rope_parameters(self, rope_parameters: dict, ignore_keys: set | None = None): | ||
| required_keys = {"rope_type", "short_factor", "long_factor", "rope_theta", "original_max_position_embeddings"} | ||
| optional_keys = {"attention_factor", "factor"} | ||
| required_keys = {"rope_type", "short_factor", "long_factor", "original_max_position_embeddings"} | ||
| optional_keys = {"rope_theta", "attention_factor", "factor"} | ||
| received_keys = set(rope_parameters.keys()) | ||
| rope_type = rope_parameters["rope_type"] | ||
| self._check_received_keys(rope_type, received_keys, required_keys, optional_keys, ignore_keys=ignore_keys) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3618,7 +3618,10 @@ def get_init_context( | |
| elif is_quantized: | ||
| init_contexts.extend([torch.device("meta"), set_quantized_state()]) | ||
| else: | ||
| init_contexts.append(torch.device("meta")) | ||
| # meta_device_safe_creation_ops patches torch.linspace to default to CPU | ||
| # so that custom models calling .item() during __init__ (e.g. drop-path | ||
| # schedules) don't crash on meta tensors. | ||
| init_contexts.extend([torch.device("meta"), init.meta_device_safe_creation_ops()]) | ||
|
|
||
| return init_contexts | ||
|
|
||
|
|
@@ -4612,7 +4615,7 @@ def mark_tied_weights_as_initialized(self, loading_info): | |
| later as they will be tied (overwritten) anyway. | ||
| This is very important as most embeddings are tied, and they are huge params (vocabularies are often 256k), so | ||
| running inits on them is very costly.""" | ||
| for tied_param in self.all_tied_weights_keys.keys(): | ||
| for tied_param in getattr(self, "all_tied_weights_keys", {}).keys(): | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. fix remote code |
||
| param = self.get_parameter(tied_param) | ||
| param._is_hf_initialized = True | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
likely needs to be done elsewhere then too? I think all need rope theta?