[Bugfix] Fix the issue of the acceptance rate decline for Qwen3-30B-A3B-EAGLE3 #6138
Changes from all commits
```diff
@@ -200,19 +200,49 @@ def load_model(self, model: nn.Module) -> None:
         )
         # If pp>1, the weights of mtp and the main model's embedding are not on the same device.
         # check if mtp model use main model's embedding and LMhead
-        if hasattr(model, "model") and hasattr(model.model, "embed_tokens") and \
-                torch.equal(self.model.model.embed_tokens.weight,
-                            model.model.embed_tokens.weight):
-            logger.info(
-                "The EAGLE head shares the same vocab embedding" \
-                " with the target model."
-            )
-            self.model.model.embed_tokens = target_embed_tokens
+        share_embeddings = False
+        if hasattr(self.model, "has_own_embed_tokens"):
+            # EAGLE model
+            if not self.model.has_own_embed_tokens:
+                share_embeddings = True
+                logger.info(
+                    "Detected EAGLE model without its own embed_tokens in the"
+                    " checkpoint. Sharing target model embedding weights with the"
+                    " draft model."
+                )
+            elif (
+                isinstance(target_embed_tokens.weight, torch.Tensor)
+                and isinstance(self.model.model.embed_tokens.weight, torch.Tensor)
+                # TODO: Offload to CPU for comparison to avoid extra NPU memory
+                # usage in CI testing environments with limited NPU memory
+                and torch.equal(
+                    target_embed_tokens.weight.cpu(),
+                    self.model.model.embed_tokens.weight.cpu(),
+                )
+            ):
+                share_embeddings = True
+                logger.info(
+                    "Detected EAGLE model with embed_tokens identical to the target"
+                    " model. Sharing target model embedding weights with the draft"
+                    " model."
+                )
+            else:
+                logger.info(
+                    "Detected EAGLE model with distinct embed_tokens weights. "
+                    "Keeping separate embedding weights from the target model."
+                )
         else:
+            # MTP model
+            share_embeddings = True
             logger.info(
-                " The EAGLE head loaded its own vocab embedding" \
-                " weights instead of sharing them with the target model."
+                "Detected MTP model. "
+                "Sharing target model embedding weights with the draft model."
             )
+
+        if share_embeddings:
+            if hasattr(self.model.model, "embed_tokens"):
+                del self.model.model.embed_tokens
+            self.model.model.embed_tokens = target_embed_tokens
+        else:
+            logger.info(
+                "Since PP > 1 or other reasons the model head loaded its own vocab embedding" \
```

(The remainder of the hunk failed to load.)

Review comments

Contributor: Please follow the original logic.

Contributor: Oops, it's my mistake.

Contributor: LGTM.
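For context, here is a minimal, self-contained sketch of the decision logic the patch introduces. `DraftStub`, `should_share_embeddings`, and the toy embedding sizes are illustrative inventions, not part of the patched code; only the branching mirrors the diff above.

```python
import torch
import torch.nn as nn


class DraftStub(nn.Module):
    """Hypothetical stand-in for the draft-model wrapper; not the real class."""

    def __init__(self, embed: nn.Embedding, has_own_embed_tokens: bool):
        super().__init__()
        self.embed_tokens = embed
        self.has_own_embed_tokens = has_own_embed_tokens


def should_share_embeddings(draft: nn.Module, target_embed: nn.Embedding) -> bool:
    # MTP heads carry no has_own_embed_tokens flag: always reuse the target embedding.
    if not hasattr(draft, "has_own_embed_tokens"):
        return True
    # EAGLE head whose checkpoint shipped no embed_tokens: reuse as well.
    if not draft.has_own_embed_tokens:
        return True
    # EAGLE head with its own weights: reuse only if they match exactly.
    # The comparison runs on CPU copies so it costs no extra NPU memory.
    return torch.equal(target_embed.weight.cpu(), draft.embed_tokens.weight.cpu())


target = nn.Embedding(8, 4)

mtp = nn.Module()  # no flag at all, i.e. the MTP case
print(should_share_embeddings(mtp, target))        # True  -> share

identical = DraftStub(nn.Embedding(8, 4), has_own_embed_tokens=True)
identical.embed_tokens.weight.data.copy_(target.weight.data)
print(should_share_embeddings(identical, target))  # True  -> share

distinct = DraftStub(nn.Embedding(8, 4), has_own_embed_tokens=True)
print(should_share_embeddings(distinct, target))   # False -> keep separate weights
```

Only the last case pays for a weight comparison, and the `.cpu()` copies keep that check off the NPU, per the TODO comment in the diff.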
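On the sharing path, the patch deletes the draft's `embed_tokens` before rebinding it to the target's module. A small sketch of what that rebinding does, assuming standard `torch.nn` submodule semantics (the names here are illustrative):

```python
import torch.nn as nn

target_embed = nn.Embedding(8, 4)

draft = nn.Module()
draft.embed_tokens = nn.Embedding(8, 4)  # the draft's own copy from its checkpoint

# Drop the duplicate first, then rebind to the target's module so both
# models reference the same nn.Embedding object.
del draft.embed_tokens
draft.embed_tokens = target_embed

assert draft.embed_tokens is target_embed  # one shared embedding, no second copy
```

Plain reassignment would also replace the registered submodule, so the explicit `del` mainly documents that the duplicate weights are intentionally dropped; after the rebind, the draft's original embedding has no remaining reference and its memory can be reclaimed.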