-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Fix MTP with Deepseek R1 Fp4 #7376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
65490a2
c653b1b
bf8af63
241d55f
9d6da0f
357bbf8
1baaaea
6c2c8a7
055ee31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2147,7 +2147,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=Fal | |
| q_a_proj_weight = cached_a_proj[q_a_proj_name] | ||
| kv_a_proj_weight = cached_a_proj[kv_a_proj_name] | ||
| cat_dim = 0 | ||
| if ( | ||
| if self.quant_config is not None and ( | ||
| self.quant_config.get_name() == "awq" | ||
| or self.quant_config.get_name() == "moe_wna16" | ||
| ): | ||
|
||
|
|
@@ -2178,6 +2178,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=Fal | |
| for scale in ["k_scale", "v_scale"]: | ||
| if scale in name: | ||
| name = name.replace(f"{scale[0]}_proj", "attn_mqa") | ||
| break | ||
|
||
| if name not in params_dict: | ||
| # modelopt ckpt contains weights not needed for the MTP module: | ||
| # model.decoder.self_attn.attn_mqa.v_scale and | ||
| # model.decoder.self_attn.attn_mqa.k_scale | ||
| logger.warning(f"{name} not found in params_dict.") | ||
| continue | ||
|
||
| param = params_dict[name] | ||
| weight_loader = getattr( | ||
| param, "weight_loader", default_weight_loader | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.