-
Notifications
You must be signed in to change notification settings - Fork 1.1k
[Misc]Main2main to 0420 #8610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Misc]Main2main to 0420 #8610
Changes from all commits
a703c5c
066f590
8f8796b
a3d9883
6c6b91c
6e28c78
e6d2eae
3b5c6ea
7f497ba
bfb5ae3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -12,6 +12,7 @@ | |||||||||||||||||||||
| from vllm_ascend.attention.sfa_v1 import AscendSFAImpl, AscendSFAMetadata, AscendSFAMetadataBuilder | ||||||||||||||||||||||
| from vllm_ascend.attention.utils import AscendCommonAttentionMetadata, enabling_mlapo, split_decodes_and_prefills | ||||||||||||||||||||||
| from vllm_ascend.ops.triton.rope import rope_forward_triton_siso | ||||||||||||||||||||||
| from vllm_ascend.utils import vllm_version_is | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| M = TypeVar("M", bound=AscendSFAMetadata) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
|
|
@@ -385,7 +386,11 @@ def indexer_select_post_process( | |||||||||||||||||||||
| actual_seq_lengths_query: torch.Tensor, | ||||||||||||||||||||||
| actual_seq_lengths_key: torch.Tensor, | ||||||||||||||||||||||
| ): | ||||||||||||||||||||||
| weights, _ = self.weights_proj(x) | ||||||||||||||||||||||
| if vllm_version_is("0.19.0"): | ||||||||||||||||||||||
| weights, _ = self.weights_proj(x) | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| kw, _ = self.wk_weights_proj(x) | ||||||||||||||||||||||
| weights = kw[:, self.head_dim :] | ||||||||||||||||||||||
|
Comment on lines
+389
to
+393
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The logic for fused weights appears to be inverted. If v0.19.0 has the fused projection, this branch should use `self.wk_weights_proj` when `vllm_version_is("0.19.0")` is true and fall back to the separate `self.weights_proj` otherwise — the current code does the opposite.
Suggested change
Suggested change
|
||||||||||||||||||||||
|
|
||||||||||||||||||||||
| q_li, _ = self.wq_b(q_c) # [b,s,1536] @ [1536,64*128] = [b,s,64*128] | ||||||||||||||||||||||
| q_li = q_li.view(-1, self.n_head, self.head_dim) # [n_toks,64,128] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -55,6 +55,7 @@ | |||||||||||||||||||||
| enable_dsa_cp_with_o_proj_tp, | ||||||||||||||||||||||
| get_weight_prefetch_method, | ||||||||||||||||||||||
| maybe_trans_nz, | ||||||||||||||||||||||
| vllm_version_is, | ||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| from vllm_ascend.worker.npu_input_batch import NPUInputBatch | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
|
|
@@ -438,8 +439,12 @@ def __init__( | |||||||||||||||||||||
| self.n_head: int = self.indexer.n_head # 64 | ||||||||||||||||||||||
| self.head_dim: int = self.indexer.head_dim # 128 | ||||||||||||||||||||||
| self.wq_b = self.indexer.wq_b | ||||||||||||||||||||||
| self.wk = self.indexer.wk | ||||||||||||||||||||||
| self.weights_proj = self.indexer.weights_proj | ||||||||||||||||||||||
| # upstream ac3dac545 fused wk+weights_proj into wk_weights_proj | ||||||||||||||||||||||
| if vllm_version_is("0.19.0"): | ||||||||||||||||||||||
| self.wk = self.indexer.wk | ||||||||||||||||||||||
| self.weights_proj = self.indexer.weights_proj | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| self.wk_weights_proj = self.indexer.wk_weights_proj | ||||||||||||||||||||||
|
Comment on lines
+443
to
+447
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The logic for fused weights appears to be inverted here, which is inconsistent with the logic used in `indexer_select_pre_process` and `indexer_select_post_process`: if the fused `wk_weights_proj` exists on v0.19.0, the version check should select `self.indexer.wk_weights_proj` in that branch rather than the separate `wk`/`weights_proj` attributes.
Suggested change
Suggested change
|
||||||||||||||||||||||
| self.k_norm = self.indexer.k_norm | ||||||||||||||||||||||
| self.cp_size = 1 | ||||||||||||||||||||||
| self.is_rope_neox_style = True | ||||||||||||||||||||||
|
|
@@ -908,7 +913,11 @@ def indexer_select_pre_process( | |||||||||||||||||||||
| cos: torch.Tensor, | ||||||||||||||||||||||
| sin: torch.Tensor, | ||||||||||||||||||||||
| ): | ||||||||||||||||||||||
| k_li, _ = self.wk(x) # [b,s,7168] @ [7168,128] = [b,s,128] | ||||||||||||||||||||||
| if vllm_version_is("0.19.0"): | ||||||||||||||||||||||
| k_li, _ = self.wk(x) # [b,s,7168] @ [7168,128] = [b,s,128] | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| kw, _ = self.wk_weights_proj(x) | ||||||||||||||||||||||
| k_li = kw[:, : self.head_dim] | ||||||||||||||||||||||
|
Comment on lines
+916
to
+920
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The logic for fused weights appears to be inverted. If v0.19.0 has the fused projection, this branch should compute `kw, _ = self.wk_weights_proj(x)` and slice `k_li = kw[:, : self.head_dim]` when `vllm_version_is("0.19.0")` is true, using the separate `self.wk(x)` only on other versions.
Suggested change
Suggested change
|
||||||||||||||||||||||
| k_li = self.k_norm(k_li).unsqueeze(1) | ||||||||||||||||||||||
| k_li = k_li.view(-1, 1, self.head_dim) | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
|
|
@@ -953,7 +962,11 @@ def indexer_select_post_process( | |||||||||||||||||||||
| actual_seq_lengths_query: torch.Tensor, | ||||||||||||||||||||||
| actual_seq_lengths_key: torch.Tensor, | ||||||||||||||||||||||
| ): | ||||||||||||||||||||||
| weights, _ = self.weights_proj(x) | ||||||||||||||||||||||
| if vllm_version_is("0.19.0"): | ||||||||||||||||||||||
| weights, _ = self.weights_proj(x) | ||||||||||||||||||||||
| else: | ||||||||||||||||||||||
| kw, _ = self.wk_weights_proj(x) | ||||||||||||||||||||||
| weights = kw[:, self.head_dim :] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| q_li, _ = self.wq_b(q_c) # [b,s,1536] @ [1536,64*128] = [b,s,64*128] | ||||||||||||||||||||||
| q_li = q_li.view(-1, self.n_head, self.head_dim) # [n_toks,64,128] | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Pull Request title and summary do not adhere to the repository style guide. Please update them according to the following suggestions:
Suggested PR Title:
Suggested PR Summary:
References