-
Notifications
You must be signed in to change notification settings - Fork 1.2k
[Main2Main] Upgrade vllm commit to 0120 #6040
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1626bef
fc4c4f1
870faea
2cf3c88
381e951
383e461
ada2931
5aacf30
abaa10c
4f2edf4
302a1d1
f9d6bed
61d65c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,6 @@ | |
| from vllm.model_executor.layers.linear import UnquantizedLinearMethod | ||
| from vllm.triton_utils import HAS_TRITON | ||
| from vllm.v1.attention.backend import AttentionBackend, AttentionCGSupport, MLAAttentionImpl # type: ignore | ||
| from vllm.v1.attention.backends.mla.common import MLACommonMetadataBuilder | ||
| from vllm.v1.kv_cache_interface import AttentionSpec | ||
|
|
||
| from vllm_ascend import envs | ||
|
|
@@ -46,11 +45,17 @@ | |
| enable_dsa_cp, | ||
| enable_dsa_cp_with_layer_shard, | ||
| maybe_trans_nz, | ||
| vllm_version_is, | ||
| ) | ||
| from vllm_ascend.worker.npu_input_batch import NPUInputBatch | ||
|
|
||
| if TYPE_CHECKING: | ||
| from vllm.v1.core.sched.output import SchedulerOutput | ||
| if vllm_version_is("0.14.1"): | ||
| from vllm.v1.attention.backends.mla.common import MLACommonMetadataBuilder # type: ignore | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to |
||
| else: | ||
| from vllm.model_executor.layers.attention.mla_attention import MLACommonMetadataBuilder | ||
| # isort: on | ||
|
|
||
| # token count limits within bmm_transpose operator | ||
| BMM_TRANS_MAX_SUPPORTED_TOKENS = 1024 | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The use of `# type: ignore` can mask actual type issues that might arise from API changes in vLLM 0.13.0. It is best practice to resolve type incompatibilities explicitly, or to provide a more specific reason for ignoring if it is a known, harmless difference, in order to prevent potential runtime errors.