Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/format_pr_body.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:

- name: Get vLLM version
run: |
VLLM_COMMIT=83f478bb19489b41e9d208b47b4bb5a95ac171ac
VLLM_COMMIT=2918c1b49c88c29783c86f78d2c4221cb9622379
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV

- name: Checkout repository
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/vllm_ascend_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
lint:
uses: ./.github/workflows/pre-commit.yml
with:
vllm: 83f478bb19489b41e9d208b47b4bb5a95ac171ac
vllm: 2918c1b49c88c29783c86f78d2c4221cb9622379
changes:
runs-on: ubuntu-latest
outputs:
Expand Down Expand Up @@ -83,7 +83,7 @@ jobs:
VLLM_USE_MODELSCOPE: True
strategy:
matrix:
vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
vllm_version: [2918c1b49c88c29783c86f78d2c4221cb9622379, v0.11.0]
steps:
- name: Install packages
run: |
Expand Down Expand Up @@ -138,7 +138,7 @@ jobs:
name: e2e-light
strategy:
matrix:
vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
vllm_version: [2918c1b49c88c29783c86f78d2c4221cb9622379, v0.11.0]
# Note (yikun): If CI resource are limited we can split job into two chain jobs
needs: [lint, changes]
# only trigger e2e test after lint passed and the change is e2e related with pull request.
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/vllm_ascend_test_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
name: e2e-full
strategy:
matrix:
vllm_version: [83f478bb19489b41e9d208b47b4bb5a95ac171ac, v0.11.0]
vllm_version: [2918c1b49c88c29783c86f78d2c4221cb9622379, v0.11.0]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
uses: ./.github/workflows/_e2e_test.yaml
Expand Down
2 changes: 1 addition & 1 deletion docs/source/community/versioning_policy.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ The table below is the release compatibility matrix for vLLM Ascend release.
For the main branch of vLLM Ascend, we usually keep it compatible with the latest vLLM release plus a newer vLLM commit hash. Note that this table is updated frequently, so please check it regularly.
| vLLM Ascend | vLLM | Python | Stable CANN | PyTorch/torch_npu |
|-------------|--------------|------------------|-------------|--------------------|
| main | v0.11.0/83f478bb19489b41e9d208b47b4bb5a95ac171ac | >= 3.10, < 3.12 | 8.3.RC1 | 2.7.1 / 2.7.1 |
| main | v0.11.0/2918c1b49c88c29783c86f78d2c4221cb9622379 | >= 3.10, < 3.12 | 8.3.RC1 | 2.7.1 / 2.7.1 |

## Release cadence

Expand Down
9 changes: 6 additions & 3 deletions tests/ut/worker/test_worker_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from tests.ut.base import TestBase
from vllm_ascend.utils import vllm_version_is

# Patch target for init_cached_hf_modules: vLLM moved it from vllm.utils to
# vllm.utils.import_utils after the 0.11.0 release, so pick the path that
# matches the installed vLLM version.
if vllm_version_is("0.11.0"):
    init_cached_hf_modules_path = "vllm.utils.init_cached_hf_modules"
else:
    init_cached_hf_modules_path = "vllm.utils.import_utils.init_cached_hf_modules"


class TestNPUWorker(TestBase):

Expand Down Expand Up @@ -53,7 +56,7 @@ def setUp(self):
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch("vllm.utils.init_cached_hf_modules")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_normal_case(
self,
Expand Down Expand Up @@ -115,7 +118,7 @@ def test_init_npu_worker_normal_case(
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch("vllm.utils.init_cached_hf_modules")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_with_trust_remote_code(
self,
Expand Down Expand Up @@ -160,7 +163,7 @@ def test_init_npu_worker_with_trust_remote_code(
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_soc_version")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch("vllm.utils.init_cached_hf_modules")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_with_custom_cache_dtype(
self,
Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/attention/attention_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@
get_decode_context_model_parallel_rank,
get_decode_context_model_parallel_world_size)
from vllm.forward_context import ForwardContext, get_forward_context
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv
Comment thread
wangxiyuan marked this conversation as resolved.

from vllm.v1.attention.backends.utils import AttentionCGSupport
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.kv_cache_interface import AttentionSpec
Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/attention/mla_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@
from vllm.logger import logger
from vllm.model_executor.layers.linear import (LinearBase,
UnquantizedLinearMethod)
from vllm.utils import cdiv, round_down

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv, round_down
else:
from vllm.utils.math_utils import cdiv, round_down
Comment thread
wangxiyuan marked this conversation as resolved.

from vllm.v1.attention.backends.utils import AttentionCGSupport

from vllm_ascend import envs
Expand Down
9 changes: 8 additions & 1 deletion vllm_ascend/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@
from vllm.distributed.kv_events import KVEventBatch
from vllm.logger import logger
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.utils import cdiv

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv
Comment thread
wangxiyuan marked this conversation as resolved.

from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.core.sched.output import NewRequestData, SchedulerOutput
from vllm.v1.core.sched.scheduler import Scheduler
Expand Down
10 changes: 9 additions & 1 deletion vllm_ascend/distributed/mooncake/config_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@
import torch
from vllm.distributed.kv_transfer.kv_connector.v1.base import \
KVConnectorMetadata
from vllm.utils import cdiv, logger
from vllm.utils import logger

from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.11.0"):
from vllm.utils import cdiv
else:
from vllm.utils.math_utils import cdiv
Comment thread
wangxiyuan marked this conversation as resolved.

from vllm.v1.core.sched.output import NewRequestData

DEFAULT_GLOBAL_SEGMENT_SIZE = 3355443200 # 3.125 GiB
Expand Down
15 changes: 13 additions & 2 deletions vllm_ascend/models/qwen2_5_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from vllm.model_executor.models.utils import maybe_prefix
from vllm.multimodal import MULTIMODAL_REGISTRY

from vllm_ascend.ascend_forward_context import set_ascend_forward_context
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, is_enable_nz,
vllm_version_is)

Expand Down Expand Up @@ -536,7 +537,11 @@ def _process_image_input(self, image_input) -> tuple[torch.Tensor, ...]:
image_embeds = image_input["image_embeds"].type(self.visual.dtype)
else:
pixel_values = image_input["pixel_values"].type(self.visual.dtype)
image_embeds = self.visual(pixel_values, grid_thw=grid_thw)
if vllm_version_is("0.11.0"):
image_embeds = self.visual(pixel_values, grid_thw=grid_thw)
else:
with set_ascend_forward_context(None, self.vllm_config):
image_embeds = self.visual(pixel_values, grid_thw=grid_thw)

# Split concatenated embeddings for each image item.
merge_size = self.visual.spatial_merge_size
Expand All @@ -553,7 +558,13 @@ def _process_video_input(self, video_input) -> tuple[torch.Tensor, ...]:
else:
pixel_values_videos = video_input["pixel_values_videos"].type(
self.visual.dtype)
video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw)
if vllm_version_is("0.11.0"):
video_embeds = self.visual(pixel_values_videos,
grid_thw=grid_thw)
else:
with set_ascend_forward_context(None, self.vllm_config):
video_embeds = self.visual(pixel_values_videos,
grid_thw=grid_thw)

# Split concatenated embeddings for each video item.
merge_size = self.visual.spatial_merge_size
Expand Down
Loading
Loading