Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ on:
continue_on_error:
required: false
type: boolean
default: false
default: true
# The following inputs are used by comment-triggered E2E tests (/e2e <tests>).
# They carry space-separated pytest paths, categorized by runner type.
# Leave empty (default) when running label-triggered full/light suites.
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dockerfiles/Dockerfile.lint
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RUN apt-get update -y && \

ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# For lint purposes, we actually need to make a main2main match.
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=v0.20.1
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pr_test_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ jobs:
name: e2e-full
strategy:
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
vllm_version: [c7aa186d67b6f051680831418e957c67f34ba7a2, v0.20.1]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.e2e_tracker == true }}
uses: ./.github/workflows/_e2e_test.yaml
Expand All @@ -102,7 +102,7 @@ jobs:
strategy:
fail-fast: false
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
vllm_version: [v0.20.1]
needs: [parse-trigger]
if: ${{ needs.parse-trigger.outputs.allowed == 'true' }}
uses: ./.github/workflows/_e2e_test.yaml
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pr_test_light.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
lint:
uses: ./.github/workflows/_pre_commit.yml
with:
vllm: d886c26d4d4fef7d079696beb4ece1cfb4b008a8
vllm: c7aa186d67b6f051680831418e957c67f34ba7a2
changes:
runs-on: linux-aarch64-a2b3-0
container:
Expand Down Expand Up @@ -154,7 +154,7 @@ jobs:
if: ${{ needs.lint.result == 'success' && needs.changes.outputs.has_tests == 'true' }}
strategy:
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
vllm_version: [c7aa186d67b6f051680831418e957c67f34ba7a2, v0.20.1]
uses: ./.github/workflows/_optional_smart_e2e.yaml
with:
vllm: ${{ matrix.vllm_version }}
Expand All @@ -164,7 +164,7 @@ jobs:
name: e2e-light
strategy:
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
vllm_version: [c7aa186d67b6f051680831418e957c67f34ba7a2, v0.20.1]
# Note (yikun): If CI resources are limited, we can split this job into two chained jobs
needs: [lint, changes]
# Only trigger the e2e test after lint has passed and the pull request's change is e2e-related.
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/schedule_update_estimated_time.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
name: e2e-test
strategy:
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
vllm_version: [v0.20.1]
type: [full, light]
uses: ./.github/workflows/_e2e_test.yaml
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/schedule_vllm_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
fail-fast: false
matrix:
part: [0, 1, 2, 3]
vllm: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8]
vllm: [v0.20.1]
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11
env:
Expand Down
52 changes: 44 additions & 8 deletions .github/workflows/scripts/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@ e2e-singlecard:
estimated_time: 222
- name: tests/e2e/singlecard/test_qwen3_multi_loras.py
estimated_time: 100
- name: tests/e2e/singlecard/test_models.py
estimated_time: 315
- name: tests/e2e/singlecard/test_models.py::test_minicpm
estimated_time: 158
- name: tests/e2e/singlecard/test_models.py::test_whisper
estimated_time: 157
is_skipped: true
- name: tests/e2e/singlecard/test_multistream_overlap_shared_expert.py
estimated_time: 253
- name: tests/e2e/singlecard/test_quantization.py
Expand Down Expand Up @@ -112,6 +115,7 @@ e2e-multicard-2-cards:
estimated_time: 178
- name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy_tp2
estimated_time: 127
is_skipped: true
- name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_moe_fc2_tp2
estimated_time: 149
- name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_v2_lite_fc1_tp2
Expand All @@ -130,8 +134,17 @@ e2e-multicard-2-cards:
estimated_time: 400
- name: tests/e2e/multicard/2-cards/test_quantization.py
estimated_time: 482
- name: tests/e2e/multicard/2-cards/test_qwen3_moe.py
estimated_time: 974
- name: tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep
estimated_time: 195
- name: tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_w8a8_distributed_tp2
estimated_time: 195
- name: tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_distributed_aiv_tp2
estimated_time: 195
- name: tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_distributed_tp2_ep2_mrv2
estimated_time: 195
is_skipped: true
- name: tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_w8a8_distributed_tp2_ep_dynamic_eplb
estimated_time: 194
- name: tests/e2e/multicard/2-cards/test_qwen3_moe_routing_replay.py
estimated_time: 193
- name: tests/e2e/multicard/2-cards/test_single_request_aclgraph.py
Expand All @@ -151,12 +164,35 @@ e2e-multicard-4-cards:
estimated_time: 322
- name: tests/e2e/multicard/4-cards/test_kimi_k2.py
estimated_time: 37
- name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
estimated_time: 1287
- name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py::test_models_long_sequence_output_between_tp_and_cp
estimated_time: 257
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py::test_accuracy_dcp_only_graph
estimated_time: 257
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py::test_accuracy_dcp_only_eager
estimated_time: 257
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py::test_accuracy_pcp_only
estimated_time: 257
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py::test_models_long_sequence_cp_kv_interleave_size_output_between_tp_and_cp
estimated_time: 259
- name: tests/e2e/multicard/4-cards/long_sequence/test_basic.py
estimated_time: 2179
- name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill_cp.py
estimated_time: 1173
- name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill_cp.py::test_models_chunked_prefill_mixed_length_prompts_including_1_token
estimated_time: 235
- name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill_cp.py::test_models_chunked_prefill_with_empty_kvcache
estimated_time: 235
- name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill_cp.py::test_models_chunked_prefill_with_cp_basic
estimated_time: 235
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill_cp.py::test_models_chunked_prefill_with_cp_piecewise
estimated_time: 235
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill_cp.py::test_models_chunked_prefill_with_cp_full_graph
estimated_time: 233
is_skipped: true
- name: tests/e2e/multicard/4-cards/long_sequence/test_prefix_caching_cp.py
estimated_time: 850
- name: tests/e2e/multicard/4-cards/long_sequence/test_mtp.py
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# ARG VLLM_TAG=v0.19.1
# RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=v0.20.1
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.310p
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# ARG VLLM_TAG=v0.19.1
# RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=v0.20.1
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.310p.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# ARG VLLM_TAG=v0.19.1
# RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=v0.20.1
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.a3
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# ARG VLLM_TAG=v0.19.1
# RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=v0.20.1
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.a3.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# ARG VLLM_TAG=v0.19.1
# RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=v0.20.1
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} && \
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# ARG VLLM_TAG=v0.19.1
# RUN git clone --depth 1 -b $VLLM_TAG $VLLM_REPO /vllm-workspace/vllm
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=v0.20.1
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@
# CANN image tag
"cann_image_tag": "8.5.1-910b-ubuntu22.04-py3.11",
# vLLM commit hash for main branch
"main_vllm_commit": "d886c26d4d4fef7d079696beb4ece1cfb4b008a8",
"main_vllm_commit": "c7aa186d67b6f051680831418e957c67f34ba7a2",
# vLLM tag for main branch
"main_vllm_tag": "v0.19.1",
"main_vllm_tag": "v0.20.1",
# Python version for main branch
"main_python_version": ">= 3.10, < 3.12",
# CANN version for main branch
Expand Down
9 changes: 4 additions & 5 deletions tests/ut/_310p/fused_moe/test_shared_fused_moe_310.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from vllm_ascend._310p.fused_moe.fused_moe import (
AscendFusedMoE310,
AscendSharedFusedMoE310,
)


Expand Down Expand Up @@ -48,8 +47,8 @@ def forward(self, hidden_states: torch.Tensor):
return out


def _build_layer(shared_experts: torch.nn.Module | None) -> AscendSharedFusedMoE310:
layer = AscendSharedFusedMoE310.__new__(AscendSharedFusedMoE310)
def _build_layer(shared_experts: torch.nn.Module | None) -> AscendFusedMoE310:
layer = AscendFusedMoE310.__new__(AscendFusedMoE310)
# The test bypasses full layer init with __new__, so we must initialize
# nn.Module internals before assigning child modules.
torch.nn.Module.__init__(layer)
Expand Down Expand Up @@ -80,7 +79,7 @@ def test_forward_impl_with_shared_experts_returns_tuple_310():
routed_out = torch.randn(3, 8)

with patch.object(AscendFusedMoE310, "forward_impl", return_value=routed_out):
shared_out, routed = layer.forward_impl(hidden_states, router_logits)
shared_out, routed = layer.shared_forward_impl(hidden_states, router_logits)

expected_shared = 0.5 * (hidden_states * 2.0 + 1.0)
torch.testing.assert_close(shared_out, expected_shared)
Expand All @@ -100,7 +99,7 @@ def test_forward_impl_without_shared_experts_returns_routed_only_310():
routed_out = torch.randn(3, 8)

with patch.object(AscendFusedMoE310, "forward_impl", return_value=routed_out):
output = layer.forward_impl(hidden_states, router_logits)
output = layer.shared_forward_impl(hidden_states, router_logits)

torch.testing.assert_close(output, routed_out)

Expand Down
6 changes: 6 additions & 0 deletions tests/ut/ops/test_fused_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,12 @@ def moe_method(mock_dist_env):
return AscendUnquantizedFusedMoEMethod(moe)


def test_ascend_unquantized_skips_upstream_modular_kernel_init():
    """``maybe_make_prepare_finalize`` must hand back ``None``.

    The attribute is looked up on ``AscendUnquantizedFusedMoEMethod`` itself and
    invoked with a bare ``object()`` as its only positional argument, so no
    fully constructed method instance is required for the check.
    """
    placeholder = object()
    result = AscendUnquantizedFusedMoEMethod.maybe_make_prepare_finalize(placeholder)

    assert result is None


class Device(TypedDict):
device_id: int
device_expert: list[int]
Expand Down
Loading
Loading