Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ on:
continue_on_error:
required: false
type: boolean
default: false
default: true
# The following inputs are used by comment-triggered E2E tests (/e2e <tests>).
# They carry space-separated pytest paths, categorized by runner type.
# Leave empty (default) when running label-triggered full/light suites.
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dockerfiles/Dockerfile.lint
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RUN apt-get update -y && \

ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
# For lint purposes, we actually need to make a main2main match.
ARG VLLM_COMMIT=d886c26d4d4fef7d079696beb4ece1cfb4b008a8
ARG VLLM_COMMIT=856b15c62c8a574a1a0a289444d5b9a8120433e3
RUN git init /vllm-workspace/vllm && \
git -C /vllm-workspace/vllm fetch --depth 1 $VLLM_REPO $VLLM_COMMIT && \
git -C /vllm-workspace/vllm checkout FETCH_HEAD
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_test_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ jobs:
name: e2e-full
strategy:
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8, v0.19.1]
vllm_version: [856b15c62c8a574a1a0a289444d5b9a8120433e3, v0.19.1]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.e2e_tracker == true }}
uses: ./.github/workflows/_e2e_test.yaml
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pr_test_light.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
lint:
uses: ./.github/workflows/_pre_commit.yml
with:
vllm: d886c26d4d4fef7d079696beb4ece1cfb4b008a8
vllm: 856b15c62c8a574a1a0a289444d5b9a8120433e3
changes:
runs-on: linux-aarch64-a2b3-0
container:
Expand Down Expand Up @@ -154,7 +154,7 @@ jobs:
if: ${{ needs.lint.result == 'success' && needs.changes.outputs.has_tests == 'true' }}
strategy:
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8, v0.19.1]
vllm_version: [856b15c62c8a574a1a0a289444d5b9a8120433e3, v0.19.1]
uses: ./.github/workflows/_optional_smart_e2e.yaml
with:
vllm: ${{ matrix.vllm_version }}
Expand All @@ -164,7 +164,7 @@ jobs:
name: e2e-light
strategy:
matrix:
vllm_version: [d886c26d4d4fef7d079696beb4ece1cfb4b008a8, v0.19.1]
vllm_version: [856b15c62c8a574a1a0a289444d5b9a8120433e3, v0.19.1]
# Note (yikun): If CI resources are limited, we can split the job into two chained jobs.
needs: [lint, changes]
# Only trigger e2e tests after lint has passed and the pull request's changes are e2e-related.
Expand Down
2 changes: 1 addition & 1 deletion csrc/third_party/catlass
Submodule catlass updated from b50cad to 716fd7
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
# CANN image tag
"cann_image_tag": "8.5.1-910b-ubuntu22.04-py3.11",
# vLLM commit hash for main branch
"main_vllm_commit": "d886c26d4d4fef7d079696beb4ece1cfb4b008a8",
"main_vllm_commit": "856b15c62c8a574a1a0a289444d5b9a8120433e3",
# vLLM tag for main branch
"main_vllm_tag": "v0.19.1",
# Python version for main branch
Expand Down
19 changes: 6 additions & 13 deletions vllm_ascend/_310p/fused_moe/fused_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,11 @@
from vllm.distributed import get_dp_group, get_ep_group, get_tp_group
from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig
from vllm.model_executor.layers.fused_moe.layer import FusedMoE, UnquantizedFusedMoEMethod
from vllm.model_executor.layers.fused_moe.shared_fused_moe import SharedFusedMoE

from vllm_ascend.ascend_forward_context import _EXTRA_CTX, MoECommType
from vllm_ascend.ops.fused_moe.experts_selector import zero_experts_compute
from vllm_ascend.ops.fused_moe.moe_comm_method import FusedExpertsResult, _MoECommMethods
from vllm_ascend.ops.fused_moe.moe_runtime_args import build_fused_experts_input
from vllm_ascend.quantization.quant_type import QuantType
from vllm_ascend.utils import vllm_version_is

from .experts_selector import select_experts
from .moe_comm_method import AllGatherCommImpl310
Expand Down Expand Up @@ -164,16 +161,14 @@ def __init__(self, *args, **kwargs):

from vllm_ascend.ops.fused_moe.fused_moe import AscendMoERunner

is_legacy = vllm_version_is("0.19.1")
self.runner = AscendMoERunner(
self if is_legacy else self.layer_name,
self.layer_name,
self.moe_config,
self.router,
self._routed_input_transform,
self.gate if is_legacy else kwargs.pop("gate", None),
self.shared_experts if is_legacy else kwargs.pop("shared_experts", None),
self.runner.routed_input_transform,
kwargs.pop("gate", None),
kwargs.pop("shared_experts", None),
self.quant_method,
self.reduce_results,
self.vllm_config.parallel_config.enable_dbo,
)

Expand Down Expand Up @@ -263,7 +258,7 @@ def forward_impl( # type: ignore[override]
return routed_out


class AscendSharedFusedMoE310(SharedFusedMoE, AscendFusedMoE310):
class AscendSharedFusedMoE310(AscendFusedMoE310):
def __init__(
self,
shared_experts: torch.nn.Module,
Expand All @@ -285,16 +280,14 @@ def __init__(
# which at this point is still the stale runner built with shared_experts=None.
from vllm_ascend.ops.fused_moe.fused_moe import AscendMoERunner

is_legacy = vllm_version_is("0.19.1")
self.runner = AscendMoERunner(
self if is_legacy else self.layer_name,
self.layer_name,
self.moe_config,
self.router,
self._routed_input_transform,
self._gate,
self._shared_experts,
self.quant_method,
self.reduce_results,
self.vllm_config.parallel_config.enable_dbo,
)

Expand Down
Loading
Loading