diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 920818864ec..4d92d5f6033 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -201,7 +201,7 @@ jobs: max-parallel: 2 matrix: os: [linux-arm64-npu-1] - vllm_version: [main, v0.9.1] + vllm_version: [v0.9.1] name: singlecard e2e test runs-on: ${{ matrix.os }} container: @@ -257,23 +257,7 @@ jobs: VLLM_WORKER_MULTIPROC_METHOD: spawn VLLM_USE_MODELSCOPE: True run: | - pytest -sv tests/e2e/singlecard/test_offline_inference.py - pytest -sv tests/e2e/singlecard/test_ilama_lora.py - pytest -sv tests/e2e/singlecard/test_guided_decoding.py - pytest -sv tests/e2e/singlecard/test_camem.py - pytest -sv tests/e2e/singlecard/test_embedding.py - pytest -sv tests/e2e/singlecard/ \ - --ignore=tests/e2e/singlecard/test_offline_inference.py \ - --ignore=tests/e2e/singlecard/test_ilama_lora.py \ - --ignore=tests/e2e/singlecard/test_guided_decoding.py \ - --ignore=tests/e2e/singlecard/test_camem.py \ - --ignore=tests/e2e/singlecard/test_embedding.py \ - --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py \ - --ignore=tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py - # ------------------------------------ v1 spec decode test ------------------------------------ # - VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py - # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed - VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py + pytest -sv tests/e2e/singlecard/test_aclgraph.py - name: Run e2e test on V0 engine if: ${{ github.event_name == 'schedule' }} diff --git a/tests/e2e/singlecard/test_aclgraph.py b/tests/e2e/singlecard/test_aclgraph.py index e0bfb65cf88..4fc23aa7b39 100644 --- a/tests/e2e/singlecard/test_aclgraph.py +++ b/tests/e2e/singlecard/test_aclgraph.py @@ -29,7 +29,7 @@ from tests.conftest import VllmRunner from tests.model_utils import check_outputs_equal -MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"] +MODELS = ["Qwen/Qwen2.5-0.5B-Instruct", "vllm-ascend/Qwen3-30B-A3B-Puring"] @pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",