vllm-project · wangxiyuan · Jan 12, 2026 · Jan 12, 2026 · Jan 12, 2026 · Jan 12, 2026
@@ -69,35 +69,12 @@ jobs:
     # This is the runner with no NPU for k8s controller
     runs-on: ${{ inputs.runner }}
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu
       env:
         KUBECONFIG: /tmp/kubeconfig
-        KUBECTL: /root/.cache/.kube/kubectl
         NAMESPACE: vllm-project
         LEADER_POD: vllm-0
-        RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }}
     steps:
-        - name: Install system denpendencies
-          run: |
-           # configure apt and pip source
-           sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
-           pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
-           pip install jinja2-cli
-
-        - name: Install kubectl
-          run: |
-            # Install kubectl
-            arch=$(uname -m)
-
-            if echo "$arch" | grep -qiE "arm|aarch64"; then
-              echo "Detected ARM architecture: $arch"
-              KUBECTL="$KUBECTL"_arm
-            fi
-            install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
-
-            # Verify kubectl installation
-            kubectl version --client=true
-
         - name: Decode kubeconfig from secrets
           run: |
             # Decode and save kubeconfig
@@ -110,8 +87,6 @@ jobs:
           run: |
             # prepare for lws entrypoint scripts
             install -D tests/e2e/nightly/multi_node/scripts/run.sh /root/.cache/tests/run.sh
-            # clear log directory
-            rm -fr $RESULT_FILE
 
         - name: Clear resources
           run: |
@@ -157,10 +132,6 @@ jobs:
             replicas="${{ inputs.replicas }}"
             image="${{ inputs.image }}"
             config_file_path="${{ inputs.config_file_path }}"
-            vllm_version="${{ inputs.vllm_version }}"
-            vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}"
-            vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}"
-            result_file_path="$RESULT_FILE"
             fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}"
             echo "FAIL_TAG=${fail_tag}" >> $GITHUB_ENV
 
@@ -174,19 +145,17 @@ jobs:
 
             if [ "${{ inputs.soc_version }}" = "a3" ]; then
               npu_per_node=16
+              TEMPLATE_FILE="tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2"
             else
               npu_per_node=8
+              TEMPLATE_FILE="tests/e2e/nightly/multi_node/scripts/lws-a2.yaml.jinja2"
             fi
 
-            jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
+            jinja2 $TEMPLATE_FILE \
               -D size="$size" \
               -D replicas="$replicas" \
               -D image="$image" \
               -D config_file_path="$config_file_path" \
-              -D vllm_version="$vllm_version" \
-              -D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
-              -D vllm_ascend_ref="$vllm_ascend_ref" \
-              -D result_file_path="$result_file_path" \
               -D npu_per_node="$npu_per_node" \
               -D fail_tag="$fail_tag" \
               --outfile lws.yaml

@@ -93,13 +93,13 @@ jobs:
     uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
     with:
       soc_version: a2
-      runner: linux-aarch64-a2-0
+      runner: linux-amd64-cpu-8-hk
       image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'
       replicas: 1
       size: ${{ matrix.test_config.size }}
       config_file_path: ${{ matrix.test_config.config_file_path }}
     secrets:
-      KUBECONFIG_B64: ${{ secrets.KUBECONFIG_A2_B64 }}
+      KUBECONFIG_B64: ${{ secrets.KUBECONFIG_HK_001_INTERNAL_B64 }}
 
   single-node-accuracy-tests:
     if: >-

@@ -0,0 +1,157 @@
+# Jinja2 template: LeaderWorkerSet "vllm" plus a ClusterIP Service for its leader.
+# Template variables (the default applies when a variable is not supplied):
+#   replicas, size    - LeaderWorkerSet replica count and group size
+#   image             - container image used by the leader and worker containers
+#   config_file_path  - exported to the containers as CONFIG_YAML_PATH
+#   fail_tag          - exported to the containers as FAIL_TAG
+#   npu_per_node      - huawei.com/ascend-1980 devices requested per pod; the
+#                       default is 8, the value the nightly workflow passes for
+#                       non-a3 runs (NOTE(review): confirm this template is A2-only)
+# String-valued variables are rendered inside double quotes so the output is
+# always a YAML string, whatever the supplied value looks like.
+apiVersion: leaderworkerset.x-k8s.io/v1
+kind: LeaderWorkerSet
+metadata:
+  name: vllm
+  namespace: vllm-project
+spec:
+  replicas: {{ replicas | default(1) }}
+  leaderWorkerTemplate:
+    size: {{ size | default(2) }}
+    restartPolicy: None
+    leaderTemplate:
+      metadata:
+        labels:
+          # Selected by the vllm-leader Service at the end of this file.
+          role: leader
+      spec:
+        schedulerName: volcano
+        # Tolerate the instance=vllm:NoSchedule taint.
+        tolerations:
+          - key: "instance"
+            operator: "Equal"
+            value: "vllm"
+            effect: "NoSchedule"
+        containers:
+          - name: vllm-leader
+            imagePullPolicy: Always
+            image: "{{ image | default('swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2') }}"
+            env:
+              - name: CONFIG_YAML_PATH
+                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
+              - name: WORKSPACE
+                value: "/vllm-workspace"
+              - name: FAIL_TAG
+                value: "{{ fail_tag | default('FAIL_TAG') }}"
+            # The entrypoint script sits under /root/.cache, which is the
+            # shared-volume mount declared below.
+            command:
+              - sh
+              - -c
+              - |
+                bash /root/.cache/tests/run.sh
+            resources:
+              limits:
+                huawei.com/ascend-1980: {{ npu_per_node | default(8) }}
+                memory: 512Gi
+                ephemeral-storage: 100Gi
+              requests:
+                huawei.com/ascend-1980: {{ npu_per_node | default(8) }}
+                ephemeral-storage: 100Gi
+                cpu: 125
+            ports:
+              - containerPort: 8080
+            # readinessProbe:
+            #   tcpSocket:
+            #     port: 8080
+            #   initialDelaySeconds: 15
+            #   periodSeconds: 10
+            volumeMounts:
+              - mountPath: /root/.cache
+                name: shared-volume
+              - mountPath: /usr/local/Ascend/driver/tools
+                name: driver-tools
+              - mountPath: /dev/shm
+                name: dshm
+        volumes:
+          # Memory-backed emptyDir mounted at /dev/shm.
+          - name: dshm
+            emptyDir:
+              medium: Memory
+              sizeLimit: 15Gi
+          - name: shared-volume
+            persistentVolumeClaim:
+              claimName: vllm-project-hk001
+          - name: driver-tools
+            hostPath:
+              path: /usr/local/Ascend/driver/tools
+    workerTemplate:
+      spec:
+        schedulerName: volcano
+        # Same toleration as the leader pod.
+        tolerations:
+          - key: "instance"
+            operator: "Equal"
+            value: "vllm"
+            effect: "NoSchedule"
+        containers:
+          - name: vllm-worker
+            imagePullPolicy: Always
+            image: "{{ image | default('swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2') }}"
+            env:
+              - name: CONFIG_YAML_PATH
+                value: "{{ config_file_path | default('DeepSeek-V3.yaml') }}"
+              - name: WORKSPACE
+                value: "/vllm-workspace"
+              - name: FAIL_TAG
+                value: "{{ fail_tag | default('FAIL_TAG') }}"
+            # Workers start through the same entrypoint script as the leader.
+            command:
+              - sh
+              - -c
+              - |
+                bash /root/.cache/tests/run.sh
+            resources:
+              limits:
+                huawei.com/ascend-1980: {{ npu_per_node | default(8) }}
+                memory: 512Gi
+                ephemeral-storage: 100Gi
+              requests:
+                huawei.com/ascend-1980: {{ npu_per_node | default(8) }}
+                ephemeral-storage: 100Gi
+                cpu: 125
+            volumeMounts:
+              - mountPath: /root/.cache
+                name: shared-volume
+              - mountPath: /usr/local/Ascend/driver/tools
+                name: driver-tools
+              - mountPath: /dev/shm
+                name: dshm
+        volumes:
+          - name: dshm
+            emptyDir:
+              medium: Memory
+              sizeLimit: 15Gi
+          - name: shared-volume
+            persistentVolumeClaim:
+              claimName: vllm-project-hk001
+          - name: driver-tools
+            hostPath:
+              path: /usr/local/Ascend/driver/tools
+---
+# ClusterIP Service exposing port 8080 of the pod labelled role: leader.
+apiVersion: v1
+kind: Service
+metadata:
+  name: vllm-leader
+  namespace: vllm-project
+spec:
+  ports:
+    - name: http
+      port: 8080
+      protocol: TCP
+      targetPort: 8080
+  selector:
+    leaderworkerset.sigs.k8s.io/name: vllm
+    role: leader
+  type: ClusterIP
@@ -22,13 +22,6 @@ spec:
                 value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
               - name: WORKSPACE
                 value: "/vllm-workspace"
-              # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
-              - name: VLLM_ASCEND_VERSION
-                value: {{ vllm_ascend_ref | default("main") }}
-              - name: VLLM_ASCEND_REMOTE_URL
-                value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
-              - name: RESULT_FILE_PATH
-                value: {{ result_file_path | default("/root/.cache/tests/ret") }}
               - name: FAIL_TAG
                 value: {{ fail_tag | default("FAIL_TAG") }}
             command:
@@ -81,13 +74,6 @@ spec:
                 value: {{ config_file_path | default("DeepSeek-V3.yaml") }}
               - name: WORKSPACE
                 value: "/vllm-workspace"
-              # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here.
-              - name: VLLM_ASCEND_VERSION
-                value: {{ vllm_ascend_ref | default("main") }}
-              - name: VLLM_ASCEND_REMOTE_URL
-                value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }}
-              - name: RESULT_FILE_PATH
-                value: {{ result_file_path | default("/root/.cache/tests/ret") }}
               - name: FAIL_TAG
                 value: {{ fail_tag | default("FAIL_TAG") }}
             command:

@@ -167,8 +167,8 @@ run_tests_with_log() {
         if [ $ret -eq 0 ]; then
             print_success "All tests passed!"
         else
-            print_failure "Some tests failed, please check the error stack above for details.\
-            If this is insufficient to pinpoint the error, please download and review the logs of all other nodes from the job's summary."
+            print_failure "Some tests failed, please check the error stack above for details. \
+If this is insufficient to pinpoint the error, please download and review the logs of all other nodes from the job's summary."
         fi
     fi
 }