Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 29 additions & 7 deletions .github/workflows/_e2e_nightly_single_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ on:
type: string
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11"
tests:
required: true
required: false
type: string
config_file_path:
required: false
type: string
name:
required: false
Expand All @@ -44,12 +47,12 @@ defaults:
# only cancel in-progress runs of the same workflow
# and ignore the lint / 1 card / 4 cards test type
concurrency:
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.config_file_path || inputs.tests }}
cancel-in-progress: true

jobs:
e2e-nightly:
name: ${{ inputs.tests }}
name: ${{ inputs.name || inputs.config_file_path || inputs.tests }}
runs-on: ${{ inputs.runner }}
timeout-minutes: 600
container:
Expand Down Expand Up @@ -114,14 +117,33 @@ jobs:
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

- name: Run vllm-project/vllm-ascend test
- name: Validate Inputs
run: |
if [[ -z "${{ inputs.tests }}" && -z "${{ inputs.config_file_path }}" ]]; then
echo "Error: Either 'tests' or 'config_file_path' must be provided."
exit 1
fi

- name: Run Pytest (py-driven)
if: ${{ inputs.tests != '' }}
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
VLLM_CI_RUNNER: ${{ inputs.runner }}
BENCHMARK_HOME: /vllm-workspace/vllm-ascend/benchmark
working-directory: /vllm-workspace/vllm-ascend
run: |
# ignore test_dispatch_ffn_combine until the test is fixed
pytest -sv ${{ inputs.tests }} \
echo "Running pytest with tests path: ${{ inputs.tests }}"
pytest -sv "${{ inputs.tests }}" \
--ignore=tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py

- name: Run Pytest (YAML-driven)
if: ${{ always() && inputs.config_file_path != '' }}
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
VLLM_USE_MODELSCOPE: True
VLLM_CI_RUNNER: ${{ inputs.runner }}
CONFIG_YAML_PATH: ${{ inputs.config_file_path }}
working-directory: /vllm-workspace/vllm-ascend
run: |
echo "Running YAML-driven test with config: ${{ inputs.config_file_path }}"
pytest -sv tests/e2e/nightly/single_node/models/scripts/test_single_node.py
34 changes: 24 additions & 10 deletions .github/workflows/schedule_nightly_test_a2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,6 @@ jobs:
fail-fast: false
matrix:
test_config:
- name: qwen3-next
os: linux-aarch64-a2b3-4
tests: tests/e2e/nightly/single_node/models/test_qwen3_next.py
- name: qwen3-32b
os: linux-aarch64-a2b3-4
tests: tests/e2e/nightly/single_node/models/test_qwen3_32b.py
- name: qwen3-32b-in8-a2
os: linux-aarch64-a2b3-4
tests: tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py
- name: test_custom_op
os: linux-aarch64-a2b3-1
tests: tests/e2e/nightly/single_node/ops/singlecard_ops
Expand All @@ -71,10 +62,33 @@ jobs:
name: ${{ matrix.test_config.name }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'

single-node-yaml-tests:
name: single-node
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
strategy:
fail-fast: false
matrix:
test_config:
- name: qwen3-32b
os: linux-aarch64-a2b3-4
config_file_path: Qwen3-32B.yaml
- name: qwen3-next-80b-a3b-instruct
os: linux-aarch64-a2b3-4
config_file_path: Qwen3-Next-80B-A3B-Instruct-A2.yaml
- name: qwen3-32b-int8
os: linux-aarch64-a2b3-4
config_file_path: Qwen3-32B-Int8-A2.yaml
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
with:
runner: ${{ matrix.test_config.os }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'
config_file_path: ${{ matrix.test_config.config_file_path }}
name: ${{ matrix.test_config.name }}

multi-node-tests:
name: multi-node
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
needs: single-node-tests
needs: [single-node-tests, single-node-yaml-tests]
strategy:
fail-fast: false
max-parallel: 1
Expand Down
100 changes: 55 additions & 45 deletions .github/workflows/schedule_nightly_test_a3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,73 +109,83 @@ jobs:
single-node-tests:
name: single-node
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
needs: multi-node-tests
needs: [multi-node-tests]
strategy:
fail-fast: false
matrix:
test_config:
- name: qwen3-32b-in8-a3
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen3_32b_int8.py
- name: qwen3-32b-int8-a3-feature-stack3
- name: qwen3-30b-acc
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen3_32b_int8_a3_feature_stack3.py
- name: qwen3-235b-a22b-w8a8-eplb
tests: tests/e2e/weekly/single_node/models/test_qwen3_30b_acc.py
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
with:
runner: ${{ matrix.test_config.os }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
tests: ${{ matrix.test_config.tests }}
name: ${{ matrix.test_config.name }}

single-node-yaml-tests:
name: single-node
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
needs: [multi-node-tests]
strategy:
fail-fast: false
matrix:
test_config:
# YAML-driven tests
- name: deepseek-r1-0528-w8a8
os: linux-aarch64-a3-16
config_file_path: DeepSeek-R1-0528-W8A8.yaml
- name: deepseek-r1-w8a8-hbm
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_qwen3_235b_a22b_w8a8_eplb.py
- name: deepseek-r1-w8a8-eplb
config_file_path: DeepSeek-R1-W8A8-HBM.yaml
- name: deepseek-v3-2-w8a8
os: linux-aarch64-a3-16
config_file_path: DeepSeek-V3.2-W8A8.yaml
- name: kimi-k2-thinking
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8_eplb.py
- name: deepseek-r1-w8a8-mtpx
config_file_path: Kimi-K2-Thinking.yaml
- name: mtpx-deepseek-r1-0528-w8a8
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_mtpx_deepseek_r1_0528_w8a8.py
config_file_path: MTPX-DeepSeek-R1-0528-W8A8.yaml
- name: qwen3-235b-a22b-w8a8
os: linux-aarch64-a3-16
config_file_path: Qwen3-235B-A22B-W8A8.yaml
- name: qwen3-30b-a3b-w8a8
os: linux-aarch64-a3-4
config_file_path: Qwen3-30B-A3B-W8A8.yaml
- name: qwen3-next-80b-a3b-instruct-w8a8
os: linux-aarch64-a3-4
config_file_path: Qwen3-Next-80B-A3B-Instruct-W8A8.yaml
- name: qwq-32b
os: linux-aarch64-a3-4
config_file_path: QwQ-32B.yaml
- name: qwen3-32b-int8
os: linux-aarch64-a3-4
config_file_path: Qwen3-32B-Int8.yaml
- name: qwen2-5-vl-7b
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b.py
config_file_path: Qwen2.5-VL-7B-Instruct.yaml
- name: qwen2-5-vl-7b-epd
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen2_5_vl_7b_epd.py
config_file_path: Qwen2.5-VL-7B-Instruct-EPD.yaml
- name: qwen2-5-vl-32b
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen2_5_vl_32b.py
config_file_path: Qwen2.5-VL-32B-Instruct.yaml
- name: qwen3-32b-int8-a3-feature-stack3
os: linux-aarch64-a3-4
config_file_path: Qwen3-32B-Int8-A3-Feature-Stack3.yaml
- name: qwen3-32b-int8-prefix-cache
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_prefix_cache_qwen3_32b_int8.py
- name: deepseek-r1-0528-w8a8
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_deepseek_r1_0528_w8a8.py
config_file_path: Prefix-Cache-Qwen3-32B-Int8.yaml
- name: deepseek-r1-0528-w8a8-prefix-cache
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_prefix_cache_deepseek_r1_0528_w8a8.py
- name: qwq-32b-a3
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwq_32b.py
- name: qwen3-30b-w8a8
os: linux-aarch64-a3-2
tests: tests/e2e/nightly/single_node/models/test_qwen3_30b_w8a8.py
- name: qwen3-235b-w8a8
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_qwen3_235b_w8a8.py
- name: qwen3-next-w8a8
os: linux-aarch64-a3-4
tests: tests/e2e/nightly/single_node/models/test_qwen3_next_w8a8.py
- name: kimi-k2-thinking
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_kimi_k2_thinking.py
- name: deepseek-r1-w8a8-hbm
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_deepseek_r1_w8a8_hbm.py
- name: deepseek3_2-w8a8
os: linux-aarch64-a3-16
tests: tests/e2e/nightly/single_node/models/test_deepseek_v3_2_w8a8.py
- name: qwen3-30b-acc
os: linux-aarch64-a3-4
tests: tests/e2e/weekly/single_node/models/test_qwen3_30b_acc.py
config_file_path: Prefix-Cache-DeepSeek-R1-0528-W8A8.yaml
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
with:
runner: ${{ matrix.test_config.os }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
tests: ${{ matrix.test_config.tests }}
config_file_path: ${{ matrix.test_config.config_file_path }}
name: ${{ matrix.test_config.name }}

custom-ops-tests:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# ==========================================
# Shared Configurations
# ==========================================

# Environment variables shared by every test case below, merged into each
# case via `<<: *envs`.  Values are quoted so the YAML loader keeps them as
# strings, which is what a process environment requires.
_envs: &envs
  OMP_NUM_THREADS: "10"
  OMP_PROC_BIND: "false"
  HCCL_BUFFSIZE: "1024"
  PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
  # Placeholder value — presumably rewritten to a concrete port by the test
  # harness before the server is launched; TODO confirm against the runner.
  SERVER_PORT: "DEFAULT_PORT"

# Baseline vLLM server CLI arguments shared by all cases (aliased as
# `*server_cmd`); per-case flags are appended separately via
# `server_cmd_extra`.
_server_cmd: &server_cmd
  - "--quantization"
  - "ascend"
  - "--data-parallel-size"
  - "2"
  - "--tensor-parallel-size"
  - "8"
  - "--enable-expert-parallel"
  - "--port"
  # Literal "$SERVER_PORT" — presumably expanded from the env var defined in
  # `_envs` by the harness/shell at launch time; TODO confirm.
  - "$SERVER_PORT"
  - "--seed"
  - "1024"
  - "--max-model-len"
  - "36864"
  - "--max-num-batched-tokens"
  - "4096"
  - "--max-num-seqs"
  - "16"
  - "--trust-remote-code"
  - "--gpu-memory-utilization"
  - "0.9"
  - "--speculative-config"
  # Single-quoted so the embedded JSON (double quotes included) reaches the
  # CLI verbatim with no YAML escape processing.
  - '{"num_speculative_tokens": 1, "method": "mtp"}'
  - "--additional-config"
  - '{"enable_weight_nz_layout": true}'

# Accuracy benchmark definition (GSM8K-lite); merged into a case's
# `benchmarks` mapping via the `*benchmarks_acc` alias.
_benchmarks_acc: &benchmarks_acc
  acc:
    case_type: accuracy
    dataset_path: vllm-ascend/gsm8k-lite
    request_conf: vllm_api_general_chat
    dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
    max_out_len: 32768
    batch_size: 32
    # NOTE(review): presumably a score baseline of 95 with a tolerance of 5 —
    # confirm units/semantics against the benchmark harness.
    baseline: 95
    threshold: 5

# Performance benchmark definition; merged into a case's `benchmarks`
# mapping via the `*benchmarks_perf` alias.
_benchmarks_perf: &benchmarks_perf
  perf:
    case_type: performance
    dataset_path: vllm-ascend/GSM8K-in3500-bs400
    request_conf: vllm_api_stream_chat
    dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
    num_prompts: 400
    max_out_len: 1500
    batch_size: 1000
    # NOTE(review): presumably a relative-throughput baseline of 1 with a
    # 0.97 pass ratio — confirm semantics against the harness.
    baseline: 1
    threshold: 0.97

# ==========================================
# ACTUAL TEST CASES
# ==========================================

# Test matrix for DeepSeek-R1-0528-W8A8: three server configurations sharing
# the anchors defined above.
test_cases:
  # Eager-mode bring-up only; no benchmarks attached.
  - name: "DeepSeek-R1-0528-W8A8-single"
    model: "vllm-ascend/DeepSeek-R1-0528-W8A8"
    envs:
      <<: *envs
    server_cmd: *server_cmd
    server_cmd_extra:
      - "--enforce-eager"
    # NOTE(review): a bare `benchmarks:` parses as null, not an empty
    # mapping — confirm the harness treats null as "skip benchmarks".
    benchmarks:

  # Default (ACL-graph) mode: run both accuracy and performance benchmarks.
  - name: "DeepSeek-R1-0528-W8A8-aclgraph"
    model: "vllm-ascend/DeepSeek-R1-0528-W8A8"
    envs:
      <<: *envs
    server_cmd: *server_cmd
    # FIX: the original used two separate `<<` keys in this mapping
    # (`<<: *benchmarks_acc` then `<<: *benchmarks_perf`).  Duplicate keys
    # are invalid YAML; loaders either raise or keep only the last alias,
    # silently dropping the accuracy benchmark.  The merge key accepts a
    # sequence of mappings, which merges both anchors correctly (`acc` and
    # `perf` are disjoint keys, so no conflict resolution applies).
    benchmarks:
      <<: [*benchmarks_acc, *benchmarks_perf]

  # Dynamic EPLB enabled via env var plus `eplb_config` CLI overrides;
  # accuracy benchmark only.
  - name: "DeepSeek-R1-0528-W8A8-EPLB"
    model: "vllm-ascend/DeepSeek-R1-0528-W8A8"
    envs:
      <<: *envs
      DYNAMIC_EPLB: "true"
    server_cmd: *server_cmd
    server_cmd_extra:
      # This repeats `--additional-config` from *server_cmd with an extended
      # JSON payload — presumably last occurrence wins on the vLLM CLI;
      # TODO confirm.
      - "--additional-config"
      - '{"enable_weight_nz_layout": true, "eplb_config": {"dynamic_eplb": "true", "expert_heat_collection_interval": 1000, "algorithm_execution_interval": 50, "eplb_policy_type": 3}}'
    benchmarks:
      <<: *benchmarks_acc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# ==========================================
# ACTUAL TEST CASES
# ==========================================

# Single test case: DeepSeek-R1 W8A8 on HBM, eager mode, DP8 x TP2 with
# expert parallelism.  This file defines everything inline (no shared
# anchors).
test_cases:
  - name: "DeepSeek-R1-W8A8-HBM-single"
    model: "vllm-ascend/DeepSeek-R1-W8A8"
    envs:
      HCCL_BUFFSIZE: "1024"
      # Placeholder value — presumably rewritten to a concrete port by the
      # test harness before launch; TODO confirm.
      SERVER_PORT: "DEFAULT_PORT"
    server_cmd:
      - "--quantization"
      - "ascend"
      - "--port"
      # Literal "$SERVER_PORT" — presumably expanded from the env var above
      # by the harness/shell; TODO confirm.
      - "$SERVER_PORT"
      - "--data-parallel-size"
      - "8"
      - "--data-parallel-size-local"
      - "8"
      - "--data-parallel-rpc-port"
      - "13389"
      - "--tensor-parallel-size"
      - "2"
      - "--enable-expert-parallel"
      - "--seed"
      - "1024"
      - "--max-num-seqs"
      - "32"
      - "--max-model-len"
      - "6000"
      - "--max-num-batched-tokens"
      - "6000"
      - "--trust-remote-code"
      - "--gpu-memory-utilization"
      - "0.92"
      - "--no-enable-prefix-caching"
      - "--reasoning-parser"
      - "deepseek_r1"
      - "--enforce-eager"
      - "--additional-config"
      # Single-quoted so the embedded JSON reaches the CLI verbatim.
      - '{"ascend_scheduler_config": {"enabled": false}, "torchair_graph_config": {"enabled": false, "enable_multistream_shared_expert": false}}'
    # NOTE(review): a bare `benchmarks:` parses as null, not an empty
    # mapping — confirm the harness treats null as "skip benchmarks".
    benchmarks:
Loading