Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 118 additions & 5 deletions .github/workflows/_e2e_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ on:
contains_310:
required: true
type: boolean
continue_on_error:
required: false
type: boolean
default: false

jobs:
e2e-light:
Expand Down Expand Up @@ -80,7 +84,29 @@ jobs:
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
VLLM_WORKER_MULTIPROC_METHOD: spawn
run: |
python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# Assemble the run_suite.py arguments once in the positional parameters; the
# two extra flags are appended only when the caller set continue_on_error.
set -- \
  --suite e2e-singlecard-light \
  --auto-partition-id "${{ matrix.part }}" \
  --auto-partition-size 1
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
  set -- "$@" --auto-upgrade-estimated-times --continue-on-error
fi
python3 .github/workflows/scripts/run_suite.py "$@"


# Publish this partition's timing file so a downstream job can aggregate it.
# The artifact name embeds matrix.part, so each partition uploads its own.
- name: Upload timing data
  # An `if:` without a status function is implicitly ANDed with success(),
  # which would skip this upload whenever the test step exits non-zero.
  # `!cancelled()` keeps the upload running after test failures.
  # NOTE(review): run_suite.py's exit code under --continue-on-error is not
  # visible here; if it always exits 0 this is equivalent to the old check.
  if: ${{ !cancelled() && inputs.continue_on_error }}
  uses: actions/upload-artifact@v4
  with:
    name: timing-data-singlecard-light-part${{ matrix.part }}
    path: test_timing_data.json
    # A missing file only warns, so an early failure does not add a second error.
    if-no-files-found: warn
    retention-days: 5

e2e-full:
name: singlecard-full
Expand Down Expand Up @@ -146,7 +172,28 @@ jobs:
VLLM_WORKER_MULTIPROC_METHOD: spawn
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
run: |
python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
# Assemble the run_suite.py arguments once in the positional parameters; the
# two extra flags are appended only when the caller set continue_on_error.
set -- \
  --suite e2e-singlecard \
  --auto-partition-id "${{ matrix.part }}" \
  --auto-partition-size 2
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
  set -- "$@" --auto-upgrade-estimated-times --continue-on-error
fi
python3 .github/workflows/scripts/run_suite.py "$@"

# Publish this partition's timing file so a downstream job can aggregate it.
# The artifact name embeds matrix.part, so each partition uploads its own.
- name: Upload timing data
  # An `if:` without a status function is implicitly ANDed with success(),
  # which would skip this upload whenever the test step exits non-zero.
  # `!cancelled()` keeps the upload running after test failures.
  # NOTE(review): run_suite.py's exit code under --continue-on-error is not
  # visible here; if it always exits 0 this is equivalent to the old check.
  if: ${{ !cancelled() && inputs.continue_on_error }}
  uses: actions/upload-artifact@v4
  with:
    name: timing-data-singlecard-full-part${{ matrix.part }}
    path: test_timing_data.json
    # A missing file only warns, so an early failure does not add a second error.
    if-no-files-found: warn
    retention-days: 5

e2e-2-cards-light:
name: multicard-2-light
Expand Down Expand Up @@ -210,7 +257,29 @@ jobs:
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
run: |
python3 .github/workflows/scripts/run_suite.py --suite e2e-2card-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# Assemble the run_suite.py arguments once in the positional parameters; the
# two extra flags are appended only when the caller set continue_on_error.
set -- \
  --suite e2e-2card-light \
  --auto-partition-id "${{ matrix.part }}" \
  --auto-partition-size 1
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
  set -- "$@" --auto-upgrade-estimated-times --continue-on-error
fi
python3 .github/workflows/scripts/run_suite.py "$@"


# Publish this partition's timing file so a downstream job can aggregate it.
# The artifact name embeds matrix.part, so each partition uploads its own.
- name: Upload timing data
  # An `if:` without a status function is implicitly ANDed with success(),
  # which would skip this upload whenever the test step exits non-zero.
  # `!cancelled()` keeps the upload running after test failures.
  # NOTE(review): run_suite.py's exit code under --continue-on-error is not
  # visible here; if it always exits 0 this is equivalent to the old check.
  if: ${{ !cancelled() && inputs.continue_on_error }}
  uses: actions/upload-artifact@v4
  with:
    name: timing-data-2card-light-part${{ matrix.part }}
    path: test_timing_data.json
    # A missing file only warns, so an early failure does not add a second error.
    if-no-files-found: warn
    retention-days: 5

e2e-2-cards-full:
name: multicard-2-full
Expand Down Expand Up @@ -274,7 +343,29 @@ jobs:
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
run: |
python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-2-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# Assemble the run_suite.py arguments once in the positional parameters; the
# two extra flags are appended only when the caller set continue_on_error.
set -- \
  --suite e2e-multicard-2-cards \
  --auto-partition-id "${{ matrix.part }}" \
  --auto-partition-size 1
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
  set -- "$@" --auto-upgrade-estimated-times --continue-on-error
fi
python3 .github/workflows/scripts/run_suite.py "$@"


# Publish this partition's timing file so a downstream job can aggregate it.
# The artifact name embeds matrix.part, so each partition uploads its own.
- name: Upload timing data
  # An `if:` without a status function is implicitly ANDed with success(),
  # which would skip this upload whenever the test step exits non-zero.
  # `!cancelled()` keeps the upload running after test failures.
  # NOTE(review): run_suite.py's exit code under --continue-on-error is not
  # visible here; if it always exits 0 this is equivalent to the old check.
  if: ${{ !cancelled() && inputs.continue_on_error }}
  uses: actions/upload-artifact@v4
  with:
    name: timing-data-2card-full-part${{ matrix.part }}
    path: test_timing_data.json
    # A missing file only warns, so an early failure does not add a second error.
    if-no-files-found: warn
    retention-days: 5

- name: Run vllm-project/vllm-ascend test (non triton)
if: ${{ inputs.type == 'full' && matrix.part == 0 }}
Expand Down Expand Up @@ -346,7 +437,29 @@ jobs:
env:
VLLM_WORKER_MULTIPROC_METHOD: spawn
run: |
python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-4-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
# Assemble the run_suite.py arguments once in the positional parameters; the
# two extra flags are appended only when the caller set continue_on_error.
set -- \
  --suite e2e-multicard-4-cards \
  --auto-partition-id "${{ matrix.part }}" \
  --auto-partition-size 1
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
  set -- "$@" --auto-upgrade-estimated-times --continue-on-error
fi
python3 .github/workflows/scripts/run_suite.py "$@"


# Publish this partition's timing file so a downstream job can aggregate it.
# The artifact name embeds matrix.part, so each partition uploads its own.
- name: Upload timing data
  # An `if:` without a status function is implicitly ANDed with success(),
  # which would skip this upload whenever the test step exits non-zero.
  # `!cancelled()` keeps the upload running after test failures.
  # NOTE(review): run_suite.py's exit code under --continue-on-error is not
  # visible here; if it always exits 0 this is equivalent to the old check.
  if: ${{ !cancelled() && inputs.continue_on_error }}
  uses: actions/upload-artifact@v4
  with:
    name: timing-data-4card-full-part${{ matrix.part }}
    path: test_timing_data.json
    # A missing file only warns, so an early failure does not add a second error.
    if-no-files-found: warn
    retention-days: 5

e2e_310p:
name: 310p singlecard
Expand Down
111 changes: 111 additions & 0 deletions .github/workflows/schedule_update_estimated_time.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Runs the e2e suites through the reusable _e2e_test.yaml workflow with
# continue_on_error enabled, downloads the timing-data-* artifacts they upload,
# regenerates .github/workflows/scripts/config.yaml from them, and opens a pull
# request when that file changed.
name: Update estimated test times

on:
  schedule:
    - cron: '0 2 * * 1'  # Every Monday at 02:00 UTC
  workflow_dispatch:
  # Also triggered by PRs against main that touch this workflow file; the
  # "Create pull request" step is skipped for that event.
  pull_request:
    branches:
      - 'main'
    paths:
      - '.github/workflows/schedule_update_estimated_time.yaml'

# Write access is used by the last step, which pushes a branch and opens a PR.
# NOTE(review): these permissions also apply to the e2e-test job; consider
# scoping them to update-estimated-times once _e2e_test.yaml's needs are known.
permissions:
  contents: write
  pull-requests: write

# One run per ref at a time; a newer run cancels the one still in progress.
concurrency:
  group: update-estimated-times-${{ github.ref }}
  cancel-in-progress: true

jobs:
  e2e-test:
    name: e2e-test
    strategy:
      # The default (fail-fast: true) cancels the remaining matrix leg as soon
      # as one leg fails, discarding its timing data. Let both legs finish.
      fail-fast: false
      matrix:
        # Quoted so the commit SHA is always read as a string.
        vllm_version: ['15d76f74e2fdb12a95ea00f0ca283acf6219a2b7']
        type: [full, light]
    uses: ./.github/workflows/_e2e_test.yaml
    with:
      vllm: ${{ matrix.vllm_version }}
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:main
      contains_310: false
      type: ${{ matrix.type }}
      # Continue even if some tests fail; we want to collect as much timing
      # data as possible.
      continue_on_error: true

  update-estimated-times:
    name: Update estimated_time in config.yaml
    needs: [e2e-test]
    # `needs` alone implies success(), so a single failed e2e leg would skip
    # this job and waste every artifact that was uploaded. Run unless the
    # workflow was cancelled.
    # NOTE(review): update_estimated_time.py's behaviour when no artifacts were
    # downloaded is not visible here; confirm it handles an empty timing dir.
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      # Fetch every artifact whose name matches timing-data-* into
      # timing-artifacts/, without merging them into a single directory.
      - name: Download all timing artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: timing-data-*
          path: timing-artifacts/
          merge-multiple: false

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pyyaml

      # Rewrites config.yaml in place from the downloaded timing data.
      - name: Update config.yaml from timing data
        run: |
          python3 .github/workflows/scripts/update_estimated_time.py \
            --timing-dir timing-artifacts/ \
            --config .github/workflows/scripts/config.yaml

      # Exposes `changed` (true/false) so the PR step can be skipped when the
      # regenerated file is identical to the checked-out one.
      - name: Check for changes
        id: check_changes
        run: |
          if git diff --quiet .github/workflows/scripts/config.yaml; then
            echo "changed=false" >> "$GITHUB_OUTPUT"
            echo "No changes to config.yaml."
          else
            echo "changed=true" >> "$GITHUB_OUTPUT"
            echo "config.yaml has been updated:"
            git diff .github/workflows/scripts/config.yaml
          fi

      # Commits config.yaml on a run-specific branch and opens a PR against
      # main. Never runs for pull_request-triggered executions.
      - name: Create pull request
        if: steps.check_changes.outputs.changed == 'true' && github.event_name != 'pull_request'
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          BRANCH="auto/update-estimated-times-${{ github.run_id }}"
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git checkout -b "$BRANCH"
          git add .github/workflows/scripts/config.yaml
          git commit -m "[CI] Auto-update estimated test times in config.yaml

          Computed from timing-data artifacts of workflow run ${{ github.run_id }}.
          Buffer ratio: 1.1x median, rounded to the nearest 10 s."
          git push origin "$BRANCH"
          gh pr create \
            --repo "${{ github.repository }}" \
            --base main \
            --head "$BRANCH" \
            --title "chore: Auto-update estimated test times in config.yaml" \
            --body "## Summary

          This PR was auto-generated by the **Update estimated test times** workflow.

          It updates the \`estimated_time\` values in \`.github/workflows/scripts/config.yaml\`
          based on actual elapsed times collected from workflow run \`${{ github.run_id }}\`.

          ### Methodology
          - Timing data is uploaded as \`timing-data-*\` artifacts by each e2e test job.
          - For each test file, the **median** of all collected elapsed times is taken.
          - A **10 % safety buffer** is applied and the result is rounded to the nearest 10 s.

          Please review the diff and merge if the new values look reasonable."
Loading