Skip to content

Commit 2b81d5f

Browse files
committed
Merge branch 'main' into copilot/fix-c6914add-1b66-46d0-9948-c2e7b6f2259f
# Conflicts: # tests/compile/piecewise/test_multiple_graphs.py
2 parents dd72729 + 8ef6b8a commit 2b81d5f

File tree

198 files changed

+6428
-6785
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

198 files changed

+6428
-6785
lines changed

.buildkite/generate_index.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
<html>
99
<body>
1010
<h1>Links for vLLM</h1>
11-
<a href="../{wheel_html_escaped}">{wheel}</a><br/>
11+
<a href="../{x86_wheel_html_escaped}">{x86_wheel}</a><br/>
12+
<a href="../{arm_wheel_html_escaped}">{arm_wheel}</a><br/>
1213
</body>
1314
</html>
1415
"""
@@ -21,7 +22,25 @@
2122

2223
with open("index.html", "w") as f:
2324
print(f"Generated index.html for {args.wheel}")
25+
# keep the manylinux platform tag in sync with .buildkite/scripts/upload-wheels.sh
26+
if "x86_64" in filename:
27+
x86_wheel = filename
28+
arm_wheel = filename.replace("x86_64", "aarch64").replace(
29+
"manylinux1", "manylinux2014"
30+
)
31+
elif "aarch64" in filename:
32+
x86_wheel = filename.replace("aarch64", "x86_64").replace(
33+
"manylinux2014", "manylinux1"
34+
)
35+
arm_wheel = filename
36+
else:
37+
raise ValueError(f"Unsupported wheel: {filename}")
2438
# cloudfront requires escaping the '+' character
2539
f.write(
26-
template.format(wheel=filename, wheel_html_escaped=filename.replace("+", "%2B"))
40+
template.format(
41+
x86_wheel=x86_wheel,
42+
x86_wheel_html_escaped=x86_wheel.replace("+", "%2B"),
43+
arm_wheel=arm_wheel,
44+
arm_wheel_html_escaped=arm_wheel.replace("+", "%2B"),
45+
)
2746
)

.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml

Lines changed: 0 additions & 12 deletions
This file was deleted.

.buildkite/lm-eval-harness/configs/models-large.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ Meta-Llama-3-70B-Instruct.yaml
33
Mixtral-8x7B-Instruct-v0.1.yaml
44
Qwen2-57B-A14-Instruct.yaml
55
DeepSeek-V2-Lite-Chat.yaml
6-
Meta-Llama-3-8B-QQQ.yaml

.buildkite/release-pipeline.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,12 @@ steps:
2727
env:
2828
DOCKER_BUILDKIT: "1"
2929

30+
- block: "Build CUDA 12.6 wheel"
31+
key: block-build-cu126-wheel
32+
depends_on: ~
33+
3034
- label: "Build wheel - CUDA 12.6"
35+
depends_on: block-build-cu126-wheel
3136
id: build-wheel-cuda-12-6
3237
agents:
3338
queue: cpu_queue_postmerge

.buildkite/scripts/hardware_ci/run-cpu-test.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ function cpu_tests() {
4646
set -e
4747
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
4848

49+
# Run kernel tests
50+
docker exec cpu-test-"$NUMA_NODE" bash -c "
51+
set -e
52+
pytest -v -s tests/kernels/test_onednn.py"
53+
4954
# Run basic model test
5055
docker exec cpu-test-"$NUMA_NODE" bash -c "
5156
set -e
@@ -99,4 +104,4 @@ function cpu_tests() {
99104

100105
# All CPU tests are expected to finish in less than 40 mins.
101106
export -f cpu_tests
102-
timeout 1.5h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"
107+
timeout 2h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"

.buildkite/scripts/tpu/cleanup_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ if [ "$disk_usage" -gt "$threshold" ]; then
1717
# Remove dangling images (those that are not tagged and not used by any container)
1818
docker image prune -f
1919
# Remove unused volumes / force the system prune for old images as well.
20-
docker volume prune -f && docker system prune --force --filter "until=72h" --all
20+
docker volume prune -f && docker system prune --force --filter "until=24h" --all
2121
echo "Docker images and volumes cleanup completed."
2222
else
2323
echo "Disk usage is below $threshold%. No cleanup needed."

.buildkite/scripts/upload-wheels.sh

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,19 @@ fi
1414
# Get the single wheel file
1515
wheel="${wheel_files[0]}"
1616

17-
# Rename 'linux' to 'manylinux1' in the wheel filename
18-
new_wheel="${wheel/linux/manylinux1}"
17+
# Detect architecture and rename 'linux' to appropriate manylinux version
18+
arch=$(uname -m)
19+
if [[ $arch == "x86_64" ]]; then
20+
manylinux_version="manylinux1"
21+
elif [[ $arch == "aarch64" ]]; then
22+
manylinux_version="manylinux2014"
23+
else
24+
echo "Warning: Unknown architecture $arch, using manylinux1 as default"
25+
manylinux_version="manylinux1"
26+
fi
27+
28+
# Rename 'linux' to the appropriate manylinux version in the wheel filename
29+
new_wheel="${wheel/linux/$manylinux_version}"
1930
mv -- "$wheel" "$new_wheel"
2031
wheel="$new_wheel"
2132

.buildkite/test-pipeline.yaml

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ steps:
328328
- pytest -v -s compile/test_sequence_parallelism.py
329329
- pytest -v -s compile/test_async_tp.py
330330
- pytest -v -s compile/test_fusion_all_reduce.py
331+
- pytest -v -s compile/test_decorator.py
331332

332333
- label: PyTorch Fullgraph Smoke Test # 9min
333334
mirror_hardwares: [amdexperimental]
@@ -341,6 +342,7 @@ steps:
341342
- pytest -v -s compile/piecewise/test_simple.py
342343
- pytest -v -s compile/piecewise/test_toy_llama.py
343344
- pytest -v -s compile/piecewise/test_full_cudagraph.py
345+
- pytest -v -s compile/piecewise/test_multiple_graphs.py
344346

345347
- label: PyTorch Fullgraph Test # 18min
346348
mirror_hardwares: [amdexperimental]
@@ -543,6 +545,15 @@ steps:
543545
commands:
544546
- pytest -v -s models/language/pooling -m 'not core_model'
545547

548+
- label: Multi-Modal Processor Test
549+
source_file_dependencies:
550+
- vllm/
551+
- tests/models/multimodal
552+
commands:
553+
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
554+
- pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py
555+
- pytest -v -s models/multimodal/processing/test_tensor_schema.py
556+
546557
- label: Multi-Modal Models Test (Standard)
547558
mirror_hardwares: [amdexperimental]
548559
torch_nightly: true
@@ -552,9 +563,7 @@ steps:
552563
commands:
553564
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
554565
- pip freeze | grep -E 'torch'
555-
- pytest -v -s models/multimodal/processing
556-
- pytest -v -s --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/test_tensor_schema.py models/multimodal -m core_model
557-
- pytest -v -s models/multimodal/test_tensor_schema.py -m core_model # Needs mp_method="spawn"
566+
- pytest -v -s models/multimodal -m core_model --ignore models/multimodal/generation/test_whisper.py --ignore models/multimodal/processing
558567
- cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
559568

560569
- label: Multi-Modal Models Test (Extended) 1
@@ -565,7 +574,7 @@ steps:
565574
- tests/models/multimodal
566575
commands:
567576
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
568-
- pytest -v -s --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing models/multimodal -m 'not core_model'
577+
- pytest -v -s models/multimodal -m 'not core_model' --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing
569578

570579
- label: Multi-Modal Models Test (Extended) 2
571580
mirror_hardwares: [amdexperimental]
@@ -646,6 +655,7 @@ steps:
646655
- pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py
647656
- pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
648657
- pytest -v -s tests/kernels/moe/test_nvfp4_moe.py
658+
- pytest -v -s tests/kernels/moe/test_mxfp4_moe.py
649659
# Fusion
650660
- pytest -v -s tests/compile/test_fusion_all_reduce.py
651661
- pytest -v -s tests/compile/test_fusion_attn.py::test_attention_quant_pattern

.github/workflows/lint-and-deploy.yaml

Lines changed: 0 additions & 89 deletions
This file was deleted.

.github/workflows/publish.yml

Lines changed: 0 additions & 111 deletions
This file was deleted.

0 commit comments

Comments
 (0)