Commit d7edca1

[CI/Build] Adding timeout in CPU CI to avoid CPU test queue blocking (#6892)
Signed-off-by: DarkLight1337 <[email protected]>
Co-authored-by: DarkLight1337 <[email protected]>
1 parent: 127c074 · commit: d7edca1
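
Both CI scripts are restructured the same way: the test commands move into a cpu_tests shell function, the function is exported with export -f, and the whole suite runs under timeout 25m, so a hung test fails the step instead of blocking the CPU test queue for everyone else. Below is a minimal sketch of that pattern, with an illustrative stand-in body rather than the real test commands:

#!/bin/bash
set -e

function cpu_tests() {
  # Stand-in for the real test commands (illustrative only).
  echo "running CPU tests..."
  sleep 2
}

# timeout runs its command in a fresh bash process, which does not inherit
# shell functions; export -f makes cpu_tests visible to that child shell.
export -f cpu_tests

# If cpu_tests is still running after 25 minutes, timeout kills it and exits
# with status 124, failing the CI step instead of hanging indefinitely.
timeout 25m bash -c "cpu_tests"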

File tree

2 files changed: +79 -69 lines changed


.buildkite/run-cpu-test-ppc64le.sh (+31 -26)
@@ -17,30 +17,35 @@ source /etc/environment
 #docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
 docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN="$HF_TOKEN" --name cpu-test cpu-test
 
-# Run basic model test
-docker exec cpu-test bash -c "
-  set -e
-  pip install pytest pytest-asyncio \
-    decord einops librosa peft Pillow sentence-transformers soundfile \
-    transformers_stream_generator matplotlib datamodel_code_generator
-  pip install torchvision --index-url https://download.pytorch.org/whl/cpu
-  # Embedding models are not supported for CPU yet
-  # pytest -v -s tests/models/embedding/language
-  pytest -v -s tests/models/encoder_decoder/language
-  pytest -v -s tests/models/decoder_only/language/test_models.py
-  # Chunked prefill not supported for CPU yet
-  # pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
-  pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
+function cpu_tests() {
+  # Run basic model test
+  docker exec cpu-test bash -c "
+    set -e
+    pip install pytest pytest-asyncio \
+      decord einops librosa peft Pillow sentence-transformers soundfile \
+      transformers_stream_generator matplotlib datamodel_code_generator
+    pip install torchvision --index-url https://download.pytorch.org/whl/cpu
+    # Embedding models are not supported for CPU yet
+    # pytest -v -s tests/models/embedding/language
+    pytest -v -s tests/models/encoder_decoder/language
+    pytest -v -s tests/models/decoder_only/language/test_models.py
+    pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
+    pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
 
-# online inference
-docker exec cpu-test bash -c "
-  set -e
-  python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m &
-  timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
-  python3 benchmarks/benchmark_serving.py \
-    --backend vllm \
-    --dataset-name random \
-    --model facebook/opt-125m \
-    --num-prompts 20 \
-    --endpoint /v1/completions \
-    --tokenizer facebook/opt-125m"
+  # online inference
+  docker exec cpu-test bash -c "
+    set -e
+    python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m &
+    timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
+    python3 benchmarks/benchmark_serving.py \
+      --backend vllm \
+      --dataset-name random \
+      --model facebook/opt-125m \
+      --num-prompts 20 \
+      --endpoint /v1/completions \
+      --tokenizer facebook/opt-125m"
+}
+
+# All of CPU tests are expected to be finished less than 25 mins.
+export -f cpu_tests
+timeout 25m bash -c "cpu_tests"
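
Note the readiness poll in the online-inference block above: the API server starts in the background, and curl is retried once per second until /v1/models responds, with timeout 600 capping the wait so a server that never comes up fails the script through || exit 1. Here is the same idiom, factored into a standalone helper for illustration (wait_for_endpoint is a hypothetical name, not part of these scripts):

# Hypothetical helper: block until an HTTP endpoint answers, giving up
# after a hard deadline (in seconds) so the caller can fail fast.
wait_for_endpoint() {
  local url="$1" deadline="$2"
  timeout "$deadline" bash -c \
    "until curl --silent --fail '$url' > /dev/null; do sleep 1; done"
}

# Usage: abort if the server is not serving within 10 minutes.
python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m &
wait_for_endpoint "http://localhost:8000/v1/models" 600 || exit 1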

.buildkite/run-cpu-test.sh (+48 -43)
@@ -19,50 +19,55 @@ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/hugg
 docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus=48-95 \
   --cpuset-mems=1 --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2 cpu-test-avx2
 
-# offline inference
-docker exec cpu-test-avx2 bash -c "
-  set -e
-  python3 examples/offline_inference.py"
+function cpu_tests() {
+  # offline inference
+  docker exec cpu-test-avx2 bash -c "
+    set -e
+    python3 examples/offline_inference.py"
 
-# Run basic model test
-docker exec cpu-test bash -c "
-  set -e
-  pip install pytest pytest-asyncio \
-    decord einops librosa peft Pillow sentence-transformers soundfile \
-    transformers_stream_generator matplotlib datamodel_code_generator
-  pip install torchvision --index-url https://download.pytorch.org/whl/cpu
-  # Embedding models are not supported for CPU yet
-  # pytest -v -s tests/models/embedding/language
-  pytest -v -s tests/models/encoder_decoder/language
-  pytest -v -s tests/models/decoder_only/language/test_models.py
-  # Chunked prefill not supported for CPU yet
-  # pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
-  pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
+  # Run basic model test
+  docker exec cpu-test bash -c "
+    set -e
+    pip install pytest pytest-asyncio \
+      decord einops librosa peft Pillow sentence-transformers soundfile \
+      transformers_stream_generator matplotlib datamodel_code_generator
+    pip install torchvision --index-url https://download.pytorch.org/whl/cpu
+    # Embedding models are not supported for CPU yet
+    # pytest -v -s tests/models/embedding/language
+    pytest -v -s tests/models/encoder_decoder/language
+    pytest -v -s tests/models/decoder_only/language/test_models.py
+    pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
+    pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
 
-# Run compressed-tensor test
-docker exec cpu-test bash -c "
-  set -e
-  pytest -s -v \
-    tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
-    tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
+  # Run compressed-tensor test
+  docker exec cpu-test bash -c "
+    set -e
+    pytest -s -v \
+      tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
+      tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
 
-# Run AWQ test
-docker exec cpu-test bash -c "
-  set -e
-  pytest -s -v \
-    tests/quantization/test_ipex_quant.py"
+  # Run AWQ test
+  docker exec cpu-test bash -c "
+    set -e
+    pytest -s -v \
+      tests/quantization/test_ipex_quant.py"
 
-# online inference
-docker exec cpu-test bash -c "
-  set -e
-  export VLLM_CPU_KVCACHE_SPACE=10
-  export VLLM_CPU_OMP_THREADS_BIND=48-92
-  python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m --dtype half &
-  timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
-  python3 benchmarks/benchmark_serving.py \
-    --backend vllm \
-    --dataset-name random \
-    --model facebook/opt-125m \
-    --num-prompts 20 \
-    --endpoint /v1/completions \
-    --tokenizer facebook/opt-125m"
+  # online inference
+  docker exec cpu-test bash -c "
+    set -e
+    export VLLM_CPU_KVCACHE_SPACE=10
+    export VLLM_CPU_OMP_THREADS_BIND=48-92
+    python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m --dtype half &
+    timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
+    python3 benchmarks/benchmark_serving.py \
+      --backend vllm \
+      --dataset-name random \
+      --model facebook/opt-125m \
+      --num-prompts 20 \
+      --endpoint /v1/completions \
+      --tokenizer facebook/opt-125m"
+}
+
+# All of CPU tests are expected to be finished less than 25 mins.
+export -f cpu_tests
+timeout 25m bash -c "cpu_tests"
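
A quick local sanity check of the new guard (values are illustrative): GNU timeout reports an expired limit with exit status 124, which is what marks the Buildkite step as failed rather than letting it sit in the queue.

# A command that overruns its limit is killed and reported as status 124.
timeout 2s bash -c 'sleep 10'
echo "exit status: $?"   # prints: exit status: 124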
