2 files changed: +43 -5 lines changed
+ # This script builds the CPU docker image and runs the offline inference inside the container.
+ # It serves as a sanity check for compilation and basic model usage.
+ set -ex
+
+ # Try building the docker image
+ docker build -t cpu-test -f Dockerfile.ppc64le .
+
+ # Setup cleanup
+ remove_docker_container() { docker rm -f cpu-test || true; }
+ trap remove_docker_container EXIT
+ remove_docker_container
+
+ # Run the image. For tensor parallel, use the commented-out variant below, which sets --shm-size=4g.
+ # docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
+ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --name cpu-test cpu-test
+
+ # Run basic model test
+ docker exec cpu-test bash -c "
+   pip install pytest matplotlib einops transformers_stream_generator
+   pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B are not supported on CPU
+
+ # Online inference
+ docker exec cpu-test bash -c "
+   python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m &
+   timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
+   python3 benchmarks/benchmark_serving.py \
+     --backend vllm \
+     --dataset-name random \
+     --model facebook/opt-125m \
+     --num-prompts 20 \
+     --endpoint /v1/completions \
+     --tokenizer facebook/opt-125m"
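
Note: a minimal sketch of how this sanity check might be invoked locally, assuming the script above is saved as run-cpu-test-ppc64le.sh (the file name is not shown in this diff and is only a guess) and that HF_TOKEN is set in the environment, since the container is started with -e HF_TOKEN:

    export HF_TOKEN=<your-huggingface-token>
    bash run-cpu-test-ppc64le.sh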
Dockerfile.ppc64le
@@ -2,21 +2,27 @@ FROM mambaorg/micromamba
ARG MAMBA_DOCKERFILE_ACTIVATE=1
USER root

- RUN apt-get update -y && apt-get install -y git wget vim numactl gcc-12 g++-12 protobuf-compiler libprotobuf-dev && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+ ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/"
+
+ RUN apt-get update -y && apt-get install -y git wget vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential

# Some packages in requirements-cpu are installed here
# IBM provides optimized packages for ppc64le processors in the open-ce project for mamba
# Currently these may not be available for venv or pip directly
- RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 pytorch-cpu=2.1.2 torchvision-cpu=0.16.2 && micromamba clean --all --yes
+ RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 torchvision-cpu=0.16.2 rust && micromamba clean --all --yes

COPY ./ /workspace/vllm

WORKDIR /workspace/vllm

# These packages will be in rocketce eventually
- RUN pip install -v -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing
+ RUN pip install -v cmake torch==2.3.1 uvloop==0.20.0 -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing

RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install

- WORKDIR /vllm-workspace
- ENTRYPOINT ["/opt/conda/bin/python3", "-m", "vllm.entrypoints.openai.api_server"]
+ WORKDIR /workspace/
+
+ RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+ ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
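
Note: beyond CI, the resulting image can also be used directly as an OpenAI-compatible server, because the new ENTRYPOINT launches vllm.entrypoints.openai.api_server. A minimal sketch, assuming the server's default port 8000 and the same small model used in the test script:

    docker build -t cpu-test -f Dockerfile.ppc64le .
    docker run -itd --network host --name cpu-serve cpu-test --model facebook/opt-125m
    curl http://localhost:8000/v1/models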