Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions docker/Dockerfile.slim
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
FROM python:3.12.9-slim AS builder

WORKDIR /tmp

# Install vLLM (and whatever pip resolves as its dependencies) into a
# standalone target directory instead of site-packages, so the later steps
# can move individual package directories into separate chunks.
RUN pip install \
    --no-cache-dir \
    --target /tmp/python-packages \
    vllm==0.10.0
Comment thread
aoyshi marked this conversation as resolved.
Outdated

# Split the nvidia packages (2.7 GB) three ways:
#   chunk-cudnn  - cudnn (1 GB)
#   chunk-cublas - cublas (600 MB)
#   other        - every remaining nvidia sub-package (1.2 GB)
# cudnn and cublas are moved out first, so the wildcard move only picks up the
# rest; the final rm -rf deletes any cudnn/cublas path still present under other/.
RUN mkdir -p \
      /chunk-nvidia/chunk-cudnn \
      /chunk-nvidia/chunk-cublas \
      /chunk-nvidia/other \
 && mv /tmp/python-packages/nvidia/cudnn /chunk-nvidia/chunk-cudnn \
 && mv /tmp/python-packages/nvidia/cublas /chunk-nvidia/chunk-cublas \
 && mv /tmp/python-packages/nvidia/* /chunk-nvidia/other \
 && rm -rf \
      /chunk-nvidia/other/cudnn \
      /chunk-nvidia/other/cublas

# torch (1.7 GB) gets a chunk of its own: the whole package directory is moved
# so that it ends up at /chunk-torch/torch.
RUN mkdir -p /chunk-torch \
 && mv /tmp/python-packages/torch /chunk-torch/

# vllm (800 MB) gets a chunk of its own: the whole package directory is moved
# so that it ends up at /chunk-vllm/vllm.
RUN mkdir -p /chunk-vllm \
 && mv /tmp/python-packages/vllm /chunk-vllm/

# Whatever is still left in the install directory forms the last chunk (1.8 GB).
# The nvidia/torch/vllm paths are deleted from this chunk afterwards so it does
# not carry leftover directories for packages that already have their own chunk.
RUN mkdir -p /chunk-other \
 && mv /tmp/python-packages/* /chunk-other/ \
 && rm -rf \
      /chunk-other/nvidia \
      /chunk-other/torch \
      /chunk-other/vllm

# Final image: starts from the same base as the builder stage and receives
# only the pre-split package chunks from it.
FROM python:3.12.9-slim

WORKDIR /app

# Reassemble the chunks under site-packages. Each COPY below produces its own
# image layer, with the intent that the layers can be pulled concurrently
# during docker pull.
# cudnn and cublas are copied to their final nvidia/<name> directories.
COPY --from=builder /chunk-nvidia/chunk-cudnn/cudnn /usr/local/lib/python3.12/site-packages/nvidia/cudnn
COPY --from=builder /chunk-nvidia/chunk-cublas/cublas /usr/local/lib/python3.12/site-packages/nvidia/cublas
# The remaining nvidia sub-packages are merged into the same nvidia/ directory.
COPY --from=builder /chunk-nvidia/other /usr/local/lib/python3.12/site-packages/nvidia/
# The contents of each remaining chunk directory are copied directly into site-packages/.
COPY --from=builder /chunk-torch /usr/local/lib/python3.12/site-packages/
COPY --from=builder /chunk-vllm /usr/local/lib/python3.12/site-packages/
COPY --from=builder /chunk-other /usr/local/lib/python3.12/site-packages/

# Install FlashInfer from the wheel tagged cu128torch2.7.
# --no-cache-dir keeps pip's download cache out of this image layer.
RUN pip install --no-cache-dir "https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.6.post1%2Bcu128torch2.7-cp39-abi3-linux_x86_64.whl"
Comment thread
aoyshi marked this conversation as resolved.
Outdated

# Default model location; the ENTRYPOINT passes it to the server via --model.
# Written in key=value form (the space-separated ENV form is deprecated).
ENV MODEL_PATH="/app/models/custom_model"

# Install GCC (via build-essential).
# --no-install-recommends skips optional recommended packages, and the apt
# package lists are removed in the same RUN so they never persist in a layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends build-essential && \
    rm -rf /var/lib/apt/lists/*
Comment thread
aoyshi marked this conversation as resolved.
Outdated

# Start the vllm.entrypoints.openai.api_server module.
# A shell is used so that $MODEL_PATH and $VLLM_ARGS are expanded at container start:
#   - exec replaces the shell with the Python process, so the server itself
#     receives the signals sent by docker stop.
#   - $MODEL_PATH is quoted so a path containing spaces stays a single argument.
#   - $VLLM_ARGS is deliberately left unquoted so it splits into separate CLI arguments.
ENTRYPOINT ["sh", "-c", "exec python3 -m vllm.entrypoints.openai.api_server --model \"$MODEL_PATH\" $VLLM_ARGS"]
Comment thread
aoyshi marked this conversation as resolved.