diff --git a/.github/workflows/ci-examples.yaml b/.github/workflows/ci-examples.yaml index 981253f44..8e02c9b2e 100644 --- a/.github/workflows/ci-examples.yaml +++ b/.github/workflows/ci-examples.yaml @@ -27,7 +27,7 @@ jobs: sudo apt-get update sudo add-apt-repository ppa:deadsnakes/ppa -y sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format - cp -a /var/cache/apt/archives/*.deb apt-archives + cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true - name: Extract Go version from go.mod run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> $GITHUB_ENV diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml index 8eaf3b65f..2a5ecfb29 100644 --- a/.github/workflows/ci-pr-checks.yaml +++ b/.github/workflows/ci-pr-checks.yaml @@ -28,7 +28,7 @@ jobs: sudo apt-get update sudo add-apt-repository ppa:deadsnakes/ppa -y sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format - cp -a /var/cache/apt/archives/*.deb apt-archives + cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true - name: Sanity check repo contents run: ls -la @@ -42,8 +42,9 @@ jobs: go-version: "${{ env.GO_VERSION }}" cache-dependency-path: ./go.sum - - name: Install dependencies - run: go mod download + # Set up the Python virtual environment (includes Python config verification) + - name: Run setup-venv + run: make setup-venv - name: Cache Python (pip) dependencies uses: actions/cache@v4 @@ -56,9 +57,10 @@ jobs: restore-keys: | ${{ runner.os }}-pip- - # Set up the Python virtual environment (includes Python config verification) - - name: Run setup-venv - run: make setup-venv + - name: Install dependencies + run: | + go mod download + make install-python-deps - name: Install golangci-lint without running it uses: golangci/golangci-lint-action@v8 @@ -79,10 +81,10 @@ jobs: - name: Run C/C++/CUDA formatting check run: make clang + - name: Run make build shell: bash - run: | - make build + run: make build - name: Run make test shell: bash diff --git a/Dockerfile b/Dockerfile index 3a88941d1..afbb48616 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,25 +14,16 @@ FROM python:3.12-slim AS python-builder -ARG TARGETARCH +ARG TARGETOS=linux +ARG TARGETARCH=amd64 WORKDIR /workspace RUN apt-get update && apt-get install -y --no-install-recommends build-essential +COPY Makefile Makefile COPY pkg/preprocessing/chat_completions/ pkg/preprocessing/chat_completions/ -# Create venv and install vLLM based on architecture using pre-built wheels -RUN python3.12 -m venv /workspace/build/venv && \ - . /workspace/build/venv/bin/activate && \ - pip install --upgrade pip && \ - VLLM_VERSION="0.14.0" && \ - if [ "$TARGETARCH" = "arm64" ]; then \ - pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ - elif [ "$TARGETARCH" = "amd64" ]; then \ - pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ - else \ - echo "ERROR: Unsupported architecture: $TARGETARCH. Only arm64 and amd64 are supported." && exit 1; \ - fi +RUN TARGETOS=${TARGETOS} TARGETARCH=${TARGETARCH} make install-python-deps # Build Stage: using Go 1.24.1 image FROM quay.io/projectquay/golang:1.24 AS builder @@ -60,17 +51,18 @@ RUN go mod download # Copy the source code. COPY . . -# HuggingFace tokenizer bindings -RUN mkdir -p lib -ARG RELEASE_VERSION=v1.22.1 -RUN curl -L https://github.com/daulet/tokenizers/releases/download/${RELEASE_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib -RUN ranlib lib/*.a - # Copy this project's own Python source code into the final image COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /workspace/pkg/preprocessing/chat_completions RUN make setup-venv COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages -RUN make build + +# Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's +# Python code and the installed libraries (site-packages) are found at runtime. +ENV PYTHONPATH=/workspace/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages +RUN python3.12 -c "import tokenizer_wrapper" + +ARG RELEASE_VERSION=v1.22.1 +RUN TOKENIZER_VERSION=${RELEASE_VERSION} make build # Use distroless as minimal base image to package the manager binary # Refer to https://github.com/GoogleContainerTools/distroless for more details @@ -85,14 +77,15 @@ RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9. # Copy this project's own Python source code into the final image COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /app/pkg/preprocessing/chat_completions -COPY --from=python-builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages +COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages # Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's # Python code and the installed libraries (site-packages) are found at runtime. -ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/usr/lib64/python3.12/site-packages +ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages +RUN python3.12 -c "import tokenizer_wrapper" # Copy the compiled Go application -COPY --from=builder /workspace/bin/llm-d-kv-cache-manager /app/kv-cache-manager +COPY --from=builder /workspace/bin/llm-d-kv-cache /app/kv-cache-manager USER 65532:65532 # Set the entrypoint to the kv-cache-manager binary diff --git a/Makefile b/Makefile index 947b6cfd5..6b417935a 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ help: ## Print help TOKENIZER_LIB = lib/libtokenizers.a # Extract RELEASE_VERSION from Dockerfile -TOKENIZER_VERSION := $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2) +TOKENIZER_VERSION ?= $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2) .PHONY: download-tokenizer download-tokenizer: $(TOKENIZER_LIB) @@ -116,7 +116,7 @@ detect-python: ## Detects Python and prints the configuration. .PHONY: setup-venv setup-venv: detect-python ## Sets up the Python virtual environment. @printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n" - @if [ ! -f "$(VENV_BIN)/pip" ]; then \ + @if [ ! -f "$(VENV_BIN)/python" ]; then \ echo "Creating virtual environment..."; \ $(PYTHON_EXE) -m venv $(VENV_DIR) || { \ echo "ERROR: Failed to create virtual environment."; \ @@ -132,17 +132,15 @@ setup-venv: detect-python ## Sets up the Python virtual environment. .PHONY: install-python-deps install-python-deps: setup-venv ## installs dependencies. @printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n" - @if [ ! -f "$(VENV_BIN)/pip" ]; then \ - echo "Creating virtual environment..."; \ - $(PYTHON_EXE) -m venv $(VENV_DIR) || { \ - echo "ERROR: Failed to create virtual environment."; \ - echo "Your Python installation may be missing the 'venv' module."; \ - echo "Try: 'sudo apt install python$(PYTHON_VERSION)-venv' or 'sudo dnf install python$(PYTHON_VERSION)-devel'"; \ - exit 1; \ - }; \ + @if [ ! -f "$(VENV_BIN)/python" ]; then \ + echo "ERROR: Virtual environment not found. Run 'make setup-venv' first."; \ + exit 1; \ + fi + @if $(VENV_BIN)/python -c "import vllm" 2>/dev/null; then \ + echo "vllm is already installed, skipping..."; \ + exit 0; \ fi - @echo "Upgrading pip and installing dependencies..." - + @echo "Installing vllm..." @if [ "$(TARGETOS)" = "linux" ]; then \ if [ "$(TARGETARCH)" = "amd64" ]; then \ echo "Installing vLLM pre-built wheel for x86_64..."; \