From 0494dbd298aaa237137b5b78fe75cf54a45eb617 Mon Sep 17 00:00:00 2001 From: HyunKyun Moon Date: Mon, 26 Jan 2026 10:44:26 +0000 Subject: [PATCH 1/2] edit build Signed-off-by: HyunKyun Moon --- .github/workflows/ci-examples.yaml | 2 +- .github/workflows/ci-pr-checks.yaml | 18 +++++---- Dockerfile | 39 ++++++++---------- Makefile | 62 ++++++++++++++--------------- 4 files changed, 57 insertions(+), 64 deletions(-) diff --git a/.github/workflows/ci-examples.yaml b/.github/workflows/ci-examples.yaml index 981253f44..8e02c9b2e 100644 --- a/.github/workflows/ci-examples.yaml +++ b/.github/workflows/ci-examples.yaml @@ -27,7 +27,7 @@ jobs: sudo apt-get update sudo add-apt-repository ppa:deadsnakes/ppa -y sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format - cp -a /var/cache/apt/archives/*.deb apt-archives + cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true - name: Extract Go version from go.mod run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> $GITHUB_ENV diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml index 8eaf3b65f..2a5ecfb29 100644 --- a/.github/workflows/ci-pr-checks.yaml +++ b/.github/workflows/ci-pr-checks.yaml @@ -28,7 +28,7 @@ jobs: sudo apt-get update sudo add-apt-repository ppa:deadsnakes/ppa -y sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format - cp -a /var/cache/apt/archives/*.deb apt-archives + cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true - name: Sanity check repo contents run: ls -la @@ -42,8 +42,9 @@ jobs: go-version: "${{ env.GO_VERSION }}" cache-dependency-path: ./go.sum - - name: Install dependencies - run: go mod download + # Set up the Python virtual environment (includes Python config verification) + - name: Run setup-venv + run: make setup-venv - name: Cache Python (pip) dependencies uses: actions/cache@v4 @@ -56,9 +57,10 @@ jobs: restore-keys: | ${{ runner.os 
}}-pip- - # Set up the Python virtual environment (includes Python config verification) - - name: Run setup-venv - run: make setup-venv + - name: Install dependencies + run: | + go mod download + make install-python-deps - name: Install golangci-lint without running it uses: golangci/golangci-lint-action@v8 @@ -79,10 +81,10 @@ jobs: - name: Run C/C++/CUDA formatting check run: make clang + - name: Run make build shell: bash - run: | - make build + run: make build - name: Run make test shell: bash diff --git a/Dockerfile b/Dockerfile index 3a88941d1..afbb48616 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,25 +14,16 @@ FROM python:3.12-slim AS python-builder -ARG TARGETARCH +ARG TARGETOS=linux +ARG TARGETARCH=amd64 WORKDIR /workspace RUN apt-get update && apt-get install -y --no-install-recommends build-essential +COPY Makefile Makefile COPY pkg/preprocessing/chat_completions/ pkg/preprocessing/chat_completions/ -# Create venv and install vLLM based on architecture using pre-built wheels -RUN python3.12 -m venv /workspace/build/venv && \ - . /workspace/build/venv/bin/activate && \ - pip install --upgrade pip && \ - VLLM_VERSION="0.14.0" && \ - if [ "$TARGETARCH" = "arm64" ]; then \ - pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ - elif [ "$TARGETARCH" = "amd64" ]; then \ - pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ - else \ - echo "ERROR: Unsupported architecture: $TARGETARCH. Only arm64 and amd64 are supported." && exit 1; \ - fi +RUN TARGETOS=${TARGETOS} TARGETARCH=${TARGETARCH} make install-python-deps # Build Stage: using Go 1.24.1 image FROM quay.io/projectquay/golang:1.24 AS builder @@ -60,17 +51,18 @@ RUN go mod download # Copy the source code. COPY . . 
-# HuggingFace tokenizer bindings -RUN mkdir -p lib -ARG RELEASE_VERSION=v1.22.1 -RUN curl -L https://github.com/daulet/tokenizers/releases/download/${RELEASE_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib -RUN ranlib lib/*.a - # Copy this project's own Python source code into the final image COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /workspace/pkg/preprocessing/chat_completions RUN make setup-venv COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages -RUN make build + +# Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's +# Python code and the installed libraries (site-packages) are found at runtime. +ENV PYTHONPATH=/workspace/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages +RUN python3.12 -c "import tokenizer_wrapper" + +ARG RELEASE_VERSION=v1.22.1 +RUN TOKENIZER_VERSION=${RELEASE_VERSION} make build # Use distroless as minimal base image to package the manager binary # Refer to https://github.com/GoogleContainerTools/distroless for more details @@ -85,14 +77,15 @@ RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9. # Copy this project's own Python source code into the final image COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /app/pkg/preprocessing/chat_completions -COPY --from=python-builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages +COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages # Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's # Python code and the installed libraries (site-packages) are found at runtime. 
-ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/usr/lib64/python3.12/site-packages +ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages +RUN python3.12 -c "import tokenizer_wrapper" # Copy the compiled Go application -COPY --from=builder /workspace/bin/llm-d-kv-cache-manager /app/kv-cache-manager +COPY --from=builder /workspace/bin/llm-d-kv-cache /app/kv-cache-manager USER 65532:65532 # Set the entrypoint to the kv-cache-manager binary diff --git a/Makefile b/Makefile index 947b6cfd5..6a19db2e8 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ help: ## Print help TOKENIZER_LIB = lib/libtokenizers.a # Extract RELEASE_VERSION from Dockerfile -TOKENIZER_VERSION := $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2) +TOKENIZER_VERSION ?= $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2) .PHONY: download-tokenizer download-tokenizer: $(TOKENIZER_LIB) @@ -116,7 +116,7 @@ detect-python: ## Detects Python and prints the configuration. .PHONY: setup-venv setup-venv: detect-python ## Sets up the Python virtual environment. @printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n" - @if [ ! -f "$(VENV_BIN)/pip" ]; then \ + @if [ ! -f "$(VENV_BIN)/python" ]; then \ echo "Creating virtual environment..."; \ $(PYTHON_EXE) -m venv $(VENV_DIR) || { \ echo "ERROR: Failed to create virtual environment."; \ @@ -132,40 +132,38 @@ setup-venv: detect-python ## Sets up the Python virtual environment. .PHONY: install-python-deps install-python-deps: setup-venv ## installs dependencies. @printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n" - @if [ ! 
-f "$(VENV_BIN)/pip" ]; then \ - echo "Creating virtual environment..."; \ - $(PYTHON_EXE) -m venv $(VENV_DIR) || { \ - echo "ERROR: Failed to create virtual environment."; \ - echo "Your Python installation may be missing the 'venv' module."; \ - echo "Try: 'sudo apt install python$(PYTHON_VERSION)-venv' or 'sudo dnf install python$(PYTHON_VERSION)-devel'"; \ - exit 1; \ - }; \ + @if [ ! -f "$(VENV_BIN)/python" ]; then \ + echo "ERROR: Virtual environment not found. Run 'make setup-venv' first."; \ + exit 1; \ fi - @echo "Upgrading pip and installing dependencies..." - - @if [ "$(TARGETOS)" = "linux" ]; then \ - if [ "$(TARGETARCH)" = "amd64" ]; then \ - echo "Installing vLLM pre-built wheel for x86_64..."; \ - $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ - elif [ "$(TARGETARCH)" = "arm64" ]; then \ - echo "Installing vLLM pre-built wheel for aarch64..."; \ - $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ - else \ - echo "Unsupported Linux architecture: $(TARGETARCH). 
Falling back to setup.sh..."; \ + @if $(VENV_BIN)/python -c "import vllm" 2>/dev/null; then \ + echo "vllm is already installed, skipping..."; \ + else \ + echo "Installing vllm..."; \ + if [ "$(TARGETOS)" = "linux" ]; then \ + if [ "$(TARGETARCH)" = "amd64" ]; then \ + echo "Installing vLLM pre-built wheel for x86_64..."; \ + $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ + elif [ "$(TARGETARCH)" = "arm64" ]; then \ + echo "Installing vLLM pre-built wheel for aarch64..."; \ + $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ + else \ + echo "Unsupported Linux architecture: $(TARGETARCH). Falling back to setup.sh..."; \ + PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \ + fi; \ + elif [ "$(TARGETOS)" = "darwin" ]; then \ + echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \ PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \ + else \ + echo "Unsupported OS: $(TARGETOS)"; \ + exit 1; \ fi; \ - elif [ "$(TARGETOS)" = "darwin" ]; then \ - echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \ - PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \ - else \ - echo "Unsupported OS: $(TARGETOS)"; \ - exit 1; \ + echo "Verifying vllm installation..."; \ + $(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \ + echo "ERROR: vllm library not properly installed in venv."; \ + exit 1; \ + }; \ fi - @echo "Verifying vllm installation..." 
- @$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \ - echo "ERROR: vllm library not properly installed in venv."; \ - exit 1; \ - } .PHONY: install-hf-cli install-hf-cli: From 433a2be58ae528780c9fbeb23a93e20b1e477e44 Mon Sep 17 00:00:00 2001 From: HyunKyun Moon Date: Mon, 26 Jan 2026 10:47:00 +0000 Subject: [PATCH 2/2] early exit Flatten the install-python-deps recipe by exiting early when vllm is already importable. Unless .ONESHELL is set, make runs each recipe line in its own shell, so an 'exit 0' on a separate recipe line would only end that line and the install and verify steps would still run. Keep the skip check, the install and the verification chained in a single shell invocation so the early exit really skips the remaining steps. Signed-off-by: HyunKyun Moon --- Makefile | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index 6a19db2e8..6b417935a 100644 --- a/Makefile +++ b/Makefile @@ -138,32 +138,32 @@ install-python-deps: setup-venv ## installs dependencies. fi @if $(VENV_BIN)/python -c "import vllm" 2>/dev/null; then \ echo "vllm is already installed, skipping..."; \ - else \ - echo "Installing vllm..."; \ - if [ "$(TARGETOS)" = "linux" ]; then \ - if [ "$(TARGETARCH)" = "amd64" ]; then \ - echo "Installing vLLM pre-built wheel for x86_64..."; \ - $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ - elif [ "$(TARGETARCH)" = "arm64" ]; then \ - echo "Installing vLLM pre-built wheel for aarch64..."; \ - $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ - else \ - echo "Unsupported Linux architecture: $(TARGETARCH). Falling back to setup.sh..."; \ - PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \ - fi; \ - elif [ "$(TARGETOS)" = "darwin" ]; then \ - echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \ - PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \ + exit 0; \ + fi; \ + echo "Installing vllm..."; \ 
+ if [ "$(TARGETOS)" = "linux" ]; then \ + if [ "$(TARGETARCH)" = "amd64" ]; then \ + echo "Installing vLLM pre-built wheel for x86_64..."; \ + $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ + elif [ "$(TARGETARCH)" = "arm64" ]; then \ + echo "Installing vLLM pre-built wheel for aarch64..."; \ + $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ else \ - echo "Unsupported OS: $(TARGETOS)"; \ - exit 1; \ + echo "Unsupported Linux architecture: $(TARGETARCH). Falling back to setup.sh..."; \ + PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \ fi; \ - echo "Verifying vllm installation..."; \ - $(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \ - echo "ERROR: vllm library not properly installed in venv."; \ - exit 1; \ - }; \ + elif [ "$(TARGETOS)" = "darwin" ]; then \ + echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \ + PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \ + else \ + echo "Unsupported OS: $(TARGETOS)"; \ + exit 1; \ - fi + fi; \ + echo "Verifying vllm installation..."; \ + $(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \ + echo "ERROR: vllm library not properly installed in venv."; \ + exit 1; \ + } .PHONY: install-hf-cli install-hf-cli: