Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
sudo apt-get update
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format
cp -a /var/cache/apt/archives/*.deb apt-archives
cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true

- name: Extract Go version from go.mod
run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> $GITHUB_ENV
Expand Down
18 changes: 10 additions & 8 deletions .github/workflows/ci-pr-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
sudo apt-get update
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format
cp -a /var/cache/apt/archives/*.deb apt-archives
cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true

- name: Sanity check repo contents
run: ls -la
Expand All @@ -42,8 +42,9 @@ jobs:
go-version: "${{ env.GO_VERSION }}"
cache-dependency-path: ./go.sum

- name: Install dependencies
run: go mod download
# Set up the Python virtual environment (includes Python config verification)
- name: Run setup-venv
run: make setup-venv

- name: Cache Python (pip) dependencies
uses: actions/cache@v4
Expand All @@ -56,9 +57,10 @@ jobs:
restore-keys: |
${{ runner.os }}-pip-

# Set up the Python virtual environment (includes Python config verification)
- name: Run setup-venv
run: make setup-venv
- name: Install dependencies
run: |
go mod download
make install-python-deps

- name: Install golangci-lint without running it
uses: golangci/golangci-lint-action@v8
Expand All @@ -79,10 +81,10 @@ jobs:

- name: Run C/C++/CUDA formatting check
run: make clang

- name: Run make build
shell: bash
run: |
make build
run: make build

- name: Run make test
shell: bash
Expand Down
39 changes: 16 additions & 23 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,16 @@

FROM python:3.12-slim AS python-builder

ARG TARGETARCH
ARG TARGETOS=linux
ARG TARGETARCH=amd64

WORKDIR /workspace

RUN apt-get update && apt-get install -y --no-install-recommends build-essential

COPY Makefile Makefile
COPY pkg/preprocessing/chat_completions/ pkg/preprocessing/chat_completions/
# Create venv and install vLLM based on architecture using pre-built wheels
RUN python3.12 -m venv /workspace/build/venv && \
. /workspace/build/venv/bin/activate && \
pip install --upgrade pip && \
VLLM_VERSION="0.14.0" && \
if [ "$TARGETARCH" = "arm64" ]; then \
pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
elif [ "$TARGETARCH" = "amd64" ]; then \
pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
else \
echo "ERROR: Unsupported architecture: $TARGETARCH. Only arm64 and amd64 are supported." && exit 1; \
fi
RUN TARGETOS=${TARGETOS} TARGETARCH=${TARGETARCH} make install-python-deps

# Build Stage: using the Go 1.24 image
FROM quay.io/projectquay/golang:1.24 AS builder
Expand Down Expand Up @@ -60,17 +51,18 @@ RUN go mod download
# Copy the source code.
COPY . .

# HuggingFace tokenizer bindings
RUN mkdir -p lib
ARG RELEASE_VERSION=v1.22.1
RUN curl -L https://github.com/daulet/tokenizers/releases/download/${RELEASE_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
RUN ranlib lib/*.a

# Copy this project's own Python source code into the builder stage
COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /workspace/pkg/preprocessing/chat_completions
RUN make setup-venv
COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages
RUN make build

# Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's
# Python code and the installed libraries (site-packages) are found at runtime.
ENV PYTHONPATH=/workspace/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages
RUN python3.12 -c "import tokenizer_wrapper"

ARG RELEASE_VERSION=v1.22.1
RUN TOKENIZER_VERSION=${RELEASE_VERSION} make build

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
Expand All @@ -85,14 +77,15 @@ RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.

# Copy this project's own Python source code into the final image
COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /app/pkg/preprocessing/chat_completions
COPY --from=python-builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages

# Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's
# Python code and the installed libraries (site-packages) are found at runtime.
ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/usr/lib64/python3.12/site-packages
ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages
Comment on lines 79 to +84
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The file paths are inconsistent in the final image stage. Line 76 copies Python source code to '/app/pkg/preprocessing/chat_completions', while line 77 copies site-packages to '/workspace/build/venv/lib/python3.12/site-packages'. The PYTHONPATH on line 81 reflects this mixed structure with both '/app/' and '/workspace/' prefixes. While this will likely function correctly, it's inconsistent and harder to maintain. Consider organizing all Python-related files under a single directory prefix (either '/app/' or '/workspace/') for better clarity and maintainability.

Copilot uses AI. Check for mistakes.
RUN python3.12 -c "import tokenizer_wrapper"

# Copy the compiled Go application
COPY --from=builder /workspace/bin/llm-d-kv-cache-manager /app/kv-cache-manager
COPY --from=builder /workspace/bin/llm-d-kv-cache /app/kv-cache-manager
USER 65532:65532

# Set the entrypoint to the kv-cache-manager binary
Expand Down
22 changes: 10 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ help: ## Print help
TOKENIZER_LIB = lib/libtokenizers.a

# Extract RELEASE_VERSION from Dockerfile
TOKENIZER_VERSION := $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2)
TOKENIZER_VERSION ?= $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2)

.PHONY: download-tokenizer
download-tokenizer: $(TOKENIZER_LIB)
Comment on lines 31 to 35
Copy link

Copilot AI Jan 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TOKENIZER_VERSION defined here is used by the download-tokenizer target below to fetch a precompiled native libtokenizers archive from https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/... via curl | tar. The archive is extracted without any checksum, signature, or content-hash verification, and the resulting library is linked into your binary. If the upstream GitHub release, the tag $(TOKENIZER_VERSION), or the network path is compromised, an attacker can silently inject arbitrary native code into the built kv-cache binary, leading to full compromise of any environment running this image. To mitigate, do one of the following: pin the dependency to an immutable identifier (e.g., a specific release asset hash or commit); verify the downloaded archive against a known checksum or signature before extraction; or vendor the library into the repository or a trusted internal artifact store.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dependency will be removed in the upcoming refactor. Given the short lifespan, I'd prefer to keep it simple.

Expand Down Expand Up @@ -116,7 +116,7 @@ detect-python: ## Detects Python and prints the configuration.
.PHONY: setup-venv
setup-venv: detect-python ## Sets up the Python virtual environment.
@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
@if [ ! -f "$(VENV_BIN)/pip" ]; then \
@if [ ! -f "$(VENV_BIN)/python" ]; then \
echo "Creating virtual environment..."; \
$(PYTHON_EXE) -m venv $(VENV_DIR) || { \
echo "ERROR: Failed to create virtual environment."; \
Expand All @@ -132,17 +132,15 @@ setup-venv: detect-python ## Sets up the Python virtual environment.
.PHONY: install-python-deps
install-python-deps: setup-venv ## installs dependencies.
@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
@if [ ! -f "$(VENV_BIN)/pip" ]; then \
echo "Creating virtual environment..."; \
$(PYTHON_EXE) -m venv $(VENV_DIR) || { \
echo "ERROR: Failed to create virtual environment."; \
echo "Your Python installation may be missing the 'venv' module."; \
echo "Try: 'sudo apt install python$(PYTHON_VERSION)-venv' or 'sudo dnf install python$(PYTHON_VERSION)-devel'"; \
exit 1; \
}; \
@if [ ! -f "$(VENV_BIN)/python" ]; then \
Comment thread
hyeongyun0916 marked this conversation as resolved.
echo "ERROR: Virtual environment not found. Run 'make setup-venv' first."; \
exit 1; \
fi
@if $(VENV_BIN)/python -c "import vllm" 2>/dev/null; then \
echo "vllm is already installed, skipping..."; \
exit 0; \
fi
@echo "Upgrading pip and installing dependencies..."

@echo "Installing vllm..."
@if [ "$(TARGETOS)" = "linux" ]; then \
if [ "$(TARGETARCH)" = "amd64" ]; then \
echo "Installing vLLM pre-built wheel for x86_64..."; \
Expand Down
Loading