Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ venv
__pycache__

# Docker files
Dockerfile
Dockerfile
6 changes: 4 additions & 2 deletions .github/workflows/ci-pr-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ jobs:
src:
- '**/*.go'
- '**/*.py'
- Dockerfile.epp
- Dockerfile.sidecar
- Makefile*
lint-and-test:
needs: check-changes
if: ${{ needs.check-changes.outputs.src == 'true' }}
Expand Down Expand Up @@ -62,8 +65,7 @@ jobs:
- name: Install dependencies
run: |
go mod tidy
sudo -E env "PATH=$PATH" make install-dependencies
sudo -E env "PATH=$PATH" make install-python-deps
sudo -E env "PATH=$PATH" make install-dependencies install-python-deps

- name: Run lint checks
uses: golangci/golangci-lint-action@v9
Expand Down
20 changes: 15 additions & 5 deletions Dockerfile.epp
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,27 @@ RUN go mod download
# Extract version dynamically and copy to a known location
RUN KV_CACHE_PKG=$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache) && \
mkdir -p /workspace/kv-cache && \
cp -r $KV_CACHE_PKG/* /workspace/kv-cache && \
chmod +x /workspace/kv-cache/pkg/preprocessing/chat_completions/setup.sh
cp -r $KV_CACHE_PKG/* /workspace/kv-cache

FROM python:3.12-slim AS python-builder

RUN apt-get update && apt-get install -y --no-install-recommends build-essential
ARG TARGETARCH

COPY --from=go-deps /workspace/kv-cache /workspace/kv-cache
WORKDIR /workspace/kv-cache
# llm-d-kv-cache's Makefile. not llm-d-inference-scheduler's
RUN KV_CACHE_PKG=/workspace/kv-cache make install-python-deps

# Create venv and install vLLM based on architecture using pre-built wheels
RUN python3.12 -m venv /workspace/kv-cache/build/venv && \
. /workspace/kv-cache/build/venv/bin/activate && \
pip install --upgrade pip && \
VLLM_VERSION="0.14.0" && \
Comment thread
elevran marked this conversation as resolved.
if [ "$TARGETARCH" = "arm64" ]; then \
pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
elif [ "$TARGETARCH" = "amd64" ]; then \
pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
else \
echo "ERROR: Unsupported architecture: $TARGETARCH. Only arm64 and amd64 are supported." && exit 1; \
fi

# Go build stage
FROM quay.io/projectquay/golang:1.24 AS go-builder
Expand Down
31 changes: 24 additions & 7 deletions Makefile.tools.mk
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ GINKGO_VERSION ?= v2.27.2
GOLANGCI_LINT_VERSION ?= v2.1.6
KUSTOMIZE_VERSION ?= v5.5.0
TYPOS_VERSION ?= v1.34.0
VLLM_VERSION ?= 0.14.0

## Python Configuration
PYTHON_VERSION ?= 3.12
Expand Down Expand Up @@ -194,14 +195,30 @@ install-python-deps: setup-venv ## installs dependencies.
@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
@echo "install vllm..."
@KV_CACHE_PKG=$${KV_CACHE_PKG:-$$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache 2>/dev/null)}; \
if [ -n "$$KV_CACHE_PKG" ] && [ -f "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" ]; then \
echo "Running kv-cache setup script..."; \
cp "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" build/kv-cache-setup.sh; \
chmod +x build/kv-cache-setup.sh; \
cd build && PATH=$(VENV_BIN):$$PATH ./kv-cache-setup.sh && cd ..; \
else \
echo "ERROR: kv-cache package not found or setup script missing."; \
if [ -z "$$KV_CACHE_PKG" ]; then \
echo "ERROR: kv-cache package not found."; \
Comment thread
elevran marked this conversation as resolved.
exit 1; \
fi; \
if [ "$(TARGETOS)" = "darwin" ]; then \
if [ -f "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" ]; then \
echo "Running kv-cache setup script for macOS..."; \
cp "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" build/kv-cache-setup.sh; \
chmod +wx build/kv-cache-setup.sh; \
cd build && PATH=$(VENV_BIN):$$PATH ./kv-cache-setup.sh && cd ..; \
else \
echo "ERROR: setup script not found at $$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh"; \
exit 1; \
fi; \
else \
echo "Installing vLLM for Linux $(TARGETARCH)..."; \
if [ "$(TARGETARCH)" = "arm64" ]; then \
$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v$(VLLM_VERSION)/vllm-$(VLLM_VERSION)+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
elif [ "$(TARGETARCH)" = "amd64" ]; then \
$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v$(VLLM_VERSION)/vllm-$(VLLM_VERSION)+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
else \
echo "ERROR: Unsupported architecture: $(TARGETARCH). Only arm64 and amd64 are supported."; \
exit 1; \
fi; \
fi
@echo "Verifying vllm installation..."
@$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ require (
github.com/google/uuid v1.6.0
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/jellydator/ttlcache/v3 v3.4.0
github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383
github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260121180456-e3fafddd09f4
github.com/onsi/ginkgo/v2 v2.27.5
github.com/onsi/gomega v1.39.0
github.com/openai/openai-go v1.12.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383 h1:cHA1+Qe27oYDsWTmNToiidMBty6bxfuCUXvyhGPJ2FM=
github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383/go.mod h1:XyhzHBYeOWamBMPkuRySB5nJ0zzQpK/mbuXKqJRFT6A=
github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260121180456-e3fafddd09f4 h1:3LHSnDQ2tLsSIbh4BgN+7RYz/Wi+KjvIigcxVHb3mkE=
github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260121180456-e3fafddd09f4/go.mod h1:XyhzHBYeOWamBMPkuRySB5nJ0zzQpK/mbuXKqJRFT6A=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo=
Expand Down