diff --git a/.dockerignore b/.dockerignore index 4eb2972cba..73ae7ffbe1 100644 --- a/.dockerignore +++ b/.dockerignore @@ -18,4 +18,4 @@ venv __pycache__ # Docker files -Dockerfile \ No newline at end of file +Dockerfile diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml index 0bd12a87a3..ae6e47cc1c 100644 --- a/.github/workflows/ci-pr-checks.yaml +++ b/.github/workflows/ci-pr-checks.yaml @@ -23,6 +23,9 @@ jobs: src: - '**/*.go' - '**/*.py' + - Dockerfile.epp + - Dockerfile.sidecar + - Makefile* lint-and-test: needs: check-changes if: ${{ needs.check-changes.outputs.src == 'true' }} @@ -62,8 +65,7 @@ jobs: - name: Install dependencies run: | go mod tidy - sudo -E env "PATH=$PATH" make install-dependencies - sudo -E env "PATH=$PATH" make install-python-deps + sudo -E env "PATH=$PATH" make install-dependencies install-python-deps - name: Run lint checks uses: golangci/golangci-lint-action@v9 diff --git a/Dockerfile.epp b/Dockerfile.epp index f344914dd8..9d1e89fdb5 100644 --- a/Dockerfile.epp +++ b/Dockerfile.epp @@ -18,17 +18,27 @@ RUN go mod download # Extract version dynamically and copy to a known location RUN KV_CACHE_PKG=$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache) && \ mkdir -p /workspace/kv-cache && \ - cp -r $KV_CACHE_PKG/* /workspace/kv-cache && \ - chmod +x /workspace/kv-cache/pkg/preprocessing/chat_completions/setup.sh + cp -r $KV_CACHE_PKG/* /workspace/kv-cache FROM python:3.12-slim AS python-builder -RUN apt-get update && apt-get install -y --no-install-recommends build-essential +ARG TARGETARCH COPY --from=go-deps /workspace/kv-cache /workspace/kv-cache WORKDIR /workspace/kv-cache -# llm-d-kv-cache's Makefile. not llm-d-inference-scheduler's -RUN KV_CACHE_PKG=/workspace/kv-cache make install-python-deps + +# Create venv and install vLLM based on architecture using pre-built wheels +RUN python3.12 -m venv /workspace/kv-cache/build/venv && \ + . /workspace/kv-cache/build/venv/bin/activate && \ + pip install --upgrade pip && \ + VLLM_VERSION="0.14.0" && \ + if [ "$TARGETARCH" = "arm64" ]; then \ + pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ + elif [ "$TARGETARCH" = "amd64" ]; then \ + pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ + else \ + echo "ERROR: Unsupported architecture: $TARGETARCH. Only arm64 and amd64 are supported." && exit 1; \ + fi # Go build stage FROM quay.io/projectquay/golang:1.24 AS go-builder diff --git a/Makefile.tools.mk b/Makefile.tools.mk index 54eb912e9b..7bf41b3694 100644 --- a/Makefile.tools.mk +++ b/Makefile.tools.mk @@ -18,6 +18,7 @@ GINKGO_VERSION ?= v2.27.2 GOLANGCI_LINT_VERSION ?= v2.1.6 KUSTOMIZE_VERSION ?= v5.5.0 TYPOS_VERSION ?= v1.34.0 +VLLM_VERSION ?= 0.14.0 ## Python Configuration PYTHON_VERSION ?= 3.12 @@ -194,14 +195,30 @@ install-python-deps: setup-venv ## installs dependencies. @printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n" @echo "install vllm..." @KV_CACHE_PKG=$${KV_CACHE_PKG:-$$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache 2>/dev/null)}; \ - if [ -n "$$KV_CACHE_PKG" ] && [ -f "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" ]; then \ - echo "Running kv-cache setup script..."; \ - cp "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" build/kv-cache-setup.sh; \ - chmod +x build/kv-cache-setup.sh; \ - cd build && PATH=$(VENV_BIN):$$PATH ./kv-cache-setup.sh && cd ..; \ - else \ - echo "ERROR: kv-cache package not found or setup script missing."; \ + if [ -z "$$KV_CACHE_PKG" ]; then \ + echo "ERROR: kv-cache package not found."; \ exit 1; \ + fi; \ + if [ "$(TARGETOS)" = "darwin" ]; then \ + if [ -f "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" ]; then \ + echo "Running kv-cache setup script for macOS..."; \ + cp "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" build/kv-cache-setup.sh; \ + chmod +wx build/kv-cache-setup.sh; \ + cd build && PATH=$(VENV_BIN):$$PATH ./kv-cache-setup.sh && cd ..; \ + else \ + echo "ERROR: setup script not found at $$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh"; \ + exit 1; \ + fi; \ + else \ + echo "Installing vLLM for Linux $(TARGETARCH)..."; \ + if [ "$(TARGETARCH)" = "arm64" ]; then \ + $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v$(VLLM_VERSION)/vllm-$(VLLM_VERSION)+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \ + elif [ "$(TARGETARCH)" = "amd64" ]; then \ + $(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v$(VLLM_VERSION)/vllm-$(VLLM_VERSION)+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \ + else \ + echo "ERROR: Unsupported architecture: $(TARGETARCH). Only arm64 and amd64 are supported."; \ + exit 1; \ + fi; \ fi @echo "Verifying vllm installation..." @$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \ diff --git a/go.mod b/go.mod index 1736a145db..72a76aac8c 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/google/uuid v1.6.0 github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/jellydator/ttlcache/v3 v3.4.0 - github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383 + github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260121180456-e3fafddd09f4 github.com/onsi/ginkgo/v2 v2.27.5 github.com/onsi/gomega v1.39.0 github.com/openai/openai-go v1.12.0 diff --git a/go.sum b/go.sum index 9394de89cb..3fdd19345a 100644 --- a/go.sum +++ b/go.sum @@ -183,8 +183,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383 h1:cHA1+Qe27oYDsWTmNToiidMBty6bxfuCUXvyhGPJ2FM= -github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383/go.mod h1:XyhzHBYeOWamBMPkuRySB5nJ0zzQpK/mbuXKqJRFT6A= +github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260121180456-e3fafddd09f4 h1:3LHSnDQ2tLsSIbh4BgN+7RYz/Wi+KjvIigcxVHb3mkE= +github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260121180456-e3fafddd09f4/go.mod h1:XyhzHBYeOWamBMPkuRySB5nJ0zzQpK/mbuXKqJRFT6A= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo=