From 0494dbd298aaa237137b5b78fe75cf54a45eb617 Mon Sep 17 00:00:00 2001
From: HyunKyun Moon <mhg5303@gmail.com>
Date: Mon, 26 Jan 2026 10:44:26 +0000
Subject: [PATCH 1/2] build: route vLLM install through the Makefile in CI and Dockerfile

Signed-off-by: HyunKyun Moon <mhg5303@gmail.com>
---
 .github/workflows/ci-examples.yaml  |  2 +-
 .github/workflows/ci-pr-checks.yaml | 18 +++++----
 Dockerfile                          | 39 ++++++++----------
 Makefile                            | 62 ++++++++++++++---------------
 4 files changed, 57 insertions(+), 64 deletions(-)

diff --git a/.github/workflows/ci-examples.yaml b/.github/workflows/ci-examples.yaml
index 981253f44..8e02c9b2e 100644
--- a/.github/workflows/ci-examples.yaml
+++ b/.github/workflows/ci-examples.yaml
@@ -27,7 +27,7 @@ jobs:
           sudo apt-get update
           sudo add-apt-repository ppa:deadsnakes/ppa -y
           sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format
-          cp -a /var/cache/apt/archives/*.deb apt-archives
+          cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true
 
       - name: Extract Go version from go.mod
         run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> $GITHUB_ENV
diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml
index 8eaf3b65f..2a5ecfb29 100644
--- a/.github/workflows/ci-pr-checks.yaml
+++ b/.github/workflows/ci-pr-checks.yaml
@@ -28,7 +28,7 @@ jobs:
           sudo apt-get update
           sudo add-apt-repository ppa:deadsnakes/ppa -y
           sudo apt-get install -y libzmq3-dev pkg-config python3.12 python3.12-dev python3.12-venv clang-format
-          cp -a /var/cache/apt/archives/*.deb apt-archives
+          cp -a /var/cache/apt/archives/*.deb apt-archives 2>/dev/null || true
 
       - name: Sanity check repo contents
         run: ls -la
@@ -42,8 +42,9 @@ jobs:
           go-version: "${{ env.GO_VERSION }}"
           cache-dependency-path: ./go.sum
 
-      - name: Install dependencies
-        run: go mod download
+      # Set up the Python virtual environment (includes Python config verification)
+      - name: Run setup-venv
+        run: make setup-venv
 
       - name: Cache Python (pip) dependencies
         uses: actions/cache@v4
@@ -56,9 +57,10 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-pip-
 
-      #  Set up the Python virtual environment (includes Python config verification)
-      - name: Run setup-venv
-        run: make setup-venv
+      - name: Install dependencies
+        run: |
+          go mod download
+          make install-python-deps
 
       - name: Install golangci-lint without running it
         uses: golangci/golangci-lint-action@v8
@@ -79,10 +81,10 @@ jobs:
 
       - name: Run C/C++/CUDA formatting check
         run: make clang
+
       - name: Run make build
         shell: bash
-        run: |
-          make build
+        run: make build
 
       - name: Run make test
         shell: bash
diff --git a/Dockerfile b/Dockerfile
index 3a88941d1..afbb48616 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,25 +14,16 @@
 
 FROM python:3.12-slim AS python-builder
 
-ARG TARGETARCH
+ARG TARGETOS=linux
+ARG TARGETARCH=amd64
 
 WORKDIR /workspace
 
 RUN apt-get update && apt-get install -y --no-install-recommends build-essential
 
+COPY Makefile Makefile
 COPY pkg/preprocessing/chat_completions/ pkg/preprocessing/chat_completions/
-# Create venv and install vLLM based on architecture using pre-built wheels
-RUN python3.12 -m venv /workspace/build/venv && \
-    . /workspace/build/venv/bin/activate && \
-    pip install --upgrade pip && \
-    VLLM_VERSION="0.14.0" && \
-    if [ "$TARGETARCH" = "arm64" ]; then \
-        pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
-    elif [ "$TARGETARCH" = "amd64" ]; then \
-        pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
-    else \
-        echo "ERROR: Unsupported architecture: $TARGETARCH. Only arm64 and amd64 are supported." && exit 1; \
-    fi
+RUN TARGETOS=${TARGETOS} TARGETARCH=${TARGETARCH} make install-python-deps
 
 # Build Stage: using Go 1.24.1 image
 FROM quay.io/projectquay/golang:1.24 AS builder
@@ -60,17 +51,18 @@ RUN go mod download
 # Copy the source code.
 COPY . .
 
-# HuggingFace tokenizer bindings
-RUN mkdir -p lib
-ARG RELEASE_VERSION=v1.22.1
-RUN curl -L https://github.com/daulet/tokenizers/releases/download/${RELEASE_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
-RUN ranlib lib/*.a
-
 # Copy this project's own Python source code into the final image
 COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /workspace/pkg/preprocessing/chat_completions
 RUN make setup-venv
 COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages
-RUN make build
+
+# Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's
+# Python code and the installed libraries (site-packages) are found at runtime.
+ENV PYTHONPATH=/workspace/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages
+RUN python3.12 -c "import tokenizer_wrapper"
+
+ARG RELEASE_VERSION=v1.22.1
+RUN TOKENIZER_VERSION=${RELEASE_VERSION} make build
 
 # Use distroless as minimal base image to package the manager binary
 # Refer to https://github.com/GoogleContainerTools/distroless for more details
@@ -85,14 +77,15 @@ RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.
 
 # Copy this project's own Python source code into the final image
 COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /app/pkg/preprocessing/chat_completions
-COPY --from=python-builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages
 
 # Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's
 # Python code and the installed libraries (site-packages) are found at runtime.
-ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/usr/lib64/python3.12/site-packages
+ENV PYTHONPATH=/app/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages
+RUN python3.12 -c "import tokenizer_wrapper"
 
 # Copy the compiled Go application
-COPY --from=builder /workspace/bin/llm-d-kv-cache-manager /app/kv-cache-manager
+COPY --from=builder /workspace/bin/llm-d-kv-cache /app/kv-cache-manager
 USER 65532:65532
 
 # Set the entrypoint to the kv-cache-manager binary
diff --git a/Makefile b/Makefile
index 947b6cfd5..6a19db2e8 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,7 @@ help: ## Print help
 TOKENIZER_LIB = lib/libtokenizers.a
 
 # Extract RELEASE_VERSION from Dockerfile
-TOKENIZER_VERSION := $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2)
+TOKENIZER_VERSION ?= $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2)
 
 .PHONY: download-tokenizer
 download-tokenizer: $(TOKENIZER_LIB)
@@ -116,7 +116,7 @@ detect-python: ## Detects Python and prints the configuration.
 .PHONY: setup-venv
 setup-venv: detect-python ## Sets up the Python virtual environment.
 	@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
-	@if [ ! -f "$(VENV_BIN)/pip" ]; then \
+	@if [ ! -f "$(VENV_BIN)/python" ]; then \
 		echo "Creating virtual environment..."; \
 		$(PYTHON_EXE) -m venv $(VENV_DIR) || { \
 			echo "ERROR: Failed to create virtual environment."; \
@@ -132,40 +132,38 @@ setup-venv: detect-python ## Sets up the Python virtual environment.
 .PHONY: install-python-deps
 install-python-deps: setup-venv ## installs dependencies.
 	@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
-	@if [ ! -f "$(VENV_BIN)/pip" ]; then \
-		echo "Creating virtual environment..."; \
-		$(PYTHON_EXE) -m venv $(VENV_DIR) || { \
-			echo "ERROR: Failed to create virtual environment."; \
-			echo "Your Python installation may be missing the 'venv' module."; \
-			echo "Try: 'sudo apt install python$(PYTHON_VERSION)-venv' or 'sudo dnf install python$(PYTHON_VERSION)-devel'"; \
-			exit 1; \
-		}; \
+	@if [ ! -f "$(VENV_BIN)/python" ]; then \
+		echo "ERROR: Virtual environment not found. Run 'make setup-venv' first."; \
+		exit 1; \
 	fi
-	@echo "Upgrading pip and installing dependencies..."
-	
-	@if [ "$(TARGETOS)" = "linux" ]; then \
-		if [ "$(TARGETARCH)" = "amd64" ]; then \
-			echo "Installing vLLM pre-built wheel for x86_64..."; \
-			$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
-		elif [ "$(TARGETARCH)" = "arm64" ]; then \
-			echo "Installing vLLM pre-built wheel for aarch64..."; \
-			$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
-		else \
-			echo "Unsupported Linux architecture: $(TARGETARCH). Falling back to setup.sh..."; \
+	@if $(VENV_BIN)/python -c "import vllm" 2>/dev/null; then \
+		echo "vllm is already installed, skipping..."; \
+	else \
+		echo "Installing vllm..."; \
+		if [ "$(TARGETOS)" = "linux" ]; then \
+			if [ "$(TARGETARCH)" = "amd64" ]; then \
+				echo "Installing vLLM pre-built wheel for x86_64..."; \
+				$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
+			elif [ "$(TARGETARCH)" = "arm64" ]; then \
+				echo "Installing vLLM pre-built wheel for aarch64..."; \
+				$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
+			else \
+				echo "Unsupported Linux architecture: $(TARGETARCH). Falling back to setup.sh..."; \
+				PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \
+			fi; \
+		elif [ "$(TARGETOS)" = "darwin" ]; then \
+			echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \
 			PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \
+		else \
+			echo "Unsupported OS: $(TARGETOS)"; \
+			exit 1; \
 		fi; \
-	elif [ "$(TARGETOS)" = "darwin" ]; then \
-		echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \
-		PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \
-	else \
-		echo "Unsupported OS: $(TARGETOS)"; \
-		exit 1; \
+		echo "Verifying vllm installation..."; \
+		$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \
+			echo "ERROR: vllm library not properly installed in venv."; \
+			exit 1; \
+		}; \
 	fi
-	@echo "Verifying vllm installation..."
-	@$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \
-		echo "ERROR: vllm library not properly installed in venv."; \
-		exit 1; \
-	}
 
 .PHONY: install-hf-cli
 install-hf-cli:

From 433a2be58ae528780c9fbeb23a93e20b1e477e44 Mon Sep 17 00:00:00 2001
From: HyunKyun Moon <mhg5303@gmail.com>
Date: Mon, 26 Jan 2026 10:47:00 +0000
Subject: [PATCH 2/2] Makefile: exit early when vllm is already installed

Signed-off-by: HyunKyun Moon <mhg5303@gmail.com>
---
 Makefile | 55 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/Makefile b/Makefile
index 6a19db2e8..6b417935a 100644
--- a/Makefile
+++ b/Makefile
@@ -138,32 +138,35 @@ install-python-deps: setup-venv ## installs dependencies.
 	fi
+# NOTE: everything from the vllm check to the final verification is a single
+# shell command joined by trailing backslashes. make runs each recipe line in
+# its own shell, so `exit 0` can only skip the install within that one command.
 	@if $(VENV_BIN)/python -c "import vllm" 2>/dev/null; then \
 		echo "vllm is already installed, skipping..."; \
-	else \
-		echo "Installing vllm..."; \
-		if [ "$(TARGETOS)" = "linux" ]; then \
-			if [ "$(TARGETARCH)" = "amd64" ]; then \
-				echo "Installing vLLM pre-built wheel for x86_64..."; \
-				$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
-			elif [ "$(TARGETARCH)" = "arm64" ]; then \
-				echo "Installing vLLM pre-built wheel for aarch64..."; \
-				$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
-			else \
-				echo "Unsupported Linux architecture: $(TARGETARCH). Falling back to setup.sh..."; \
-				PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \
-			fi; \
-		elif [ "$(TARGETOS)" = "darwin" ]; then \
-			echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \
-			PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \
-		else \
-			echo "Unsupported OS: $(TARGETOS)"; \
-			exit 1; \
-		fi; \
-		echo "Verifying vllm installation..."; \
-		$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \
-			echo "ERROR: vllm library not properly installed in venv."; \
-			exit 1; \
-		}; \
-	fi
+		exit 0; \
+	fi; \
+	echo "Installing vllm..."; \
+	if [ "$(TARGETOS)" = "linux" ]; then \
+		if [ "$(TARGETARCH)" = "amd64" ]; then \
+			echo "Installing vLLM pre-built wheel for x86_64..."; \
+			$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cpu; \
+		elif [ "$(TARGETARCH)" = "arm64" ]; then \
+			echo "Installing vLLM pre-built wheel for aarch64..."; \
+			$(VENV_BIN)/pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_aarch64.whl; \
+		else \
+			echo "Unsupported Linux architecture: $(TARGETARCH). Falling back to setup.sh..."; \
+			PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \
+		fi; \
+	elif [ "$(TARGETOS)" = "darwin" ]; then \
+		echo "Building vLLM from source for macOS (pre-built wheels not available)..."; \
+		PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh; \
+	else \
+		echo "Unsupported OS: $(TARGETOS)"; \
+		exit 1; \
+	fi; \
+	echo "Verifying vllm installation..."; \
+	$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \
+		echo "ERROR: vllm library not properly installed in venv."; \
+		exit 1; \
+	}
 
 .PHONY: install-hf-cli
 install-hf-cli: