diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000..4eb2972cba
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,21 @@
+# Git
+.git
+.gitignore
+
+# Build artifacts
+bin
+build
+
+# IDE and OS files
+.idea
+.vscode
+**/.DS_Store
+
+# Local virtual environments
+venv
+
+# Python cache files (any depth; a bare name only matches at the context root)
+**/__pycache__
+
+# Docker files
+Dockerfile
diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml
index dbf6e3dd98..0bd12a87a3 100644
--- a/.github/workflows/ci-pr-checks.yaml
+++ b/.github/workflows/ci-pr-checks.yaml
@@ -12,7 +12,7 @@ jobs:
   check-changes:
     runs-on: ubuntu-latest
     outputs:
-      docs: ${{ steps.filter.outputs.docs }}
+      src: ${{ steps.filter.outputs.src }}
     steps:
       - name: Checkout source
         uses: actions/checkout@v6
@@ -20,14 +20,19 @@ jobs:
         id: filter
         with:
           filters: |
-            docs:
-              - 'README.md'
-              - 'docs/**'
+            src:
+              - '**/*.go'
+              - '**/*.py'
   lint-and-test:
     needs: check-changes
-    if: ${{ needs.check-changes.outputs.docs == 'false' }}
+    if: ${{ needs.check-changes.outputs.src == 'true' }}
     runs-on: ubuntu-latest
     steps:
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned release instead of the mutable main branch
+        with:
+          tool-cache: false
+
       - name: Checkout source
         uses: actions/checkout@v6
 
@@ -43,9 +48,6 @@ jobs:
           go-version: "${{ env.GO_VERSION }}"
           cache-dependency-path: ./go.sum
 
-      - name: Install dependencies
-        run: sudo make install-dependencies
-
       - name: Configure CGO for Python
         run: |
           PYTHON_INCLUDE=$(python3 -c "import sysconfig; print(sysconfig.get_path('include'))")
@@ -57,13 +59,16 @@ jobs:
       - name: Set PKG_CONFIG_PATH
         run: echo "PKG_CONFIG_PATH=/usr/lib/pkgconfig" >> $GITHUB_ENV
 
-      - name: go mod tidy
-        run: go mod tidy
+      - name: Install dependencies
+        run: |
+          go mod tidy
+          sudo -E env "PATH=$PATH" make install-dependencies
+          sudo -E env "PATH=$PATH" make install-python-deps
 
       - name: Run lint checks
         uses: golangci/golangci-lint-action@v9
         with:
-          version: 'v2.1.6'
+          version: "v2.1.6"
           args: "--config=./.golangci.yml"
         env:
           CGO_ENABLED: ${{ env.CGO_ENABLED }}
@@ -74,10 +79,8 @@ jobs:
 
       - name: Run make build
         shell: bash
-        run: |
-          make build
+        run: make build
 
       - name: Run make test
         shell: bash
-        run: |
-          make test
+        run: make test
diff --git a/.gitignore b/.gitignore
index f94c6c6ce6..0af36bc0c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,8 @@
 main
 bin/
 
+*debug_bin*
+
 # Test binary, built with `go test -c`
 *.test
 
diff --git a/Dockerfile.epp b/Dockerfile.epp
index 915a34a0af..f344914dd8 100644
--- a/Dockerfile.epp
+++ b/Dockerfile.epp
@@ -1,13 +1,42 @@
 ## Minimal runtime Dockerfile (microdnf-only, no torch, wrapper in site-packages)
-# Build Stage: using Go 1.24 image
-FROM quay.io/projectquay/golang:1.24 AS builder
+# Go dependencies stage: download go modules and extract kv-cache
+FROM quay.io/projectquay/golang:1.24 AS go-deps
+
+WORKDIR /workspace
+
+# Copy the Go Modules manifests
+COPY go.mod go.mod
+COPY go.sum go.sum
+
+# Download modules before copying sources so this layer stays cached across source-only changes
+RUN go mod download
+
+# Resolve the llm-d-kv-cache module directory and copy it (Python wrapper, setup script) to a known location
+RUN KV_CACHE_PKG=$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache) && \
+    mkdir -p /workspace/kv-cache && \
+    cp -r "$KV_CACHE_PKG"/* /workspace/kv-cache && \
+    chmod +x /workspace/kv-cache/pkg/preprocessing/chat_completions/setup.sh
+
+# Copy the go source (consumed later by the go-builder stage via COPY --from=go-deps)
+COPY cmd/ cmd/
+COPY pkg/ pkg/
+
+FROM python:3.12-slim AS python-builder
+
+RUN apt-get update && apt-get install -y --no-install-recommends build-essential && rm -rf /var/lib/apt/lists/*
+
+COPY --from=go-deps /workspace/kv-cache /workspace/kv-cache
+WORKDIR /workspace/kv-cache
+# This runs llm-d-kv-cache's own Makefile, not llm-d-inference-scheduler's.
+RUN KV_CACHE_PKG=/workspace/kv-cache make install-python-deps
+
+# Go build stage
+FROM quay.io/projectquay/golang:1.24 AS go-builder
 
 ARG TARGETOS
 ARG TARGETARCH
 ARG PYTHON_VERSION=3.12
-
 ENV PYTHON=python${PYTHON_VERSION}
-ENV PYTHONPATH=/usr/lib64/${PYTHON}/site-packages:/usr/lib/${PYTHON}/site-packages
 
 # Install build tools
 # The builder is based on UBI8, so we need epel-release-8.
@@ -16,52 +45,22 @@ RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.
     dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig ${PYTHON}-devel ${PYTHON}-pip git && \
     dnf clean all
 
+COPY --from=go-deps /workspace /workspace
+COPY --from=go-deps /go/pkg/mod /go/pkg/mod
 
 WORKDIR /workspace
 
-# Copy the Go Modules manifests
-COPY go.mod go.mod
-COPY go.sum go.sum
+COPY Makefile* ./
 
-# Copy the go source
-COPY cmd/ cmd/
-COPY pkg/ pkg/
+COPY --from=python-builder /workspace/kv-cache/pkg/preprocessing/chat_completions /workspace/kv-cache/pkg/preprocessing/chat_completions
+RUN make setup-venv
+COPY --from=python-builder /workspace/kv-cache/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages
 
-RUN go mod download
+ENV PYTHONPATH=/workspace/kv-cache/pkg/preprocessing/chat_completions:/workspace/build/venv/lib/python3.12/site-packages
+RUN python3.12 -c "import tokenizer_wrapper"  # verify tokenizer_wrapper is correctly installed
 
-# Copy Python wrapper and requirements from llm-d-kv-cache-manager dependency
-# Extract version dynamically and copy to a known location
-# We need to keep llm-d-kv-cache-manager as go module path is kept the old name
-RUN KVCACHE_MANAGER_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) && \
-    mkdir -p /workspace/kv-cache && \
-    cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KVCACHE_MANAGER_VERSION}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py \
-       /workspace/kv-cache/render_jinja_template_wrapper.py && \
-    cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KVCACHE_MANAGER_VERSION}/pkg/preprocessing/chat_completions/requirements.txt \
-       /workspace/kv-cache/requirements.txt
-
-# HuggingFace tokenizer bindings (static lib)
-RUN mkdir -p lib
-# Ensure that the RELEASE_VERSION matches the one used in the imported llm-d-kv-cache-manager version
 ARG RELEASE_VERSION=v1.22.1
-RUN curl -L https://github.com/daulet/tokenizers/releases/download/${RELEASE_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
-RUN ranlib lib/*.a
-
-# Build
-# the GOARCH has not a default value to allow the binary be built according to the host where the command
-# was called. For example, if we call make image-build in a local env which has the Apple Silicon M1 SO
-# the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore,
-# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
-ENV CGO_ENABLED=1
-ENV GOOS=${TARGETOS:-linux}
-ENV GOARCH=${TARGETARCH}
-
-
-ARG COMMIT_SHA=unknown
-ARG BUILD_REF
-RUN CGO_CFLAGS="$(${PYTHON}-config --cflags) -I/workspace/lib" && \
-    CGO_LDFLAGS="$(${PYTHON}-config --ldflags --embed) -L/workspace/lib -ltokenizers -ldl -lm" && \
-    export CGO_CFLAGS CGO_LDFLAGS && \
-    go build -a -o bin/epp -ldflags="-extldflags '-L$(pwd)/lib' -X sigs.k8s.io/gateway-api-inference-extension/version.CommitSHA=${COMMIT_SHA} -X sigs.k8s.io/gateway-api-inference-extension/version.BuildRef=${BUILD_REF}" cmd/epp/main.go
+RUN TOKENIZER_VERSION=${RELEASE_VERSION} make build-epp
 
 # Runtime stage
 # Use ubi9 as a minimal base image to package the manager binary
@@ -69,7 +68,7 @@ RUN CGO_CFLAGS="$(${PYTHON}-config --cflags) -I/workspace/lib" && \
 FROM registry.access.redhat.com/ubi9/ubi-minimal:9.7
 ARG PYTHON_VERSION=3.12
 WORKDIR /
-COPY --from=builder /workspace/bin/epp /app/epp
+COPY --from=go-builder /workspace/bin/epp /app/epp
 
 USER root
 
@@ -87,24 +86,11 @@ RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-
     ln -sf /usr/bin/${PYTHON} /usr/bin/python3 && \
     ln -sf /usr/bin/${PYTHON} /usr/bin/python
 
+# Copy Python kv-cache package and site-packages from the python-builder stage
+COPY --from=python-builder /workspace/kv-cache /workspace/kv-cache
+ENV PYTHONPATH=/workspace/kv-cache/pkg/preprocessing/chat_completions:/workspace/kv-cache/build/venv/lib/python3.12/site-packages
+RUN ${PYTHON} -c "import tokenizer_wrapper"  # verify tokenizer_wrapper is correctly installed
 
-# Install wrapper as a module in site-packages
-RUN mkdir -p /usr/local/lib/${PYTHON}/site-packages/
-COPY --from=builder /workspace/kv-cache/render_jinja_template_wrapper.py /usr/local/lib/${PYTHON}/site-packages/
-
-# Python deps (no cache, single target) – filter out torch
-ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
-COPY --from=builder /workspace/kv-cache/requirements.txt /tmp/requirements.txt
-RUN sed '/^torch\b/d' /tmp/requirements.txt > /tmp/requirements.notorch.txt && \
-    ${PYTHON} -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
-    ${PYTHON} -m pip install --no-cache-dir --target /usr/local/lib/${PYTHON}/site-packages -r /tmp/requirements.notorch.txt && \
-    ${PYTHON} -m pip install --no-cache-dir --target /usr/local/lib/${PYTHON}/site-packages PyYAML && \
-    rm /tmp/requirements.txt /tmp/requirements.notorch.txt && \
-    rm -rf /root/.cache/pip
-
-# Python env
-ENV PYTHONPATH="/usr/local/lib/${PYTHON}/site-packages:/usr/lib/${PYTHON}/site-packages"
-ENV PATH=/usr/bin:/usr/local/bin:$PATH
 ENV HF_HOME="/tmp/.cache"
 
 USER 65532:65532
diff --git a/Makefile b/Makefile
index 19e722ee22..27736606fe 100644
--- a/Makefile
+++ b/Makefile
@@ -157,7 +157,7 @@ test-unit: test-unit-epp test-unit-sidecar ## Run unit tests
 .PHONY: test-unit-%
 test-unit-%: download-tokenizer install-python-deps check-dependencies ## Run unit tests
 	@printf "\033[33;1m==== Running Unit Tests ====\033[0m\n"
-	@KV_CACHE_PKG=$$(go list -m -f '{{.Dir}}/pkg/preprocessing/chat_completions' github.com/llm-d/llm-d-kv-cache-manager 2>/dev/null || echo ""); \
+	@KV_CACHE_PKG=$$(go list -m -f '{{.Dir}}/pkg/preprocessing/chat_completions' github.com/llm-d/llm-d-kv-cache 2>/dev/null || echo ""); \
 	PYTHONPATH="$$KV_CACHE_PKG:$(VENV_DIR)/lib/python$(PYTHON_VERSION)/site-packages" \
 	CGO_CFLAGS=${$*_CGO_CFLAGS} CGO_LDFLAGS=${$*_CGO_LDFLAGS} go test $($*_LDFLAGS) -v $$($($*_TEST_FILES) | tr '\n' ' ')
 
@@ -169,7 +169,7 @@ test-filter: download-tokenizer install-python-deps check-dependencies ## Run fi
 	fi
 	@TEST_TYPE="$(if $(TYPE),$(TYPE),epp)"; \
 	printf "\033[33;1m==== Running Filtered Tests (pattern: $(PATTERN), type: $$TEST_TYPE) ====\033[0m\n"; \
-	KV_CACHE_PKG=$$(go list -m -f '{{.Dir}}/pkg/preprocessing/chat_completions' github.com/llm-d/llm-d-kv-cache-manager 2>/dev/null || echo ""); \
+	KV_CACHE_PKG=$$(go list -m -f '{{.Dir}}/pkg/preprocessing/chat_completions' github.com/llm-d/llm-d-kv-cache 2>/dev/null || echo ""); \
 	if [ "$$TEST_TYPE" = "epp" ]; then \
 		PYTHONPATH="$$KV_CACHE_PKG:$(VENV_DIR)/lib/python$(PYTHON_VERSION)/site-packages" \
 		CGO_CFLAGS=$(epp_CGO_CFLAGS) CGO_LDFLAGS=$(epp_CGO_LDFLAGS) \
diff --git a/Makefile.tools.mk b/Makefile.tools.mk
index e750cd41b1..54eb912e9b 100644
--- a/Makefile.tools.mk
+++ b/Makefile.tools.mk
@@ -22,7 +22,7 @@ TYPOS_VERSION ?= v1.34.0
 ## Python Configuration
 PYTHON_VERSION ?= 3.12
 # Extract RELEASE_VERSION from Dockerfile
-TOKENIZER_VERSION := $(shell grep '^ARG RELEASE_VERSION=' Dockerfile.epp | cut -d'=' -f2)
+TOKENIZER_VERSION ?= $(shell grep '^ARG RELEASE_VERSION=' Dockerfile.epp | cut -d'=' -f2)
 
 # Python executable for creating venv
 PYTHON_EXE := $(shell command -v python$(PYTHON_VERSION) || command -v python3)
@@ -151,33 +151,63 @@ $(TOKENIZER_LIB): | $(LOCALLIB)
 	@ranlib $(LOCALLIB)/*.a
 	@echo "Tokenizer bindings downloaded successfully."
 
-
-.PHONY: install-python-deps
-install-python-deps: ## Sets up Python virtual environment and installs dependencies
-	@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
+.PHONY: detect-python
+detect-python: ## Detects Python and prints the configuration.
+	@printf "\033[33;1m==== Python Configuration ====\033[0m\n"
 	@if [ -z "$(PYTHON_EXE)" ]; then \
 		echo "ERROR: Python 3 not found in PATH."; \
 		exit 1; \
 	fi
+	@# Verify the version of the found python executable using its exit code
+	@if ! $(PYTHON_EXE) -c "import sys; sys.exit(0 if sys.version_info[:2] == ($(shell echo $(PYTHON_VERSION) | cut -d. -f1), $(shell echo $(PYTHON_VERSION) | cut -d. -f2)) else 1)"; then \
+		echo "ERROR: Found Python at '$(PYTHON_EXE)' but it is not version $(PYTHON_VERSION)."; \
+		echo "Please ensure 'python$(PYTHON_VERSION)' or a compatible 'python3' is in your PATH."; \
+		exit 1; \
+	fi
+	@echo "Python executable: $(PYTHON_EXE) ($$($(PYTHON_EXE) --version))"
+	@echo "Python CFLAGS:     $(PYTHON_CFLAGS)"
+	@echo "Python LDFLAGS:    $(PYTHON_LDFLAGS)"
+	@if [ -z "$(PYTHON_CFLAGS)" ]; then \
+		echo "ERROR: Python development headers not found. See installation instructions above."; \
+		exit 1; \
+	fi
+	@printf "\033[33;1m==============================\033[0m\n"
+
+.PHONY: setup-venv
+setup-venv: detect-python ## Sets up the Python virtual environment.
+	@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
 	@if [ ! -f "$(VENV_BIN)/pip" ]; then \
 		echo "Creating virtual environment..."; \
 		$(PYTHON_EXE) -m venv $(VENV_DIR) || { \
 			echo "ERROR: Failed to create virtual environment."; \
 			echo "Your Python installation may be missing the 'venv' module."; \
+			echo "Try: 'sudo apt install python$(PYTHON_VERSION)-venv' or 'sudo dnf install python$(PYTHON_VERSION)-devel'"; \
 			exit 1; \
 		}; \
 	fi
-	@echo "Upgrading pip and installing dependencies..."
-	@$(VENV_BIN)/pip install --upgrade pip --quiet
-	@KV_CACHE_PKG=$$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache-manager 2>/dev/null); \
-	if [ -n "$$KV_CACHE_PKG" ] && [ -f "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/requirements.txt" ]; then \
-		echo "Installing Python dependencies from kv-cache-manager..."; \
-		$(VENV_BIN)/pip install --quiet -r "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/requirements.txt"; \
+	@echo "Upgrading pip..."
+	@$(VENV_BIN)/pip install --upgrade pip
+	@echo "Python virtual environment setup complete."
+
+.PHONY: install-python-deps
+install-python-deps: setup-venv ## Installs Python dependencies into the virtual environment.
+	@printf "\033[33;1m==== Installing Python dependencies into $(VENV_DIR) ====\033[0m\n"
+	@echo "install vllm..."
+	@KV_CACHE_PKG=$${KV_CACHE_PKG:-$$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache 2>/dev/null)}; \
+	if [ -n "$$KV_CACHE_PKG" ] && [ -f "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" ]; then \
+		echo "Running kv-cache setup script..."; \
+		cp "$$KV_CACHE_PKG/pkg/preprocessing/chat_completions/setup.sh" build/kv-cache-setup.sh; \
+		chmod +x build/kv-cache-setup.sh; \
+		cd build && PATH=$(VENV_BIN):$$PATH ./kv-cache-setup.sh && cd ..; \
 	else \
-		echo "WARNING: Could not find kv-cache-manager requirements.txt, installing minimal deps..."; \
-		$(VENV_BIN)/pip install --quiet 'transformers>=4.53.0' 'jinja2>=2.11'; \
+		echo "ERROR: kv-cache package not found or setup script missing."; \
+		exit 1; \
 	fi
-	@echo "✅ Python dependencies installed in venv"
+	@echo "Verifying vllm installation..."
+	@$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \
+		echo "ERROR: vllm library not properly installed in venv."; \
+		exit 1; \
+	}
 
 .PHONY: check-tools
 check-tools: check-go check-ginkgo check-golangci-lint check-kustomize check-envsubst check-container-tool check-kubectl check-buildah check-typos ## Check that all required tools are installed
diff --git a/deploy/config/epp-precise-prefix-cache-config.yaml b/deploy/config/epp-precise-prefix-cache-config.yaml
index 1575052781..39b0bb2850 100644
--- a/deploy/config/epp-precise-prefix-cache-config.yaml
+++ b/deploy/config/epp-precise-prefix-cache-config.yaml
@@ -7,10 +7,10 @@ plugins:
   - type: decode-filter
   - type: precise-prefix-cache-scorer
     parameters:
+      tokenProcessorConfig:
+        blockSize: 64                 # must match vLLM block size
+        hashSeed: "42"                # must match vLLM PYTHONHASHSEED env var
       indexerConfig:
-        tokenProcessorConfig:
-          blockSize: 64                 # must match vLLM block size
-          hashSeed: "42"                # must match vLLM PYTHONHASHSEED env var
         kvBlockIndexConfig:
           enableMetrics: true           # enable kv-block index metrics (prometheus)
   - type: kv-cache-utilization-scorer
diff --git a/deploy/config/sim-epp-kvcache-config.yaml b/deploy/config/sim-epp-kvcache-config.yaml
index 566e92437a..7850950ef7 100644
--- a/deploy/config/sim-epp-kvcache-config.yaml
+++ b/deploy/config/sim-epp-kvcache-config.yaml
@@ -6,15 +6,16 @@ plugins:
 - type: prefix-cache-scorer
   parameters:
     mode: cache_tracking
+    tokenProcessorConfig:
+      blockSize: 16                         # must match vLLM block size if not default (16)
+      hashSeed: "42"                        # must match PYTHONHASHSEED in vLLM pods
     kvEventsConfig:
       zmqEndpoint: tcp://0.0.0.0:5557
     indexerConfig:
       prefixStoreConfig:
         blockSize: 16 
-      tokenProcessorConfig:
-        blockSize: 16                         # must match vLLM block size if not default (16)
-        hashSeed: "42"                        # must match PYTHONHASHSEED in vLLM pods
       tokenizersPoolConfig:
+        modelName: <model-name>            # specify the model name to use for tokenizer loading
         hf:
           tokenizersCacheDir: "/cache/tokenizers"
       kvBlockIndexConfig:
diff --git a/docs/architecture.md b/docs/architecture.md
index b3215815c5..c51a67293b 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -308,12 +308,14 @@ Configuration:
 
 - **Type**: `precise-prefix-cache-scorer`
 - **Parameters**:
+  - `tokenProcessorConfig`: Configuration for the `kvblock.TokenProcessor`.
   - `indexerConfig`: Configuration for the `kvcache.Indexer`.
   - `kvEventsConfig`: Configuration for the `kvevents.Pool`.
 
 See list of parameters at [llm-d-kv-cache/docs/configuration.md](https://github.com/llm-d/llm-d-kv-cache/blob/fa85b60207ba0a09daf23071e10ccb62d7977b40/docs/configuration.md).
 
 Note that in most cases you will only need to set:
+- Model name in the `tokenizersPoolConfig` to match the model used in the vLLM deployment.
 - HuggingFace token for the `tokenizersPoolConfig` or the `tokenizersCacheDir` to a mounted directory containing the tokenizers.
   - For the HuggingFace token, the inference-scheduler also accepts the environment variable `HF_TOKEN` - this is the practical option for security. 
 - **IMPORTANT**: Token processor's block-size and hash-seed to match those used in the vLLM deployment.
@@ -325,15 +327,16 @@ Example configuration with the above parameters set:
 plugins:
   - type: precise-prefix-cache-scorer
     parameters:
+      tokenProcessorConfig:
+        blockSize: 64                    # must match vLLM block size
+        hashSeed: "12345"                # must match vLLM PYTHONHASHSEED env var
       indexerConfig:
-        tokenProcessorConfig:
-          blockSize: 64
-          hashSeed: "12345"
-      tokenizersPoolConfig:
-        hf:
-          huggingFaceToken: your_hf_token_here    # automatically set by `HF_TOKEN` environment variable
-      kvBlockIndexConfig:
-        enableMetrics: true
+        kvBlockIndexConfig:
+          enableMetrics: true
+        tokenizersPoolConfig:
+          modelName: hf-repo/model-name
+          hf:
+            huggingFaceToken: your_hf_token_here    # automatically set by `HF_TOKEN` environment variable
 ```
 
 Example configuration with all parameters set:
@@ -342,23 +345,24 @@ Example configuration with all parameters set:
 plugins:
   - type: precise-prefix-cache-scorer
     parameters:
+        tokenProcessorConfig:
+          blockSize: 16
+          hashSeed: "12345"
         kvEventsConfig:
           zmqEndpoint: tcp://*:5557
           topicFilter: kv@
           concurrency: 8
-        kvCacheIndexerConfig:
+        indexerConfig:
           prefixStoreConfig:
             cacheSize: 500000
             blockSize: 256
-          tokenProcessorConfig:
-            blockSize: 16
-            hashSeed: "12345"
           kvBlockIndexConfig:
             inMemoryConfig:
               size: 100000000
               podCacheSize: 10
             enableMetrics: true
           tokenizersPoolConfig:
+            modelName: hf-repo/model-name
             workersCount: 8
             hf:
               huggingFaceToken: your_hf_token_here    # automatically set by `HF_TOKEN` environment variable
diff --git a/go.mod b/go.mod
index 24f0e3d063..2d709788fa 100644
--- a/go.mod
+++ b/go.mod
@@ -10,7 +10,7 @@ require (
 	github.com/google/uuid v1.6.0
 	github.com/hashicorp/golang-lru/v2 v2.0.7
 	github.com/jellydator/ttlcache/v3 v3.4.0
-	github.com/llm-d/llm-d-kv-cache-manager v0.4.0
+	github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383
 	github.com/onsi/ginkgo/v2 v2.27.5
 	github.com/onsi/gomega v1.39.0
 	github.com/openai/openai-go v1.12.0
diff --git a/go.sum b/go.sum
index 814a9e50d3..9e559e42e8 100644
--- a/go.sum
+++ b/go.sum
@@ -183,8 +183,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
-github.com/llm-d/llm-d-kv-cache-manager v0.4.0 h1:MBWVpDW0PWsqNJEEAW1esrJW+Xavb0a7w14tCJWWyRY=
-github.com/llm-d/llm-d-kv-cache-manager v0.4.0/go.mod h1:ZlK7MCuz5D/weLeHyNKEmVF/eJZDyYn3XyRowTihq9o=
+github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383 h1:cHA1+Qe27oYDsWTmNToiidMBty6bxfuCUXvyhGPJ2FM=
+github.com/llm-d/llm-d-kv-cache v0.4.1-0.20260120091923-2d261e30d383/go.mod h1:XyhzHBYeOWamBMPkuRySB5nJ0zzQpK/mbuXKqJRFT6A=
 github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
 github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
 github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo=
diff --git a/pkg/plugins/scorer/precise_prefix_cache.go b/pkg/plugins/scorer/precise_prefix_cache.go
index 2ce6551c0f..000d9845d1 100644
--- a/pkg/plugins/scorer/precise_prefix_cache.go
+++ b/pkg/plugins/scorer/precise_prefix_cache.go
@@ -7,9 +7,10 @@ import (
 	"fmt"
 	"os"
 
-	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache"
-	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents"
-	preprocessing "github.com/llm-d/llm-d-kv-cache-manager/pkg/preprocessing/chat_completions"
+	"github.com/llm-d/llm-d-kv-cache/pkg/kvcache"
+	"github.com/llm-d/llm-d-kv-cache/pkg/kvcache/kvblock"
+	"github.com/llm-d/llm-d-kv-cache/pkg/kvevents"
+	preprocessing "github.com/llm-d/llm-d-kv-cache/pkg/preprocessing/chat_completions"
 	"sigs.k8s.io/controller-runtime/pkg/log"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
@@ -26,6 +27,9 @@ const (
 // PrecisePrefixCachePluginConfig holds the configuration for the
 // PrecisePrefixCacheScorer plugin.
 type PrecisePrefixCachePluginConfig struct {
+	// TokenProcessorConfig holds the configuration for the `kvblock.TokenProcessor` which is
+	// used to process tokens into KV-block keys.
+	TokenProcessorConfig *kvblock.TokenProcessorConfig `json:"tokenProcessorConfig"`
 	// IndexerConfig holds the configuration for the `kvcache.Indexer` which is
 	// used to score pods based on the KV-cache index state.
 	IndexerConfig *kvcache.Config `json:"indexerConfig"`
@@ -53,18 +57,24 @@ func PrecisePrefixCachePluginFactory(name string, rawParameters json.RawMessage,
 		KVEventsConfig: kvevents.DefaultConfig(),
 	}
 
-	// read hugging face token from environment variable if set
+	if rawParameters != nil {
+		if err := json.Unmarshal(rawParameters, &parameters); err != nil {
+			return nil, fmt.Errorf("failed to parse %s plugin config: %w", PrecisePrefixCachePluginType, err)
+		}
+	}
+
+	// Apply HF token from environment if not already set
 	if token := os.Getenv("HF_TOKEN"); token != "" &&
 		parameters.IndexerConfig != nil &&
 		parameters.IndexerConfig.TokenizersPoolConfig != nil &&
-		parameters.IndexerConfig.TokenizersPoolConfig.HFTokenizerConfig != nil {
+		parameters.IndexerConfig.TokenizersPoolConfig.HFTokenizerConfig != nil &&
+		parameters.IndexerConfig.TokenizersPoolConfig.HFTokenizerConfig.HuggingFaceToken == "" {
 		parameters.IndexerConfig.TokenizersPoolConfig.HFTokenizerConfig.HuggingFaceToken = token
 	}
 
-	if rawParameters != nil {
-		if err := json.Unmarshal(rawParameters, &parameters); err != nil {
-			return nil, fmt.Errorf("failed to parse %s plugin config: %w", PrecisePrefixCachePluginType, err)
-		}
+	// Validate model name is set
+	if parameters.IndexerConfig == nil || parameters.IndexerConfig.TokenizersPoolConfig == nil || parameters.IndexerConfig.TokenizersPoolConfig.ModelName == "" {
+		return nil, errors.New("modelName is required in indexerConfig.tokenizersPoolConfig")
 	}
 
 	scorer, err := New(handle.Context(), parameters)
@@ -85,8 +95,14 @@ func PrecisePrefixCachePluginFactory(name string, rawParameters json.RawMessage,
 // If the configuration is invalid or if the indexer fails to initialize,
 // an error is returned.
 func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePrefixCacheScorer, error) {
+	if config.TokenProcessorConfig == nil {
+		config.TokenProcessorConfig = kvblock.DefaultTokenProcessorConfig()
+	}
+
+	tokenProcessor := kvblock.NewChunkedTokenDatabase(config.TokenProcessorConfig)
+
 	// initialize the indexer
-	kvCacheIndexer, err := kvcache.NewKVCacheIndexer(ctx, config.IndexerConfig)
+	kvCacheIndexer, err := kvcache.NewKVCacheIndexer(ctx, config.IndexerConfig, tokenProcessor)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create `kvcache.Indexer`: %w", err)
 	}
@@ -94,7 +110,7 @@ func New(ctx context.Context, config PrecisePrefixCachePluginConfig) (*PrecisePr
 	go kvCacheIndexer.Run(ctx)
 
 	// initialize the KV-events pool
-	pool := kvevents.NewPool(config.KVEventsConfig, kvCacheIndexer.KVBlockIndex())
+	pool := kvevents.NewPool(config.KVEventsConfig, kvCacheIndexer.KVBlockIndex(), tokenProcessor)
 	pool.Start(ctx)
 
 	return &PrecisePrefixCacheScorer{
@@ -186,8 +202,17 @@ func (s *PrecisePrefixCacheScorer) getScores(ctx context.Context, request *types
 			traceLogger.Info("Both chat/completions and completions present; defaulting to chat/completions")
 		}
 
-		renderReq := &preprocessing.RenderJinjaTemplateRequest{
-			Conversations:             make([]preprocessing.ChatMessage, 0),
+		// Convert messages to conversation format
+		conversations := make([]preprocessing.Conversation, len(request.Body.ChatCompletions.Messages))
+		for i, msg := range request.Body.ChatCompletions.Messages {
+			conversations[i] = preprocessing.Conversation{
+				Role:    msg.Role,
+				Content: msg.Content.Raw,
+			}
+		}
+
+		renderReq := &preprocessing.ApplyChatTemplateRequest{
+			Conversation:              [][]preprocessing.Conversation{conversations},
 			Tools:                     request.Body.ChatCompletions.Tools,
 			Documents:                 request.Body.ChatCompletions.Documents,
 			ChatTemplate:              request.Body.ChatCompletions.ChatTemplate,
@@ -197,16 +222,8 @@ func (s *PrecisePrefixCacheScorer) getScores(ctx context.Context, request *types
 			ChatTemplateKWArgs:        request.Body.ChatCompletions.ChatTemplateKWArgs,
 		}
 
-		// Convert messages to the format expected by the renderer
-		for _, msg := range request.Body.ChatCompletions.Messages {
-			renderReq.Conversations = append(renderReq.Conversations, preprocessing.ChatMessage{
-				Role:    msg.Role,
-				Content: msg.Content.Raw,
-			})
-		}
-
 		traceLogger.Info("Processing chat completion request",
-			"messagesCount", len(renderReq.Conversations),
+			"messagesCount", len(conversations),
 			"toolsCount", len(renderReq.Tools),
 			"documentsCount", len(renderReq.Documents))
 
diff --git a/pkg/plugins/scorer/precise_prefix_cache_test.go b/pkg/plugins/scorer/precise_prefix_cache_test.go
index eb7284b95c..1a8bf9eec1 100644
--- a/pkg/plugins/scorer/precise_prefix_cache_test.go
+++ b/pkg/plugins/scorer/precise_prefix_cache_test.go
@@ -6,11 +6,11 @@ import (
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
-	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache"
-	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
-	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents"
-	preprocessing "github.com/llm-d/llm-d-kv-cache-manager/pkg/preprocessing/chat_completions"
-	"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
+	"github.com/llm-d/llm-d-kv-cache/pkg/kvcache"
+	"github.com/llm-d/llm-d-kv-cache/pkg/kvcache/kvblock"
+	"github.com/llm-d/llm-d-kv-cache/pkg/kvevents"
+	preprocessing "github.com/llm-d/llm-d-kv-cache/pkg/preprocessing/chat_completions"
+	"github.com/llm-d/llm-d-kv-cache/pkg/tokenization"
 	"github.com/stretchr/testify/require"
 	k8stypes "k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
@@ -39,7 +39,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 		name                string
 		pods                []types.Pod
 		request             *types.LLMRequest
-		kvBlockData         func(req *types.LLMRequestBody, model string) map[kvblock.Key][]kvblock.PodEntry
+		kvBlockData         func(req *types.LLMRequestBody, model string) map[kvblock.BlockHash][]kvblock.PodEntry
 		wantScoresByAddress map[string]float64
 	}{
 		{
@@ -111,20 +111,20 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 					},
 				},
 			},
-			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.Key][]kvblock.PodEntry {
+			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.BlockHash][]kvblock.PodEntry {
 				require.NotNil(t, req.Completions, "req expected to use Completions API")
 				prompt := req.Completions.Prompt
 
-				testTokenizer, err := tokenization.NewCachedLocalTokenizer(localTokenizerConfig)
+				testTokenizer, err := tokenization.NewCachedLocalTokenizer(t.Context(), model, localTokenizerConfig)
 				require.NoError(t, err)
 
 				// use the actual tokenizer on the test prompt
-				tokens, _, err := testTokenizer.Encode(prompt, model)
+				tokens, _, err := testTokenizer.Encode(prompt, model, true)
 				require.NoError(t, err)
 
 				// compute chunk hashes using the default block size
 				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
-				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
+				chunkKeys := tokenProcessor.TokensToKVBlockKeys(kvblock.EmptyBlockHash, tokens, model)
 
 				require.GreaterOrEqual(t, len(chunkKeys), 3, "Need at least 3 chunks for test")
 
@@ -138,17 +138,17 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 				//   pod-c: 1 chunk (0) -> score 1
 				// Normalized: (3-1)/(3-1) = 1.0, (2-1)/(3-1) = 0.5, (1-1)/(3-1) = 0.0
 
-				return map[kvblock.Key][]kvblock.PodEntry{
-					{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
+				return map[kvblock.BlockHash][]kvblock.PodEntry{
+					chunkKeys[0]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 						{PodIdentifier: "10.0.0.2:8080"},
 						{PodIdentifier: "10.0.0.3:8080"},
 					},
-					{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
+					chunkKeys[1]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 						{PodIdentifier: "10.0.0.2:8080"},
 					},
-					{ModelName: model, ChunkHash: chunkKeys[2].ChunkHash}: {
+					chunkKeys[2]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 					},
 				}
@@ -187,7 +187,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 				Body: &types.LLMRequestBody{
 					ChatCompletions: &types.ChatCompletionsRequest{
 						ChatTemplate: `{% for message in messages %}{{ message.role }}: {{ message.content }}
-{% endfor %}`,
+		{% endfor %}`,
 						Messages: []types.Message{
 							{
 								Role:    "user",
@@ -205,46 +205,53 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 					},
 				},
 			},
-			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.Key][]kvblock.PodEntry {
+			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.BlockHash][]kvblock.PodEntry {
 				require.NotNil(t, req.ChatCompletions, "req expected to use ChatCompletions API")
 
 				// convert to preprocessing format
-				var chatMessages []preprocessing.ChatMessage
+				var conversations []preprocessing.Conversation
 				for _, msg := range req.ChatCompletions.Messages {
-					chatMessages = append(chatMessages, preprocessing.ChatMessage{
+					conversations = append(conversations, preprocessing.Conversation{
 						Role:    msg.Role,
 						Content: msg.Content.Raw,
 					})
 				}
 
+				processor := preprocessing.NewChatTemplatingProcessor()
+				tokenizerCacheKey, err := processor.GetOrCreateTokenizerKey(t.Context(), &preprocessing.GetOrCreateTokenizerKeyRequest{
+					IsLocal: true,
+					Model:   "testdata/" + model,
+				})
+				require.NoError(t, err)
+
 				// render the chat template
-				renderReq := &preprocessing.RenderJinjaTemplateRequest{
-					Conversations: chatMessages,
-					ChatTemplate:  req.ChatCompletions.ChatTemplate,
+				renderReq := &preprocessing.ApplyChatTemplateRequest{
+					Key:          tokenizerCacheKey,
+					Conversation: [][]preprocessing.Conversation{conversations},
+					ChatTemplate: req.ChatCompletions.ChatTemplate,
 				}
-				processor := preprocessing.NewChatTemplatingProcessor()
-				rendered, err := processor.RenderChatTemplate(t.Context(), renderReq)
+				rendered, err := processor.ApplyChatTemplate(t.Context(), renderReq)
 				require.NoError(t, err)
 
 				// tokenize rendered prompt
-				testTokenizer, err := tokenization.NewCachedLocalTokenizer(localTokenizerConfig)
+				testTokenizer, err := tokenization.NewCachedLocalTokenizer(t.Context(), model, localTokenizerConfig)
 				require.NoError(t, err)
 
-				tokens, _, err := testTokenizer.Encode(rendered.RenderedChats[0], model)
+				tokens, _, err := testTokenizer.Encode(rendered, model, false)
 				require.NoError(t, err)
 
 				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
-				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
+				chunkKeys := tokenProcessor.TokensToKVBlockKeys(kvblock.EmptyBlockHash, tokens, model)
 
 				require.GreaterOrEqual(t, len(chunkKeys), 2, "Need at least 2 chunks for test")
 
 				// pod-a has both chunks, pod-b has only the first
-				return map[kvblock.Key][]kvblock.PodEntry{
-					{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
+				return map[kvblock.BlockHash][]kvblock.PodEntry{
+					chunkKeys[0]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 						{PodIdentifier: "10.0.0.2:8080"},
 					},
-					{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
+					chunkKeys[1]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 					},
 				}
@@ -294,17 +301,17 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 					},
 				},
 			},
-			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.Key][]kvblock.PodEntry {
+			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.BlockHash][]kvblock.PodEntry {
 				require.NotNil(t, req.Completions, "req expected to use Completions API")
 
-				testTokenizer, err := tokenization.NewCachedLocalTokenizer(localTokenizerConfig)
+				testTokenizer, err := tokenization.NewCachedLocalTokenizer(t.Context(), model, localTokenizerConfig)
 				require.NoError(t, err)
 
-				tokens, _, err := testTokenizer.Encode(req.Completions.Prompt, model)
+				tokens, _, err := testTokenizer.Encode(req.Completions.Prompt, model, true)
 				require.NoError(t, err)
 
 				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
-				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
+				chunkKeys := tokenProcessor.TokensToKVBlockKeys(kvblock.EmptyBlockHash, tokens, model)
 
 				require.GreaterOrEqual(t, len(chunkKeys), 3, "Need at least 3 chunks for test")
 
@@ -317,16 +324,16 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 				//   pod-a: has chunks 0,1,2 contiguously -> score 3
 				//   pod-b: has chunks 0,2 (missing 1) -> prefix stops at chunk0 -> score 1
 				//   pod-c: has only chunk 0 -> score 1
-				return map[kvblock.Key][]kvblock.PodEntry{
-					{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
+				return map[kvblock.BlockHash][]kvblock.PodEntry{
+					chunkKeys[0]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 						{PodIdentifier: "10.0.0.2:8080"},
 						{PodIdentifier: "10.0.0.3:8080"},
 					},
-					{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
+					chunkKeys[1]: {
 						{PodIdentifier: "10.0.0.1:8080"}, // only pod-a has chunk1
 					},
-					{ModelName: model, ChunkHash: chunkKeys[2].ChunkHash}: {
+					chunkKeys[2]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 						{PodIdentifier: "10.0.0.2:8080"}, // pod-b has chunk2 but missing chunk1
 					},
@@ -341,62 +348,6 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 				"10.0.0.3:8080": 0.0,
 			},
 		},
-		{
-			name: "different model names",
-			pods: []types.Pod{
-				&types.PodMetrics{
-					Pod: &backend.Pod{
-						NamespacedName: k8stypes.NamespacedName{Name: "pod-a"},
-						Address:        "10.0.0.1:8080",
-					},
-				},
-				&types.PodMetrics{
-					Pod: &backend.Pod{
-						NamespacedName: k8stypes.NamespacedName{Name: "pod-b"},
-						Address:        "10.0.0.2:8080",
-					},
-				},
-			},
-			request: &types.LLMRequest{
-				RequestId:   "test-request",
-				TargetModel: "test-model",
-				Body: &types.LLMRequestBody{
-					Completions: &types.CompletionsRequest{
-						Prompt: prompt,
-					},
-				},
-			},
-			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.Key][]kvblock.PodEntry {
-				require.NotNil(t, req.Completions, "req expected to use Completions API")
-
-				testTokenizer, err := tokenization.NewCachedLocalTokenizer(localTokenizerConfig)
-				require.NoError(t, err)
-
-				tokens, _, err := testTokenizer.Encode(req.Completions.Prompt, model)
-				require.NoError(t, err)
-
-				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
-				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
-
-				require.GreaterOrEqual(t, len(chunkKeys), 1, "Need at least 1 chunk for test")
-
-				// Populate the index with blocks for model `different-model`
-				// The request will ask for "test-model" but the cache only has "different-model"
-				// This should result in no cache hits since models don't share cache
-				return map[kvblock.Key][]kvblock.PodEntry{
-					{ModelName: "different-model", ChunkHash: chunkKeys[0].ChunkHash}: {
-						{PodIdentifier: "10.0.0.1:8080"},
-						{PodIdentifier: "10.0.0.2:8080"},
-					},
-				}
-			},
-			wantScoresByAddress: map[string]float64{
-				// Even though both pods have the chunk cached, it's for a different model
-				// so there should be no cache hits for the requested model
-				"10.0.0.1:8080": 0.0,
-				"10.0.0.2:8080": 0.0,
-			},
-		},
 		{
 			name: "single pod",
 			pods: []types.Pod{
@@ -419,26 +370,26 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 					},
 				},
 			},
-			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.Key][]kvblock.PodEntry {
+			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.BlockHash][]kvblock.PodEntry {
 				require.NotNil(t, req.Completions, "req expected to use Completions API")
 
-				testTokenizer, err := tokenization.NewCachedLocalTokenizer(localTokenizerConfig)
+				testTokenizer, err := tokenization.NewCachedLocalTokenizer(t.Context(), model, localTokenizerConfig)
 				require.NoError(t, err)
 
-				tokens, _, err := testTokenizer.Encode(req.Completions.Prompt, model)
+				tokens, _, err := testTokenizer.Encode(req.Completions.Prompt, model, true)
 				require.NoError(t, err)
 
 				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
-				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
+				chunkKeys := tokenProcessor.TokensToKVBlockKeys(kvblock.EmptyBlockHash, tokens, model)
 
 				require.GreaterOrEqual(t, len(chunkKeys), 2, "Need at least 2 chunks for test")
 
 				// Single pod has 2 chunks cached
-				return map[kvblock.Key][]kvblock.PodEntry{
-					{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
+				return map[kvblock.BlockHash][]kvblock.PodEntry{
+					chunkKeys[0]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 					},
-					{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
+					chunkKeys[1]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 					},
 				}
@@ -518,28 +469,28 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 					},
 				},
 			},
-			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.Key][]kvblock.PodEntry {
+			kvBlockData: func(req *types.LLMRequestBody, model string) map[kvblock.BlockHash][]kvblock.PodEntry {
 				require.NotNil(t, req.Completions, "req expected to use Completions API")
 
-				testTokenizer, err := tokenization.NewCachedLocalTokenizer(localTokenizerConfig)
+				testTokenizer, err := tokenization.NewCachedLocalTokenizer(t.Context(), model, localTokenizerConfig)
 				require.NoError(t, err)
 
-				tokens, _, err := testTokenizer.Encode(req.Completions.Prompt, model)
+				tokens, _, err := testTokenizer.Encode(req.Completions.Prompt, model, true)
 				require.NoError(t, err)
 
 				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
-				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
+				chunkKeys := tokenProcessor.TokensToKVBlockKeys(kvblock.EmptyBlockHash, tokens, model)
 
 				require.GreaterOrEqual(t, len(chunkKeys), 2, "Need at least 2 chunks for test")
 
 				// all pods have the same 2 chunks cached
-				return map[kvblock.Key][]kvblock.PodEntry{
-					{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
+				return map[kvblock.BlockHash][]kvblock.PodEntry{
+					chunkKeys[0]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 						{PodIdentifier: "10.0.0.2:8080"},
 						{PodIdentifier: "10.0.0.3:8080"},
 					},
-					{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
+					chunkKeys[1]: {
 						{PodIdentifier: "10.0.0.1:8080"},
 						{PodIdentifier: "10.0.0.2:8080"},
 						{PodIdentifier: "10.0.0.3:8080"},
@@ -562,6 +513,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 
 			kvcacheConfig, err := kvcache.NewDefaultConfig()
 			kvcacheConfig.TokenizersPoolConfig = &tokenization.Config{
+				ModelName:             "test-model",
 				WorkersCount:          1,
 				MinPrefixOverlapRatio: 0.8,
 				LocalTokenizerConfig:  &localTokenizerConfig,
@@ -580,7 +532,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 				kvBlockIndex := prefixCacheScorer.kvCacheIndexer.KVBlockIndex()
 				blockData := tt.kvBlockData(tt.request.Body, tt.request.TargetModel)
 				for key, entries := range blockData {
-					err := kvBlockIndex.Add(ctx, []kvblock.Key{key}, entries)
+					err := kvBlockIndex.Add(ctx, []kvblock.BlockHash{kvblock.EmptyBlockHash}, []kvblock.BlockHash{key}, entries)
 					require.NoError(t, err)
 				}
 			}
diff --git a/scripts/fetch-python-wrapper.sh b/scripts/fetch-python-wrapper.sh
deleted file mode 100755
index 0c8bc1d681..0000000000
--- a/scripts/fetch-python-wrapper.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env bash
-# fetch-python-wrapper.sh
-# Fetches the Python wrapper file (render_jinja_template_wrapper.py) from llm-d-kv-cache-manager
-# for use in Docker builds and local development.
-# Version can be provided as CLI arg or via KVCACHE_MANAGER_VERSION env var (default v0.3.2).
-#
-# This script replicates the original Dockerfile logic:
-# 1. Creates a temporary directory
-# 2. Clones the repo into that directory
-# 3. Creates the output directory structure
-# 4. Copies the wrapper file to the output location
-# 5. Cleans up the temporary directory
-
-set -euo pipefail
-
-VERSION="${1:-${KVCACHE_MANAGER_VERSION:-v0.3.2}}"
-OUTPUT_DIR="${2:-llm-d-kv-cache-manager/pkg/preprocessing/chat_completions}"
-
-REPO_URL="https://github.com/llm-d/llm-d-kv-cache-manager.git"
-WRAPPER_FILE="pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py"
-
-# Create temporary directory (equivalent to: mkdir -p /tmp/kv-cache-manager)
-# TEMP_DIR will be an absolute path like /tmp/tmp.XXXXXX
-TEMP_DIR=$(mktemp -d)
-trap "rm -rf ${TEMP_DIR}" EXIT
-
-echo "Fetching Python wrapper from llm-d-kv-cache-manager@${VERSION}..."
-
-# Equivalent to: cd /tmp/kv-cache-manager && git clone ... .
-# (clones repo contents directly into TEMP_DIR - using absolute path, no need to cd)
-git clone --depth 1 --branch "${VERSION}" "${REPO_URL}" "${TEMP_DIR}"
-
-# Create output directory if it doesn't exist
-# (equivalent to: mkdir -p /workspace/llm-d-kv-cache-manager/pkg/preprocessing/chat_completions)
-# OUTPUT_DIR is relative to current working directory (relative path, same as original)
-mkdir -p "${OUTPUT_DIR}"
-
-# Copy wrapper file
-# Source: absolute path ${TEMP_DIR}/${WRAPPER_FILE} (e.g., /tmp/tmp.XXXXXX/pkg/.../wrapper.py)
-# Destination: relative path ${OUTPUT_DIR}/ (e.g., llm-d-kv-cache-manager/pkg/.../)
-# (equivalent to original: cp pkg/.../wrapper.py /workspace/... from within temp dir)
-cp "${TEMP_DIR}/${WRAPPER_FILE}" "${OUTPUT_DIR}/"
-
-# Cleanup happens automatically via trap (equivalent to: rm -rf /tmp/kv-cache-manager)
-
-echo "Successfully fetched render_jinja_template_wrapper.py to ${OUTPUT_DIR}/"
-
diff --git a/scripts/kubernetes-dev-env.sh b/scripts/kubernetes-dev-env.sh
index 6cd8c4456a..215fe86a4a 100755
--- a/scripts/kubernetes-dev-env.sh
+++ b/scripts/kubernetes-dev-env.sh
@@ -24,7 +24,7 @@ if [[ -z "${HF_TOKEN:-}" ]]; then
   exit 1
 fi
 
-export VLLM_CHART_DIR="${VLLM_CHART_DIR:-../llm-d-kv-cache-manager/vllm-setup-helm}"
+export VLLM_CHART_DIR="${VLLM_CHART_DIR:-../llm-d-kv-cache/vllm-setup-helm}"
 # Check that Chart.yaml exists
 if [[ ! -f "$VLLM_CHART_DIR/Chart.yaml" ]]; then
   echo "Chart.yaml not found in $VLLM_CHART_DIR"
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index d23b20a58d..027a496867 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -487,15 +487,16 @@ kind: EndpointPickerConfig
 plugins:
 - type: precise-prefix-cache-scorer
   parameters:
+    tokenProcessorConfig:
+      blockSize: 16  # must match vLLM block size if not default (16)
+      hashSeed: "42"  # must match PYTHONHASHSEED in vLLM pods
     kvEventsConfig:
       zmqEndpoint: tcp://0.0.0.0:5557
     indexerConfig:
       prefixStoreConfig:
         blockSize: 16 
-      tokenProcessorConfig:
-        blockSize: 16                         # must match vLLM block size if not default (16)
-        hashSeed: "42"                        # must match PYTHONHASHSEED in vLLM pods
       tokenizersPoolConfig:
+        modelName: Qwen/Qwen2.5-1.5B-Instruct
         hf:
           tokenizersCacheDir: "/cache/tokenizers"
       kvBlockIndexConfig: