Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,6 @@ _cgo_*
.gopls/
/hack/tools

# Tokenizer binaries
/lib
# UDS tokenizer files
services/uds_tokenizer/models/*
!services/uds_tokenizer/models/README.md
10 changes: 2 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,15 @@ RUN python3.12 -m pip install --upgrade pip setuptools wheel && \
COPY examples/kv_events examples/kv_events
COPY . .

# HuggingFace tokenizer bindings
RUN mkdir -p lib
ARG RELEASE_VERSION=v1.22.1
RUN curl -L https://github.com/daulet/tokenizers/releases/download/${RELEASE_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
RUN ranlib lib/*.a

# Set up Python environment variables needed for the build
ENV PYTHONPATH=/workspace/pkg/preprocessing/chat_completions:/usr/lib64/python3.9/site-packages:/usr/lib/python3.9/site-packages
ENV PYTHON=python3.9

# Build the application with CGO enabled.
# We export CGO_CFLAGS and CGO_LDFLAGS using python3.12-config to ensure the Go compiler
# can find the Python headers and libraries correctly. This mirrors the fix from the Makefile.
RUN export CGO_CFLAGS="$(python3.12-config --cflags) -I/workspace/lib" && \
export CGO_LDFLAGS="$(python3.12-config --ldflags --embed) -L/workspace/lib -ltokenizers -ldl -lm" && \
RUN export CGO_CFLAGS="$(python3.12-config --cflags)" && \
export CGO_LDFLAGS="$(python3.12-config --ldflags --embed) -ldl -lm" && \
CGO_ENABLED=1 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH:-amd64} \
go build -a -o bin/kv-cache-manager examples/kv_events/online/main.go

Expand Down
32 changes: 6 additions & 26 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,6 @@ SRC = $(shell find . -type f -name '*.go')
help: ## Print help
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

##@ Tokenizer & Linking

TOKENIZER_LIB = lib/libtokenizers.a

# Extract RELEASE_VERSION from Dockerfile
TOKENIZER_VERSION := $(shell grep '^ARG RELEASE_VERSION=' Dockerfile | cut -d'=' -f2)

.PHONY: download-tokenizer
download-tokenizer: $(TOKENIZER_LIB)
$(TOKENIZER_LIB):
## Download the HuggingFace tokenizer bindings.
@echo "Downloading HuggingFace tokenizer bindings for version $(TOKENIZER_VERSION)..."
mkdir -p lib
if [ "$(TARGETOS)" = "darwin" ] && [ "$(TARGETARCH)" = "amd64" ]; then \
curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-x86_64.tar.gz | tar -xz -C lib; \
else \
curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib; \
fi
ranlib lib/*.a

##@ Python Configuration

PYTHON_VERSION := 3.12
Expand Down Expand Up @@ -87,8 +67,8 @@ else
endif

# Final CGO flags with all dependencies
CGO_CFLAGS_FINAL := $(PYTHON_CFLAGS) -Ilib
CGO_LDFLAGS_FINAL := $(PYTHON_LDFLAGS) $(PYTHON_LIBS) -Llib -ltokenizers -ldl -lm
CGO_CFLAGS_FINAL := $(PYTHON_CFLAGS)
CGO_LDFLAGS_FINAL := $(PYTHON_LDFLAGS) $(PYTHON_LIBS) -ldl -lm

.PHONY: detect-python
detect-python: ## Detects Python and prints the configuration.
Expand Down Expand Up @@ -187,17 +167,17 @@ export PYTHONPATH=$(shell pwd)/pkg/preprocessing/chat_completions:$(VENV_DIR)/li
test: unit-test e2e-test ## Run all tests

.PHONY: unit-test
unit-test: download-tokenizer install-python-deps download-zmq ## Run unit tests
unit-test: install-python-deps download-zmq ## Run unit tests
@printf "\033[33;1m==== Running unit tests ====\033[0m\n"
@go test -v ./pkg/...

.PHONY: e2e-test
e2e-test: download-tokenizer download-local-llama3 install-python-deps download-zmq ## Run end-to-end tests
e2e-test: download-local-llama3 install-python-deps download-zmq ## Run end-to-end tests
@printf "\033[33;1m==== Running e2e tests ====\033[0m\n"
@go test -v ./tests/...

.PHONY: bench
bench: download-tokenizer install-python-deps download-zmq ## Run benchmarks
bench: install-python-deps download-zmq ## Run benchmarks
@printf "\033[33;1m==== Running chat template benchmarks ====\033[0m\n"
@go test -bench=. -benchmem ./pkg/preprocessing/chat_completions/
@printf "\033[33;1m==== Running tokenization benchmarks ====\033[0m\n"
Expand All @@ -211,7 +191,7 @@ run: build ## Run the application locally
##@ Build

.PHONY: build
build: check-go download-tokenizer install-python-deps download-zmq ## Build the application binary
build: check-go install-python-deps download-zmq ## Build the application binary
@printf "\033[33;1m==== Building application binary ====\033[0m\n"
@go build -o bin/$(PROJECT_NAME) examples/kv_events/online/main.go
@echo "✅ Built examples/kv_events/online/main.go -> bin/$(PROJECT_NAME)"
Expand Down
4 changes: 1 addition & 3 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,8 @@ Efficiently handling tokenization is critical for performance. The system is des
## Dependencies

The Indexer relies on several libraries and tools:
* **[daulet/tokenizers](https://github.com/daulet/tokenizers)**: Go bindings for the HuggingFace Tokenizers library.
* Used for tokenization of prompts.
* **[pebbe/zmq4](https://github.com/pebbe/zmq4)**: Go bindings for ZeroMQ.
* Used for the event processing pool and communication between components.
* Requires `libzmq` library to be installed on the system.
* **Python**: Required to run a CGO binding for the `chat_completions_template` package.
* Used for jinja2 templating of chat completions requests.
* Used for vllm templating of chat completions requests.
22 changes: 3 additions & 19 deletions examples/kv_events/online/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ const (
// ChatCompletionsRequest holds the fields needed for chat-completions rendering.
type ChatCompletionsRequest struct {
Model string `json:"model"`
*preprocessing.RenderJinjaTemplateRequest
*preprocessing.ApplyChatTemplateRequest
}

func main() {
Expand Down Expand Up @@ -318,34 +318,18 @@ func setupUnifiedHTTPEndpoints(
logger.Info("Created ChatCompletions", "req", req)

// Get chat template for the model if not provided
if req.ChatTemplate == "" {
templateReq := preprocessing.FetchChatTemplateRequest{
Model: req.Model,
Token: os.Getenv(envHFToken),
}

var err error
req.ChatTemplate, req.ChatTemplateKWArgs, err = chatTemplatingProcessor.FetchChatTemplate(ctx, templateReq)
if err != nil {
http.Error(w, fmt.Sprintf("Failed to get chat template: %v", err), http.StatusInternalServerError)
return
}
}

response, err := chatTemplatingProcessor.RenderChatTemplate(ctx, req.RenderJinjaTemplateRequest)
renderedPrompt, err := chatTemplatingProcessor.ApplyChatTemplate(ctx, req.ApplyChatTemplateRequest)
if err != nil {
http.Error(w, fmt.Sprintf("Failed to render chat template: %v", err), http.StatusInternalServerError)
return
}

// Use KV-cache to score the rendered template
if len(response.RenderedChats) == 0 {
if renderedPrompt == "" {
http.Error(w, "No rendered chats found in response", http.StatusInternalServerError)
return
}

renderedPrompt := response.RenderedChats[0]

// Get score
pods, err := kvCacheIndexer.GetPodScores(ctx, nil, renderedPrompt, req.Model, nil)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion examples/testdata/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const (
ModelName = "bert-base-uncased"
)

var RenderReq *preprocessing.RenderJinjaTemplateRequest = nil
var RenderReq *preprocessing.ApplyChatTemplateRequest = nil

//go:embed prompt.txt
var Prompt string
Expand Down
10 changes: 8 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ go 1.24.1
require (
github.com/alicebob/miniredis/v2 v2.35.0
github.com/cespare/xxhash/v2 v2.3.0
github.com/daulet/tokenizers v1.22.1
github.com/dgraph-io/ristretto/v2 v2.3.0
github.com/dustin/go-humanize v1.0.1
github.com/fxamacker/cbor/v2 v2.7.0
Expand All @@ -18,6 +17,7 @@ require (
github.com/stretchr/testify v1.10.0
github.com/vmihailenco/msgpack/v5 v5.4.1
go.uber.org/multierr v1.11.0
go.uber.org/zap v1.27.0
golang.org/x/net v0.38.0
google.golang.org/grpc v1.68.1
google.golang.org/protobuf v1.36.5
Expand All @@ -31,11 +31,14 @@ require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect
Expand All @@ -49,21 +52,24 @@ require (
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/yuin/gopher-lua v1.1.1 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/oauth2 v0.27.0 // indirect
golang.org/x/sync v0.12.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/term v0.30.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.9.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.33.0 // indirect
k8s.io/apiextensions-apiserver v0.33.0 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
Expand Down
20 changes: 18 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ github.com/alicebob/miniredis/v2 v2.35.0 h1:QwLphYqCEAo1eu1TqPRN2jgVMPBweeQcR21j
github.com/alicebob/miniredis/v2 v2.35.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/daulet/tokenizers v1.22.1 h1:3wzAFIxfgRuqGKka8xdkeTbctDmmqOOs12GofqdorpM=
github.com/daulet/tokenizers v1.22.1/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
Expand All @@ -25,6 +25,12 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k=
github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ=
github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU=
github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
Expand All @@ -45,12 +51,16 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
Expand Down Expand Up @@ -151,6 +161,8 @@ golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand All @@ -174,6 +186,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw=
gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0=
Expand All @@ -192,6 +206,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/api v0.33.0 h1:yTgZVn1XEe6opVpP1FylmNrIFWuDqe2H0V8CT5gxfIU=
k8s.io/api v0.33.0/go.mod h1:CTO61ECK/KU7haa3qq8sarQ0biLq2ju405IZAd9zsiM=
k8s.io/apiextensions-apiserver v0.33.0 h1:d2qpYL7Mngbsc1taA4IjJPRJ9ilnsXIrndH+r9IimOs=
k8s.io/apiextensions-apiserver v0.33.0/go.mod h1:VeJ8u9dEEN+tbETo+lFkwaaZPg6uFKLGj5vyNEwwSzc=
k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ=
k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
k8s.io/client-go v0.33.0 h1:UASR0sAYVUzs2kYuKn/ZakZlcs2bEHaizrrHUZg0G98=
Expand Down
8 changes: 2 additions & 6 deletions pkg/kvcache/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,13 @@ func (k *Indexer) KVBlockIndex() kvblock.Index {
// relevant.
//
// The function returns a map of pod identifiers to scores.
func (k *Indexer) GetPodScores(ctx context.Context, renderReq *preprocessing.RenderJinjaTemplateRequest, prompt, modelName string,
func (k *Indexer) GetPodScores(ctx context.Context, renderReq *preprocessing.ApplyChatTemplateRequest, prompt, modelName string,
podIdentifiers []string,
) (map[string]float64, error) {
traceLogger := log.FromContext(ctx).V(logging.TRACE).WithName("kvcache.GetPodScores")

// 1. tokenize prompt
tokens := k.tokenizersPool.Tokenize(renderReq, prompt)
tokens := k.tokenizersPool.Tokenize(renderReq, prompt, modelName)

// 2. get block keys
blockKeys := k.tokensProcessor.TokensToKVBlockKeys(nil, tokens, modelName)
Expand Down Expand Up @@ -183,7 +183,3 @@ func podsPerKeyPrintHelper(ks map[kvblock.Key][]kvblock.PodEntry) string {

return flattened
}

func (k *Indexer) SetTokenizer(tokenizer tokenization.Tokenizer, modelName string) {
k.tokenizersPool.SetTokenizer(tokenizer, modelName)
}
Loading
Loading