Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ venv
__pycache__

# Docker files
Dockerfile
Dockerfile

**/vllm_source
2 changes: 1 addition & 1 deletion .github/workflows/ci-examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ jobs:
run: chmod +x hack/verify-examples.sh

- name: Run verify-examples.sh
run: ./hack/verify-examples.sh
run: sudo ./hack/verify-examples.sh
4 changes: 2 additions & 2 deletions .github/workflows/ci-pr-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ jobs:
- name: Run make build
shell: bash
run: |
make build
sudo make build

- name: Run make test
shell: bash
run: |
make test
sudo PATH="/root/.local/bin:$PATH" make test
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,9 @@ _cgo_*
/hack/tools

# Tokenizer binaries
/lib
/lib

# uds tokenizer default model path
services/uds_tokenizer/models

**/vllm_source
34 changes: 17 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Python build stage: installs the Python dependencies of the chat-completions
# preprocessing package. Later stages pull files out of this stage with
# `COPY --from=python-builder` rather than installing them again.
FROM python:3.12-slim AS python-builder

WORKDIR /workspace

# Toolchain for the dependency install.
# NOTE(review): presumably required to provide `make` and a compiler for
# packages that build native extensions on the slim image — confirm.
RUN apt-get update && apt-get install -y --no-install-recommends build-essential

# Only the Makefile and the Python package are copied, so changes to other
# sources do not invalidate the dependency layer below.
COPY Makefile Makefile
COPY pkg/preprocessing/chat_completions/ pkg/preprocessing/chat_completions/
# NOTE(review): later stages copy both /workspace/build/venv/lib/python3.12/site-packages
# and /usr/local/lib/python3.12/site-packages from this stage — verify which of
# the two locations this target actually populates.
RUN make install-python-deps

# Build Stage: using the Go 1.24 image
FROM quay.io/projectquay/golang:1.24 AS builder
ARG TARGETOS
Expand All @@ -35,14 +45,7 @@ COPY go.sum go.sum
# and so that source changes don't invalidate our downloaded layer
RUN go mod download

# Copy only the requirements file.
COPY pkg/preprocessing/chat_completions/requirements.txt ./requirements.txt
# Install Python dependencies. This layer will be cached unless requirements.txt changes.
RUN python3.12 -m pip install --upgrade pip setuptools wheel && \
python3.12 -m pip install -r ./requirements.txt

# Copy the go source
COPY examples/kv_events examples/kv_events
# Copy the source code.
COPY . .

# HuggingFace tokenizer bindings
Expand All @@ -51,6 +54,10 @@ ARG RELEASE_VERSION=v1.22.1
RUN curl -L https://github.com/daulet/tokenizers/releases/download/${RELEASE_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
RUN ranlib lib/*.a

# Bring in the Python sources and the site-packages prepared in the python-builder stage before building
COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /workspace/pkg/preprocessing/chat_completions
RUN make setup-venv
COPY --from=python-builder /workspace/build/venv/lib/python3.12/site-packages /workspace/build/venv/lib/python3.12/site-packages
RUN make build

# Use distroless as minimal base image to package the manager binary
Expand All @@ -64,16 +71,9 @@ RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.
dnf install -y zeromq libxcrypt-compat python3.12 python3.12-pip && \
dnf clean all



# Install Python dependencies in the final image.
COPY --from=builder /workspace/requirements.txt /tmp/requirements.txt
RUN python3.12 -m pip install --upgrade pip setuptools wheel && \
python3.12 -m pip install --no-cache-dir -r /tmp/requirements.txt \
&& rm -rf /tmp/requirements.txt

# Copy this project's own Python source code into the final image
COPY --from=builder /workspace/pkg/preprocessing/chat_completions /app/pkg/preprocessing/chat_completions
COPY --from=python-builder /workspace/pkg/preprocessing/chat_completions /app/pkg/preprocessing/chat_completions
COPY --from=python-builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages

# Set the PYTHONPATH. This mirrors the Makefile's export, ensuring both this project's
# Python code and the installed libraries (site-packages) are found at runtime.
Expand Down
29 changes: 22 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ detect-python: ## Detects Python and prints the configuration.
fi
@printf "\033[33;1m==============================\033[0m\n"

.PHONY: install-python-deps
install-python-deps: detect-python ## Sets up the Python virtual environment and installs dependencies.
.PHONY: setup-venv
setup-venv: detect-python ## Sets up the Python virtual environment.
@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
@if [ ! -f "$(VENV_BIN)/pip" ]; then \
echo "Creating virtual environment..."; \
Expand All @@ -124,12 +124,27 @@ install-python-deps: detect-python ## Sets up the Python virtual environment and
exit 1; \
}; \
fi
@echo "Upgrading pip and installing dependencies..."
@echo "Upgrading pip..."
@$(VENV_BIN)/pip install --upgrade pip
@$(VENV_BIN)/pip install -q -r pkg/preprocessing/chat_completions/requirements.txt
@echo "Verifying transformers installation..."
@$(VENV_BIN)/python -c "import transformers; print('✅ Transformers version ' + transformers.__version__ + ' installed.')" || { \
echo "ERROR: transformers library not properly installed in venv."; \
@echo "Python virtual environment setup complete."

# install-python-deps runs the chat-completions setup script with the virtual
# environment's bin directory first on PATH, then checks that `vllm` can be
# imported by the venv's interpreter and fails the build otherwise.
#
# Creating the virtual environment is the job of the setup-venv prerequisite,
# which aborts if that fails, so it is not repeated here.
# NOTE(review): setup.sh is presumably what installs the requirements — confirm.
.PHONY: install-python-deps
install-python-deps: setup-venv ## Installs Python dependencies into the virtual environment.
	@printf "\033[33;1m==== Installing Python dependencies into $(VENV_DIR) ====\033[0m\n"
	@echo "Upgrading pip and installing dependencies..."
	@PATH=$(VENV_BIN):$$PATH ./pkg/preprocessing/chat_completions/setup.sh
	@echo "Verifying vllm installation..."
	@$(VENV_BIN)/python -c "import vllm; print('✅ vllm version ' + vllm.__version__ + ' installed.')" || { \
		echo "ERROR: vllm library not properly installed in venv."; \
		exit 1; \
	}

Expand Down
25 changes: 4 additions & 21 deletions examples/kv_events/online/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ const (
// ChatCompletionsRequest holds the fields needed for chat-completions rendering.
type ChatCompletionsRequest struct {
Model string `json:"model"`
*preprocessing.RenderJinjaTemplateRequest
*preprocessing.ApplyChatTemplateRequest
}

func main() {
Expand Down Expand Up @@ -325,35 +325,18 @@ func setupUnifiedHTTPEndpoints(

logger.Info("Created ChatCompletions", "req", req)

// Get chat template for the model if not provided
if req.ChatTemplate == "" {
templateReq := preprocessing.FetchChatTemplateRequest{
Model: req.Model,
Token: os.Getenv(envHFToken),
}

var err error
req.ChatTemplate, req.ChatTemplateKWArgs, err = chatTemplatingProcessor.FetchChatTemplate(ctx, templateReq)
if err != nil {
http.Error(w, fmt.Sprintf("Failed to get chat template: %v", err), http.StatusInternalServerError)
return
}
}

response, err := chatTemplatingProcessor.RenderChatTemplate(ctx, req.RenderJinjaTemplateRequest)
renderedPrompt, err := chatTemplatingProcessor.ApplyChatTemplate(ctx, req.ApplyChatTemplateRequest)
if err != nil {
http.Error(w, fmt.Sprintf("Failed to render chat template: %v", err), http.StatusInternalServerError)
return
}

// Use KV-cache to score the rendered template
if len(response.RenderedChats) == 0 {
http.Error(w, "No rendered chats found in response", http.StatusInternalServerError)
if renderedPrompt == "" {
http.Error(w, "rendered prompt is empty", http.StatusInternalServerError)
return
}

renderedPrompt := response.RenderedChats[0]

// Get score
pods, err := kvCacheIndexer.GetPodScores(ctx, nil, renderedPrompt, req.Model, nil)
if err != nil {
Expand Down
10 changes: 5 additions & 5 deletions examples/testdata/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ const (
ModelName = "bert-base-uncased"
)

var RenderReq *preprocessing.RenderJinjaTemplateRequest = nil
var RenderReq *preprocessing.ApplyChatTemplateRequest = nil

//go:embed prompt.txt
var Prompt string

var PromptHashes = []uint64{
5883650188907136581,
4344014219501030587,
8576040316208967329,
13369611429964591057,
3246512376769953277,
2932514196368075983,
6384763183060574933,
13975137892230421288,
}
Empty file modified hack/verify-examples.sh
100644 → 100755
Empty file.
5 changes: 3 additions & 2 deletions pkg/kvcache/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ func NewKVCacheIndexer(ctx context.Context, config *Config, tokenProcessor kvblo
return nil, fmt.Errorf("failed to create KVBlockScorer: %w", err)
}

tokenizersPool, err := tokenization.NewTokenizationPool(config.TokenizersPoolConfig, tokenIndexer)
tokenizersPool, err := tokenization.NewTokenizationPool(ctx,
config.TokenizersPoolConfig, tokenIndexer)
if err != nil {
return nil, fmt.Errorf("failed to create tokenizers pool: %w", err)
}
Expand Down Expand Up @@ -127,7 +128,7 @@ func (k *Indexer) KVBlockIndex() kvblock.Index {
// relevant.
//
// The function returns a map of pod identifiers to scores.
func (k *Indexer) GetPodScores(ctx context.Context, renderReq *preprocessing.RenderJinjaTemplateRequest, prompt, modelName string,
func (k *Indexer) GetPodScores(ctx context.Context, renderReq *preprocessing.ApplyChatTemplateRequest, prompt, modelName string,
podIdentifiers []string,
) (map[string]float64, error) {
traceLogger := log.FromContext(ctx).V(logging.TRACE).WithName("kvcache.GetPodScores")
Expand Down
Loading
Loading