Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile.epp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Minimal runtime Dockerfile (microdnf-only, no torch, wrapper in site-packages)
# Build Stage: using Go 1.25 image
FROM quay.io/projectquay/golang:1.25 AS builder
# Build Stage: using Go 1.24 image
FROM quay.io/projectquay/golang:1.24 AS builder

ARG TARGETOS
ARG TARGETARCH
Expand Down
31 changes: 18 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,22 @@ TARGETOS ?= $(shell go env GOOS)
TARGETARCH ?= $(shell go env GOARCH)
PROJECT_NAME ?= llm-d-inference-scheduler
SIDECAR_IMAGE_NAME ?= llm-d-routing-sidecar
VLLM_SIMULATOR_IMAGE_NAME ?= llm-d-inference-sim
SIDECAR_NAME ?= pd-sidecar
IMAGE_REGISTRY ?= ghcr.io/llm-d
IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME)
EPP_TAG ?= dev
export EPP_TAG
IMG = $(IMAGE_TAG_BASE):$(EPP_TAG)
export EPP_IMAGE ?= $(IMAGE_TAG_BASE):$(EPP_TAG)
SIDECAR_TAG ?= dev
export SIDECAR_TAG
SIDECAR_IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(SIDECAR_IMAGE_NAME)
SIDECAR_IMG = $(SIDECAR_IMAGE_TAG_BASE):$(SIDECAR_TAG)
export SIDECAR_IMAGE ?= $(SIDECAR_IMAGE_TAG_BASE):$(SIDECAR_TAG)
NAMESPACE ?= hc4ai-operator
VLLM_SIMULATOR_TAG ?= v0.6.1
export VLLM_SIMULATOR_TAG
VLLM_SIMULATOR_TAG_BASE ?= $(IMAGE_REGISTRY)/$(VLLM_SIMULATOR_IMAGE_NAME)
export VLLM_SIMULATOR_IMAGE ?= $(VLLM_SIMULATOR_TAG_BASE):$(VLLM_SIMULATOR_TAG)

# Map go arch to typos arch
ifeq ($(TARGETARCH),amd64)
Expand Down Expand Up @@ -57,8 +60,8 @@ BUILD_REF ?= $(shell git describe --abbrev=0 2>/dev/null)
SRC = $(shell find . -type f -name '*.go')

# Internal variables for generic targets
epp_IMAGE = $(IMG)
sidecar_IMAGE = $(SIDECAR_IMG)
epp_IMAGE = $(EPP_IMAGE)
sidecar_IMAGE = $(SIDECAR_IMAGE)
epp_NAME = epp
sidecar_NAME = $(SIDECAR_NAME)
epp_LDFLAGS = -ldflags="$(LDFLAGS)"
Expand Down Expand Up @@ -185,7 +188,7 @@ uninstall: uninstall-docker ## Default uninstall using Docker
.PHONY: install-docker
install-docker: check-container-tool ## Install app using $(CONTAINER_RUNTIME)
@echo "Starting container with $(CONTAINER_RUNTIME)..."
$(CONTAINER_RUNTIME) run -d --name $(PROJECT_NAME)-container $(IMG)
$(CONTAINER_RUNTIME) run -d --name $(PROJECT_NAME)-container $(EPP_IMAGE)
@echo "$(CONTAINER_RUNTIME) installation complete."
@echo "To use $(PROJECT_NAME), run:"
@echo "alias $(PROJECT_NAME)='$(CONTAINER_RUNTIME) exec -it $(PROJECT_NAME)-container /app/$(PROJECT_NAME)'"
Expand Down Expand Up @@ -230,12 +233,12 @@ uninstall-k8s: check-kubectl check-kustomize check-envsubst ## Uninstall from Ku

.PHONY: install-openshift
install-openshift: check-kubectl check-kustomize check-envsubst ## Install on OpenShift
@echo $$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION
@echo $$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE
@echo "Creating namespace $(NAMESPACE)..."
kubectl create namespace $(NAMESPACE) 2>/dev/null || true
@echo "Deploying common resources from deploy/ ..."
# Build and substitute the base manifests from deploy, then apply them
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -n $(NAMESPACE) -f -
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl apply -n $(NAMESPACE) -f -
@echo "Waiting for pod to become ready..."
sleep 5
@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \
Expand All @@ -246,9 +249,9 @@ install-openshift: check-kubectl check-kustomize check-envsubst ## Install on Op
.PHONY: uninstall-openshift
uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall from OpenShift
@echo "Removing resources from OpenShift..."
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete --force -f - || true
kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl delete --force -f - || true
# @if kubectl api-resources --api-group=route.openshift.io | grep -q Route; then \
# envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \
# envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \
# fi
@POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \
echo "Deleting pod: $$POD"; \
Expand All @@ -260,18 +263,18 @@ uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall f
.PHONY: install-rbac
install-rbac: check-kubectl check-kustomize check-envsubst ## Install RBAC
@echo "Applying RBAC configuration from deploy/rbac..."
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -f -
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME' | kubectl apply -f -

.PHONY: uninstall-rbac
uninstall-rbac: check-kubectl check-kustomize check-envsubst ## Uninstall RBAC
@echo "Removing RBAC configuration from deploy/rbac..."
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete -f - || true
kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME' | kubectl delete -f - || true

##@ Environment
.PHONY: env
env: ## Print environment variables
@echo "IMAGE_TAG_BASE=$(IMAGE_TAG_BASE)"
@echo "IMG=$(IMG)"
@echo "EPP_IMAGE=$(EPP_IMAGE)"
@echo "CONTAINER_RUNTIME=$(CONTAINER_RUNTIME)"

.PHONY: check-typos
Expand Down Expand Up @@ -390,7 +393,9 @@ env-dev-kind: ## Run under kind ($(KIND_CLUSTER_NAME))
CLUSTER_NAME=$(KIND_CLUSTER_NAME) \
GATEWAY_HOST_PORT=$(KIND_GATEWAY_HOST_PORT) \
IMAGE_REGISTRY=$(IMAGE_REGISTRY) \
EPP_TAG=$(EPP_TAG) \
EPP_IMAGE=$(EPP_IMAGE) \
VLLM_SIMULATOR_IMAGE=${VLLM_SIMULATOR_IMAGE} \
SIDECAR_IMAGE=${SIDECAR_IMAGE} \
./scripts/kind-dev-env.sh; \
fi

Expand Down
2 changes: 1 addition & 1 deletion deploy/components/inference-gateway/deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ spec:
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: ghcr.io/llm-d/llm-d-inference-scheduler:latest
image: ${EPP_IMAGE}
imagePullPolicy: IfNotPresent
args:
- --pool-name
Expand Down
4 changes: 0 additions & 4 deletions deploy/components/inference-gateway/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,3 @@ resources:
- deployments.yaml
- gateways.yaml
- httproutes.yaml

images:
- name: ghcr.io/llm-d/llm-d-inference-scheduler
newTag: ${EPP_TAG}
6 changes: 3 additions & 3 deletions deploy/components/vllm-sim-pd/deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spec:
spec:
containers:
- name: vllm
image: ghcr.io/llm-d/llm-d-inference-sim:latest
image: ${VLLM_SIMULATOR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8000"
Expand Down Expand Up @@ -71,7 +71,7 @@ spec:
spec:
initContainers:
- name: routing-sidecar
image: ghcr.io/llm-d/llm-d-routing-sidecar:latest
image: ${SIDECAR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8000"
Expand Down Expand Up @@ -112,7 +112,7 @@ spec:
fieldPath: status.podIP
containers:
- name: vllm
image: ghcr.io/llm-d/llm-d-inference-sim:latest
image: ${VLLM_SIMULATOR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8200"
Expand Down
6 changes: 0 additions & 6 deletions deploy/components/vllm-sim-pd/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,3 @@ kind: Kustomization

resources:
- deployments.yaml

images:
- name: ghcr.io/llm-d/llm-d-inference-sim
newTag: ${VLLM_SIMULATOR_TAG}
- name: ghcr.io/llm-d/llm-d-routing-sidecar
newTag: ${SIDECAR_TAG}
4 changes: 2 additions & 2 deletions deploy/components/vllm-sim/deployments.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
spec:
initContainers:
- name: routing-sidecar
image: ghcr.io/llm-d/llm-d-routing-sidecar:latest
image: ${SIDECAR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8000"
Expand Down Expand Up @@ -57,7 +57,7 @@ spec:
fieldPath: status.podIP
containers:
- name: vllm
image: ghcr.io/llm-d/llm-d-inference-sim:latest
image: ${VLLM_SIMULATOR_IMAGE}
imagePullPolicy: IfNotPresent
args:
- "--port=8200"
Expand Down
6 changes: 0 additions & 6 deletions deploy/components/vllm-sim/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,3 @@ kind: Kustomization
resources:
- deployments.yaml

images:
- name: ghcr.io/llm-d/llm-d-inference-sim
newTag: ${VLLM_SIMULATOR_TAG}
- name: ghcr.io/llm-d/llm-d-routing-sidecar
newTag: ${SIDECAR_TAG}

Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ spec:
serviceAccountName: operator-controller-manager
containers:
- name: cmd
image: ${IMAGE_TAG_BASE}:${VERSION}
image: ${EPP_IMAGE}
imagePullPolicy: Always
3 changes: 1 addition & 2 deletions deploy/environments/openshift-base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ configMapGenerator:
# Define the image to be updated.
# images:
# - name: ghcr.io/llm-d/placeholder
# newName: ghcr.io/llm-d/${IMAGE_TAG_BASE}
# newTag: ${VERSION}
# newName: ${EPP_IMAGE}
patches:
- path: common/patch-service.yaml
- path: common/patch-statefulset.yaml
Expand Down
4 changes: 2 additions & 2 deletions docs/create_new_filter.md
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this seems like an unrelated change?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're fundamentally correct. However, the function LoadConfig no longer exists upstream, which causes lint to fail in the file test/config/prefix_cache_mode_test.go. I therefore searched for LoadConfig and updated all references to it.

Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ Once a filter is defined, it can be used to modify llm-d-inference-scheduler

- Add the relevant import path (if defined outside this repository);
- Add any desired configuration knobs (e.g., environment variables); and
- List the new filter in the `LoadConfig()` function's `cfg.loadPluginInfo`
- List the new filter in the `LoadConfigPhaseTwo()` function's `cfg.loadPluginInfo`
list of available plugins.

In the case of the llm-d-inference-scheduler, filters can be hooked into the
Expand All @@ -137,7 +137,7 @@ In the case of the llm-d-inference-scheduler, filters can be hooked into the
environment variables):

```go
func (c *Config) LoadConfig() {
func (c *Config) LoadConfigPhaseTwo() {
c.loadPluginInfo(c.DecodeSchedulerPlugins, false,
KVCacheScorerName, ..., ByLabelFilterName, ... )
c.loadPluginInfo(c.PrefillSchedulerPlugins, true, ... )
Expand Down
22 changes: 11 additions & 11 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ require (
k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d
sigs.k8s.io/controller-runtime v0.22.4
sigs.k8s.io/gateway-api v1.4.0
sigs.k8s.io/gateway-api-inference-extension v1.1.0
sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251119101812-bef80ca4dedd
)

require (
Expand All @@ -44,7 +44,7 @@ require (
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/emicklei/go-restful/v3 v3.13.0 // indirect
github.com/envoyproxy/go-control-plane/envoy v1.35.0 // indirect
github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
Expand Down Expand Up @@ -78,9 +78,9 @@ require (
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.23.2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.1 // indirect
github.com/prometheus/common v0.67.2 // indirect
github.com/prometheus/procfs v0.17.0 // indirect
github.com/prometheus/prometheus v0.307.1 // indirect
github.com/prometheus/prometheus v0.307.3 // indirect
github.com/redis/go-redis/v9 v9.11.0 // indirect
github.com/spf13/cobra v1.9.1 // indirect
github.com/spf13/pflag v1.0.7 // indirect
Expand Down Expand Up @@ -108,14 +108,14 @@ require (
go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/exp v0.0.0-20250808145144-a408d31f581a // indirect
golang.org/x/mod v0.28.0 // indirect
golang.org/x/net v0.44.0 // indirect
golang.org/x/oauth2 v0.31.0 // indirect
golang.org/x/sys v0.36.0 // indirect
golang.org/x/term v0.35.0 // indirect
golang.org/x/text v0.29.0 // indirect
golang.org/x/mod v0.29.0 // indirect
golang.org/x/net v0.46.0 // indirect
golang.org/x/oauth2 v0.32.0 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/term v0.36.0 // indirect
golang.org/x/text v0.30.0 // indirect
golang.org/x/time v0.13.0 // indirect
golang.org/x/tools v0.37.0 // indirect
golang.org/x/tools v0.38.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect
Expand Down
Loading