diff --git a/Dockerfile.epp b/Dockerfile.epp index 38dfeca47c..7d1df302ae 100644 --- a/Dockerfile.epp +++ b/Dockerfile.epp @@ -1,6 +1,6 @@ ## Minimal runtime Dockerfile (microdnf-only, no torch, wrapper in site-packages) -# Build Stage: using Go 1.25 image -FROM quay.io/projectquay/golang:1.25 AS builder +# Build Stage: using Go 1.24 image +FROM quay.io/projectquay/golang:1.24 AS builder ARG TARGETOS ARG TARGETARCH diff --git a/Makefile b/Makefile index 395a4dc3da..00e82f1589 100644 --- a/Makefile +++ b/Makefile @@ -8,19 +8,22 @@ TARGETOS ?= $(shell go env GOOS) TARGETARCH ?= $(shell go env GOARCH) PROJECT_NAME ?= llm-d-inference-scheduler SIDECAR_IMAGE_NAME ?= llm-d-routing-sidecar +VLLM_SIMULATOR_IMAGE_NAME ?= llm-d-inference-sim SIDECAR_NAME ?= pd-sidecar IMAGE_REGISTRY ?= ghcr.io/llm-d IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME) EPP_TAG ?= dev export EPP_TAG -IMG = $(IMAGE_TAG_BASE):$(EPP_TAG) +export EPP_IMAGE ?= $(IMAGE_TAG_BASE):$(EPP_TAG) SIDECAR_TAG ?= dev export SIDECAR_TAG SIDECAR_IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(SIDECAR_IMAGE_NAME) -SIDECAR_IMG = $(SIDECAR_IMAGE_TAG_BASE):$(SIDECAR_TAG) +export SIDECAR_IMAGE ?= $(SIDECAR_IMAGE_TAG_BASE):$(SIDECAR_TAG) NAMESPACE ?= hc4ai-operator VLLM_SIMULATOR_TAG ?= v0.6.1 export VLLM_SIMULATOR_TAG +VLLM_SIMULATOR_TAG_BASE ?= $(IMAGE_REGISTRY)/$(VLLM_SIMULATOR_IMAGE_NAME) +export VLLM_SIMULATOR_IMAGE ?= $(VLLM_SIMULATOR_TAG_BASE):$(VLLM_SIMULATOR_TAG) # Map go arch to typos arch ifeq ($(TARGETARCH),amd64) @@ -57,8 +60,8 @@ BUILD_REF ?= $(shell git describe --abbrev=0 2>/dev/null) SRC = $(shell find . -type f -name '*.go') # Internal variables for generic targets -epp_IMAGE = $(IMG) -sidecar_IMAGE = $(SIDECAR_IMG) +epp_IMAGE = $(EPP_IMAGE) +sidecar_IMAGE = $(SIDECAR_IMAGE) epp_NAME = epp sidecar_NAME = $(SIDECAR_NAME) epp_LDFLAGS = -ldflags="$(LDFLAGS)" @@ -185,7 +188,7 @@ uninstall: uninstall-docker ## Default uninstall using Docker .PHONY: install-docker install-docker: check-container-tool ## Install app using $(CONTAINER_RUNTIME) @echo "Starting container with $(CONTAINER_RUNTIME)..." - $(CONTAINER_RUNTIME) run -d --name $(PROJECT_NAME)-container $(IMG) + $(CONTAINER_RUNTIME) run -d --name $(PROJECT_NAME)-container $(EPP_IMAGE) @echo "$(CONTAINER_RUNTIME) installation complete." @echo "To use $(PROJECT_NAME), run:" @echo "alias $(PROJECT_NAME)='$(CONTAINER_RUNTIME) exec -it $(PROJECT_NAME)-container /app/$(PROJECT_NAME)'" @@ -230,12 +233,12 @@ uninstall-k8s: check-kubectl check-kustomize check-envsubst ## Uninstall from Ku .PHONY: install-openshift install-openshift: check-kubectl check-kustomize check-envsubst ## Install on OpenShift - @echo $$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION + @echo $$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE @echo "Creating namespace $(NAMESPACE)..." kubectl create namespace $(NAMESPACE) 2>/dev/null || true @echo "Deploying common resources from deploy/ ..." # Build and substitute the base manifests from deploy, then apply them - kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -n $(NAMESPACE) -f - + kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl apply -n $(NAMESPACE) -f - @echo "Waiting for pod to become ready..." sleep 5 @POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \ @@ -246,9 +249,9 @@ install-openshift: check-kubectl check-kustomize check-envsubst ## Install on Op .PHONY: uninstall-openshift uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall from OpenShift @echo "Removing resources from OpenShift..." - kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete --force -f - || true + kustomize build deploy/environments/openshift-base | envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' | kubectl delete --force -f - || true # @if kubectl api-resources --api-group=route.openshift.io | grep -q Route; then \ - # envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \ + # envsubst '$$PROJECT_NAME $$NAMESPACE $$EPP_IMAGE' < deploy/openshift/route.yaml | kubectl delete --force -f - || true; \ # fi @POD=$$(kubectl get pod -l app=$(PROJECT_NAME)-statefulset -n $(NAMESPACE) -o jsonpath='{.items[0].metadata.name}'); \ echo "Deleting pod: $$POD"; \ @@ -260,18 +263,18 @@ uninstall-openshift: check-kubectl check-kustomize check-envsubst ## Uninstall f .PHONY: install-rbac install-rbac: check-kubectl check-kustomize check-envsubst ## Install RBAC @echo "Applying RBAC configuration from deploy/rbac..." - kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl apply -f - + kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME' | kubectl apply -f - .PHONY: uninstall-rbac uninstall-rbac: check-kubectl check-kustomize check-envsubst ## Uninstall RBAC @echo "Removing RBAC configuration from deploy/rbac..." - kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME $$NAMESPACE $$IMAGE_TAG_BASE $$VERSION' | kubectl delete -f - || true + kustomize build deploy/environments/openshift-base/rbac | envsubst '$$PROJECT_NAME' | kubectl delete -f - || true ##@ Environment .PHONY: env env: ## Print environment variables @echo "IMAGE_TAG_BASE=$(IMAGE_TAG_BASE)" - @echo "IMG=$(IMG)" + @echo "EPP_IMAGE=$(EPP_IMAGE)" @echo "CONTAINER_RUNTIME=$(CONTAINER_RUNTIME)" .PHONY: check-typos @@ -390,7 +393,9 @@ env-dev-kind: ## Run under kind ($(KIND_CLUSTER_NAME)) CLUSTER_NAME=$(KIND_CLUSTER_NAME) \ GATEWAY_HOST_PORT=$(KIND_GATEWAY_HOST_PORT) \ IMAGE_REGISTRY=$(IMAGE_REGISTRY) \ - EPP_TAG=$(EPP_TAG) \ + EPP_IMAGE=$(EPP_IMAGE) \ + VLLM_SIMULATOR_IMAGE=${VLLM_SIMULATOR_IMAGE} \ + SIDECAR_IMAGE=${SIDECAR_IMAGE} \ ./scripts/kind-dev-env.sh; \ fi diff --git a/deploy/components/inference-gateway/deployments.yaml b/deploy/components/inference-gateway/deployments.yaml index d5fc862819..cc56789a33 100644 --- a/deploy/components/inference-gateway/deployments.yaml +++ b/deploy/components/inference-gateway/deployments.yaml @@ -18,7 +18,7 @@ spec: terminationGracePeriodSeconds: 130 containers: - name: epp - image: ghcr.io/llm-d/llm-d-inference-scheduler:latest + image: ${EPP_IMAGE} imagePullPolicy: IfNotPresent args: - --pool-name diff --git a/deploy/components/inference-gateway/kustomization.yaml b/deploy/components/inference-gateway/kustomization.yaml index 68fd981b3a..7f6dec8fdc 100644 --- a/deploy/components/inference-gateway/kustomization.yaml +++ b/deploy/components/inference-gateway/kustomization.yaml @@ -18,7 +18,3 @@ resources: - deployments.yaml - gateways.yaml - httproutes.yaml - -images: -- name: ghcr.io/llm-d/llm-d-inference-scheduler - newTag: ${EPP_TAG} diff --git a/deploy/components/vllm-sim-pd/deployments.yaml b/deploy/components/vllm-sim-pd/deployments.yaml index 5c3763d8fd..3568b9a761 100644 --- a/deploy/components/vllm-sim-pd/deployments.yaml +++ b/deploy/components/vllm-sim-pd/deployments.yaml @@ -17,7 +17,7 @@ spec: spec: containers: - name: vllm - image: ghcr.io/llm-d/llm-d-inference-sim:latest + image: ${VLLM_SIMULATOR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8000" @@ -71,7 +71,7 @@ spec: spec: initContainers: - name: routing-sidecar - image: ghcr.io/llm-d/llm-d-routing-sidecar:latest + image: ${SIDECAR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8000" @@ -112,7 +112,7 @@ spec: fieldPath: status.podIP containers: - name: vllm - image: ghcr.io/llm-d/llm-d-inference-sim:latest + image: ${VLLM_SIMULATOR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8200" diff --git a/deploy/components/vllm-sim-pd/kustomization.yaml b/deploy/components/vllm-sim-pd/kustomization.yaml index 407f9e9f00..40ac17d310 100644 --- a/deploy/components/vllm-sim-pd/kustomization.yaml +++ b/deploy/components/vllm-sim-pd/kustomization.yaml @@ -10,9 +10,3 @@ kind: Kustomization resources: - deployments.yaml - -images: -- name: ghcr.io/llm-d/llm-d-inference-sim - newTag: ${VLLM_SIMULATOR_TAG} -- name: ghcr.io/llm-d/llm-d-routing-sidecar - newTag: ${SIDECAR_TAG} diff --git a/deploy/components/vllm-sim/deployments.yaml b/deploy/components/vllm-sim/deployments.yaml index 7fabf034e5..423b55a9e1 100644 --- a/deploy/components/vllm-sim/deployments.yaml +++ b/deploy/components/vllm-sim/deployments.yaml @@ -16,7 +16,7 @@ spec: spec: initContainers: - name: routing-sidecar - image: ghcr.io/llm-d/llm-d-routing-sidecar:latest + image: ${SIDECAR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8000" @@ -57,7 +57,7 @@ spec: fieldPath: status.podIP containers: - name: vllm - image: ghcr.io/llm-d/llm-d-inference-sim:latest + image: ${VLLM_SIMULATOR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8200" diff --git a/deploy/components/vllm-sim/kustomization.yaml b/deploy/components/vllm-sim/kustomization.yaml index d6d81a5144..a072bad165 100644 --- a/deploy/components/vllm-sim/kustomization.yaml +++ b/deploy/components/vllm-sim/kustomization.yaml @@ -11,9 +11,3 @@ kind: Kustomization resources: - deployments.yaml -images: -- name: ghcr.io/llm-d/llm-d-inference-sim - newTag: ${VLLM_SIMULATOR_TAG} -- name: ghcr.io/llm-d/llm-d-routing-sidecar - newTag: ${SIDECAR_TAG} - diff --git a/deploy/environments/openshift-base/common/patch-statefulset.yaml b/deploy/environments/openshift-base/common/patch-statefulset.yaml index 5b7676ff8f..c3a8a39cda 100644 --- a/deploy/environments/openshift-base/common/patch-statefulset.yaml +++ b/deploy/environments/openshift-base/common/patch-statefulset.yaml @@ -16,5 +16,5 @@ spec: serviceAccountName: operator-controller-manager containers: - name: cmd - image: ${IMAGE_TAG_BASE}:${VERSION} + image: ${EPP_IMAGE} imagePullPolicy: Always diff --git a/deploy/environments/openshift-base/kustomization.yaml b/deploy/environments/openshift-base/kustomization.yaml index c690de1666..ab3306c647 100644 --- a/deploy/environments/openshift-base/kustomization.yaml +++ b/deploy/environments/openshift-base/kustomization.yaml @@ -26,8 +26,7 @@ configMapGenerator: # Define the image to be updated. # images: # - name: ghcr.io/llm-d/placeholder -# newName: ghcr.io/llm-d/${IMAGE_TAG_BASE} -# newTag: ${VERSION} +# newName: ${EPP_IMAGE} patches: - path: common/patch-service.yaml - path: common/patch-statefulset.yaml diff --git a/docs/create_new_filter.md b/docs/create_new_filter.md index f7b847767a..8edc414403 100644 --- a/docs/create_new_filter.md +++ b/docs/create_new_filter.md @@ -127,7 +127,7 @@ Once a filter is defined, it can be used to modify llm-d-inference-scheduler - Add the relevant import path (if defined outside this repository); - Add any desired configuration knobs (e.g., environment variables); and -- Listing the new filter in the `LoadConfig()` function's `cfg.loadPluginInfo` +- Listing the new filter in the `LoadConfigPhaseTwo()` function's `cfg.loadPluginInfo` list of available plugins. In the case of the llm-d-inference-scheduler, filters can be hooked into the @@ -137,7 +137,7 @@ In the case of the llm-d-inference-scheduler, filters can be hooked into the environment variables): ```go -func (c *Config) LoadConfig() { +func (c *Config) LoadConfigPhaseTwo() { c.loadPluginInfo(c.DecodeSchedulerPlugins, false, KVCacheScorerName, ..., ByLabelFilterName, ... ) c.loadPluginInfo(c.PrefillSchedulerPlugins, true, ... ) diff --git a/go.mod b/go.mod index e727e032c5..5ceb7e2e49 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d sigs.k8s.io/controller-runtime v0.22.4 sigs.k8s.io/gateway-api v1.4.0 - sigs.k8s.io/gateway-api-inference-extension v1.1.0 + sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251119101812-bef80ca4dedd ) require ( @@ -44,7 +44,7 @@ require ( github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/emicklei/go-restful/v3 v3.13.0 // indirect - github.com/envoyproxy/go-control-plane/envoy v1.35.0 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -78,9 +78,9 @@ require ( github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.67.1 // indirect + github.com/prometheus/common v0.67.2 // indirect github.com/prometheus/procfs v0.17.0 // indirect - github.com/prometheus/prometheus v0.307.1 // indirect + github.com/prometheus/prometheus v0.307.3 // indirect github.com/redis/go-redis/v9 v9.11.0 // indirect github.com/spf13/cobra v1.9.1 // indirect github.com/spf13/pflag v1.0.7 // indirect @@ -108,14 +108,14 @@ require ( go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20250808145144-a408d31f581a // indirect - golang.org/x/mod v0.28.0 // indirect - golang.org/x/net v0.44.0 // indirect - golang.org/x/oauth2 v0.31.0 // indirect - golang.org/x/sys v0.36.0 // indirect - golang.org/x/term v0.35.0 // indirect - golang.org/x/text v0.29.0 // indirect + golang.org/x/mod v0.29.0 // indirect + golang.org/x/net v0.46.0 // indirect + golang.org/x/oauth2 v0.32.0 // indirect + golang.org/x/sys v0.37.0 // indirect + golang.org/x/term v0.36.0 // indirect + golang.org/x/text v0.30.0 // indirect golang.org/x/time v0.13.0 // indirect - golang.org/x/tools v0.37.0 // indirect + golang.org/x/tools v0.38.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect diff --git a/go.sum b/go.sum index fdab19011b..c4d0fb3829 100644 --- a/go.sum +++ b/go.sum @@ -85,8 +85,8 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane/envoy v1.35.0 h1:ixjkELDE+ru6idPxcHLj8LBVc2bFP7iBytj353BoHUo= -github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/b3iyVfz5+z8QWGrzkoqm/8SbEs= +github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= +github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= @@ -227,14 +227,14 @@ github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.67.1 h1:OTSON1P4DNxzTg4hmKCc37o4ZAZDv0cfXLkOt0oEowI= -github.com/prometheus/common v0.67.1/go.mod h1:RpmT9v35q2Y+lsieQsdOh5sXZ6ajUGC8NjZAmr8vb0Q= +github.com/prometheus/common v0.67.2 h1:PcBAckGFTIHt2+L3I33uNRTlKTplNzFctXcWhPyAEN8= +github.com/prometheus/common v0.67.2/go.mod h1:63W3KZb1JOKgcjlIr64WW/LvFGAqKPj0atm+knVGEko= github.com/prometheus/otlptranslator v1.0.0 h1:s0LJW/iN9dkIH+EnhiD3BlkkP5QVIUVEoIwkU+A6qos= github.com/prometheus/otlptranslator v1.0.0/go.mod h1:vRYWnXvI6aWGpsdY/mOT/cbeVRBlPWtBNDb7kGR3uKM= github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0= github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw= -github.com/prometheus/prometheus v0.307.1 h1:Hh3kRMFn+xpQGLe/bR6qpUfW4GXQO0spuYeY7f2JZs4= -github.com/prometheus/prometheus v0.307.1/go.mod h1:/7YQG/jOLg7ktxGritmdkZvezE1fa6aWDj0MGDIZvcY= +github.com/prometheus/prometheus v0.307.3 h1:zGIN3EpiKacbMatcUL2i6wC26eRWXdoXfNPjoBc2l34= +github.com/prometheus/prometheus v0.307.3/go.mod h1:sPbNW+KTS7WmzFIafC3Inzb6oZVaGLnSvwqTdz2jxRQ= github.com/prometheus/sigv4 v0.2.1 h1:hl8D3+QEzU9rRmbKIRwMKRwaFGyLkbPdH5ZerglRHY0= github.com/prometheus/sigv4 v0.2.1/go.mod h1:ySk6TahIlsR2sxADuHy4IBFhwEjRGGsfbbLGhFYFj6Q= github.com/redis/go-redis/v9 v9.11.0 h1:E3S08Gl/nJNn5vkxd2i78wZxWAPNZgUNTp8WIJUAiIs= @@ -317,22 +317,22 @@ go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= -golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= +golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= +golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= golang.org/x/exp v0.0.0-20250808145144-a408d31f581a h1:Y+7uR/b1Mw2iSXZ3G//1haIiSElDQZ8KWh0h+sZPG90= golang.org/x/exp v0.0.0-20250808145144-a408d31f581a/go.mod h1:rT6SFzZ7oxADUDx58pcaKFTcZ+inxAa9fTrYx/uVYwg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= -golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= -golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= -golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= -golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= +golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= +golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -341,22 +341,22 @@ golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= -golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= +golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= -golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -409,8 +409,8 @@ sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327U sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= sigs.k8s.io/gateway-api v1.4.0 h1:ZwlNM6zOHq0h3WUX2gfByPs2yAEsy/EenYJB78jpQfQ= sigs.k8s.io/gateway-api v1.4.0/go.mod h1:AR5RSqciWP98OPckEjOjh2XJhAe2Na4LHyXD2FUY7Qk= -sigs.k8s.io/gateway-api-inference-extension v1.1.0 h1:MqRYk+3LNUWB0MbTgTZVhmJGNDTvm8l3ze4MOlzR7MU= -sigs.k8s.io/gateway-api-inference-extension v1.1.0/go.mod h1:BmJy8Hvc2EHl3Oa/Ka8/4RqwVHCCbX7BLndLdMNtugI= +sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251119101812-bef80ca4dedd h1:lvpEQrNb0nbGgaoXw3V9qJdIkTTsbs4tBwebGGGU8iQ= +sigs.k8s.io/gateway-api-inference-extension v0.0.0-20251119101812-bef80ca4dedd/go.mod h1:/HWeqxuOMjFM56YwJ2Spt3qceK7Spz4hk6ZfXYgE9a8= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= diff --git a/scripts/kind-dev-env.sh b/scripts/kind-dev-env.sh index 918a4f3e21..204045b180 100755 --- a/scripts/kind-dev-env.sh +++ b/scripts/kind-dev-env.sh @@ -22,18 +22,20 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Set the default IMAGE_REGISTRY if not provided : "${IMAGE_REGISTRY:=ghcr.io/llm-d}" -# Set a default VLLM_SIMULATOR_IMAGE if not provided -: "${VLLM_SIMULATOR_IMAGE:=llm-d-inference-sim}" - # Set a default VLLM_SIMULATOR_TAG if not provided export VLLM_SIMULATOR_TAG="${VLLM_SIMULATOR_TAG:-latest}" -# Set a default EPP_IMAGE if not provided -: "${EPP_IMAGE:=llm-d-inference-scheduler}" +# Set a default VLLM_SIMULATOR_IMAGE if not provided +VLLM_SIMULATOR_IMAGE="${VLLM_SIMULATOR_IMAGE:-${IMAGE_REGISTRY}/llm-d-inference-sim:${VLLM_SIMULATOR_TAG}}" +export VLLM_SIMULATOR_IMAGE # Set a default EPP_TAG if not provided export EPP_TAG="${EPP_TAG:-dev}" +# Set a default EPP_IMAGE if not provided +EPP_IMAGE="${EPP_IMAGE:-${IMAGE_REGISTRY}/llm-d-inference-scheduler:${EPP_TAG}}" +export EPP_IMAGE + # Set the model name to deploy export MODEL_NAME="${MODEL_NAME:-food-review}" # Extract model family (e.g., "meta-llama" from "meta-llama/Llama-3.1-8B-Instruct") @@ -46,12 +48,13 @@ export MODEL_NAME_SAFE=$(echo "${MODEL_ID}" | tr '[:upper:]' '[:lower:]' | tr ' # Set the endpoint-picker to deploy export EPP_NAME="${EPP_NAME:-${MODEL_NAME_SAFE}-endpoint-picker}" -# Set a default SIDECAR_IMAGE if not provided -: "${SIDECAR_IMAGE:=llm-d-routing-sidecar}" - # Set the default routing side car image tag export SIDECAR_TAG="${SIDECAR_TAG:-dev}" +# Set a default SIDECAR_IMAGE if not provided +SIDECAR_IMAGE="${SIDECAR_IMAGE:-${IMAGE_REGISTRY}/llm-d-routing-sidecar:${SIDECAR_TAG}}" +export SIDECAR_IMAGE + # Set the inference pool name for the deployment export POOL_NAME="${POOL_NAME:-${MODEL_NAME_SAFE}-inference-pool}" @@ -178,26 +181,26 @@ kubectl --context ${KUBE_CONTEXT} -n local-path-storage wait --for=condition=Rea # Load the vllm simulator image into the cluster if [ "${CONTAINER_RUNTIME}" == "podman" ]; then - podman save ${IMAGE_REGISTRY}/${VLLM_SIMULATOR_IMAGE}:${VLLM_SIMULATOR_TAG} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin + podman save ${VLLM_SIMULATOR_IMAGE} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin else - if docker image inspect "${IMAGE_REGISTRY}/${VLLM_SIMULATOR_IMAGE}:${VLLM_SIMULATOR_TAG}" > /dev/null 2>&1; then + if docker image inspect "${VLLM_SIMULATOR_IMAGE}" > /dev/null 2>&1; then echo "INFO: Loading image into KIND cluster..." - kind --name ${CLUSTER_NAME} load docker-image ${IMAGE_REGISTRY}/${VLLM_SIMULATOR_IMAGE}:${VLLM_SIMULATOR_TAG} + kind --name ${CLUSTER_NAME} load docker-image ${VLLM_SIMULATOR_IMAGE} fi fi # Load the ext_proc endpoint-picker image into the cluster if [ "${CONTAINER_RUNTIME}" == "podman" ]; then - podman save ${IMAGE_REGISTRY}/${EPP_IMAGE}:${EPP_TAG} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin + podman save ${EPP_IMAGE} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin else - kind --name ${CLUSTER_NAME} load docker-image ${IMAGE_REGISTRY}/${EPP_IMAGE}:${EPP_TAG} + kind --name ${CLUSTER_NAME} load docker-image ${EPP_IMAGE} fi # Load the sidecar image into the cluster if [ "${CONTAINER_RUNTIME}" == "podman" ]; then - podman save ${IMAGE_REGISTRY}/${SIDECAR_IMAGE}:${SIDECAR_TAG} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin + podman save ${SIDECAR_IMAGE} -o /dev/stdout | kind --name ${CLUSTER_NAME} load image-archive /dev/stdin else - kind --name ${CLUSTER_NAME} load docker-image ${IMAGE_REGISTRY}/${SIDECAR_IMAGE}:${SIDECAR_TAG} + kind --name ${CLUSTER_NAME} load docker-image ${SIDECAR_IMAGE} fi # ------------------------------------------------------------------------------ @@ -233,8 +236,8 @@ envsubst '$PRIMARY_PORT' < ${EPP_CONFIG} > ${TEMP_FILE} kubectl --context ${KUBE_CONTEXT} create configmap epp-config --from-file=epp-config.yaml=${TEMP_FILE} kustomize build --enable-helm ${KUSTOMIZE_DIR} \ - | envsubst '${POOL_NAME} ${MODEL_NAME} ${MODEL_NAME_SAFE} ${EPP_NAME} ${EPP_TAG} ${VLLM_SIMULATOR_TAG} \ - ${PD_ENABLED} ${KV_CACHE_ENABLED} ${SIDECAR_TAG} ${TARGET_PORTS} \ + | envsubst '${POOL_NAME} ${MODEL_NAME} ${MODEL_NAME_SAFE} ${EPP_NAME} ${EPP_IMAGE} ${VLLM_SIMULATOR_IMAGE} \ + ${PD_ENABLED} ${KV_CACHE_ENABLED} ${SIDECAR_IMAGE} ${TARGET_PORTS} \ ${VLLM_REPLICA_COUNT} ${VLLM_REPLICA_COUNT_P} ${VLLM_REPLICA_COUNT_D} ${VLLM_DATA_PARALLEL_SIZE}' \ | kubectl --context ${KUBE_CONTEXT} apply -f - diff --git a/scripts/pull_images.sh b/scripts/pull_images.sh index cc80de382a..3acf439c18 100755 --- a/scripts/pull_images.sh +++ b/scripts/pull_images.sh @@ -5,15 +5,15 @@ CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-docker}" echo "Using container tool: ${CONTAINER_RUNTIME}" # Set a default EPP_TAG if not provided -export EPP_TAG="${EPP_TAG:-dev}" +EPP_TAG="${EPP_TAG:-dev}" # Set a default VLLM_SIMULATOR_TAG if not provided -export VLLM_SIMULATOR_TAG="${VLLM_SIMULATOR_TAG:-v0.6.1}" +VLLM_SIMULATOR_TAG="${VLLM_SIMULATOR_TAG:-v0.6.1}" # Set the default routing side car image tag -export SIDECAR_TAG="${SIDECAR_TAG:-dev}" +SIDECAR_TAG="${SIDECAR_TAG:-dev}" -EPP_IMAGE="ghcr.io/llm-d/llm-d-inference-scheduler:${EPP_TAG}" -VLLM_SIMULATOR_IMAGE="ghcr.io/llm-d/llm-d-inference-sim:${VLLM_SIMULATOR_TAG}" -ROUTING_SIDECAR_IMAGE="ghcr.io/llm-d/llm-d-routing-sidecar:${SIDECAR_TAG}" +export EPP_IMAGE="${EPP_IMAGE:-ghcr.io/llm-d/llm-d-inference-scheduler:${EPP_TAG}}" +export VLLM_SIMULATOR_IMAGE="${VLLM_SIMULATOR_IMAGE:-ghcr.io/llm-d/llm-d-inference-sim:${VLLM_SIMULATOR_TAG}}" +export SIDECAR_IMAGE="${SIDECAR_IMAGE:-ghcr.io/llm-d/llm-d-routing-sidecar:${SIDECAR_TAG}}" TARGETOS="${TARGETOS:-linux}" TARGETARCH="${TARGETARCH:-$(go env GOARCH)}" @@ -43,11 +43,11 @@ ensure_image() { echo "--- Using the following images ---" echo "Scheduler Image: ${EPP_IMAGE}" echo "Simulator Image: ${VLLM_SIMULATOR_IMAGE}" -echo "Sidecar Image: ${ROUTING_SIDECAR_IMAGE}" +echo "Sidecar Image: ${SIDECAR_IMAGE}" echo "----------------------------------------------------" echo "Pulling dependencies..." ensure_image "${EPP_IMAGE}" ensure_image "${VLLM_SIMULATOR_IMAGE}" -ensure_image "${ROUTING_SIDECAR_IMAGE}" +ensure_image "${SIDECAR_IMAGE}" echo "Successfully pulled dependencies" diff --git a/test/config/prefix_cache_mode_test.go b/test/config/prefix_cache_mode_test.go index c30130926f..b0fad97fd0 100644 --- a/test/config/prefix_cache_mode_test.go +++ b/test/config/prefix_cache_mode_test.go @@ -49,13 +49,17 @@ schedulingProfiles: for _, test := range tests { t.Run(test.name, func(t *testing.T) { _ = os.Setenv("HF_TOKEN", "dummy_token") // needed for cache_tracking + rawConfig, _, err := loader.LoadConfigPhaseOne([]byte(test.configText), logr.Discard()) + if err != nil { + t.Fatalf("unexpected error from LoadConfigPhaseOne: %v", err) + } handle := utils.NewTestHandle(ctx) - _, err := loader.LoadConfig([]byte(test.configText), handle, logr.Discard()) - fmt.Println("all plugins", handle.GetAllPluginsWithNames()) - + _, err = loader.LoadConfigPhaseTwo(rawConfig, handle, logr.Discard()) if err != nil { - t.Fatalf("unexpected error from LoadConfig: %v", err) + t.Fatalf("unexpected error from LoadConfigPhaseTwo: %v", err) } + fmt.Println("all plugins", handle.GetAllPluginsWithNames()) + _, err = giePlugins.PluginByType[*scorer.PrecisePrefixCacheScorer](handle, test.pluginName) if err != nil { t.Fatalf("expected PrecisePrefixCacheScorer, but got error: %v", err) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index c91082d6b3..25c65afe26 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -12,7 +12,10 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" "github.com/onsi/gomega/gexec" + corev1 "k8s.io/api/core/v1" apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client/config" k8slog "sigs.k8s.io/controller-runtime/pkg/log" @@ -48,22 +51,36 @@ const ( serviceAccountManifest = "./yaml/service-accounts.yaml" // servicesManifest is the manifest for the EPP's service resources. servicesManifest = "./yaml/services.yaml" - // nsName is the namespace in which the K8S objects will be created - nsName = "default" ) var ( - port string + port string = env.GetEnvString("E2E_PORT", "30080", ginkgo.GinkgoLogr) testConfig *testutils.TestConfig - containerRuntime = env.GetEnvString("CONTAINER_RUNTIME", "docker", ginkgo.GinkgoLogr) - eppTag = env.GetEnvString("EPP_TAG", "dev", ginkgo.GinkgoLogr) - vllmSimTag = env.GetEnvString("VLLM_SIMULATOR_TAG", "dev", ginkgo.GinkgoLogr) - routingSideCarTag = env.GetEnvString("SIDECAR_TAG", "dev", ginkgo.GinkgoLogr) + containerRuntime = env.GetEnvString("CONTAINER_RUNTIME", "docker", ginkgo.GinkgoLogr) + eppImage = env.GetEnvString("EPP_IMAGE", "ghcr.io/llm-d/llm-d-inference-scheduler:dev", ginkgo.GinkgoLogr) + vllmSimImage = env.GetEnvString("VLLM_SIMULATOR_IMAGE", "ghcr.io/llm-d/llm-d-inference-sim:dev", ginkgo.GinkgoLogr) + sideCarImage = env.GetEnvString("SIDECAR_IMAGE", "ghcr.io/llm-d/llm-d-routing-sidecar:dev", ginkgo.GinkgoLogr) + + // nsName is the namespace in which the K8S objects will be created + nsName = env.GetEnvString("NAMESPACE", "default", ginkgo.GinkgoLogr) + + // k8sContext is the Kubernetes context to work with + k8sContext = env.GetEnvString("K8S_CONTEXT", "", ginkgo.GinkgoLogr) readyTimeout = env.GetEnvDuration("READY_TIMEOUT", defaultReadyTimeout, ginkgo.GinkgoLogr) interval = defaultInterval + + crdObjects []string + envoyObjects []string + rbacObjects []string + serviceAccountObjects []string + serviceObjects []string + infPoolObjects []string + createdNameSpace bool + + portForwardSession *gexec.Session ) func TestEndToEnd(t *testing.T) { @@ -74,22 +91,47 @@ func TestEndToEnd(t *testing.T) { } var _ = ginkgo.BeforeSuite(func() { - port = "30080" - - setupK8sCluster() - testConfig = testutils.NewTestConfig(nsName) + if k8sContext == "" { + setupK8sCluster() + } + testConfig = testutils.NewTestConfig(nsName, k8sContext) setupK8sClient() + setupNameSpace() createCRDs() createEnvoy() - testutils.ApplyYAMLFile(testConfig, rbacManifest) - testutils.ApplyYAMLFile(testConfig, serviceAccountManifest) - testutils.ApplyYAMLFile(testConfig, servicesManifest) + rbacObjects = testutils.ApplyYAMLFile(testConfig, rbacManifest) + serviceAccountObjects = testutils.ApplyYAMLFile(testConfig, serviceAccountManifest) + serviceObjects = testutils.ApplyYAMLFile(testConfig, servicesManifest) // Prevent failure in tests due to InferencePool not existing before the test - createInferencePool(1, false) + infPoolObjects = createInferencePool(1, false) }) var _ = ginkgo.AfterSuite(func() { + if k8sContext != "" { + // Used an existing Kubernetes context + // Stop port-forward + if portForwardSession != nil { + portForwardSession.Terminate() + } + + // cleanup created objects + ginkgo.By("Deleting created Kubernetes objects") + testutils.DeleteObjects(testConfig, infPoolObjects) + testutils.DeleteObjects(testConfig, serviceObjects) + testutils.DeleteObjects(testConfig, serviceAccountObjects) + testutils.DeleteObjects(testConfig, rbacObjects) + testutils.DeleteObjects(testConfig, envoyObjects) + testutils.DeleteObjects(testConfig, crdObjects) + + if createdNameSpace { + ginkgo.By("Deleting namespace " + nsName) + err := testConfig.KubeCli.CoreV1().Namespaces().Delete(testConfig.Context, nsName, metav1.DeleteOptions{}) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } + return + } + command := exec.Command("kind", "delete", "cluster", "--name", "e2e-tests") session, err := gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) @@ -114,9 +156,9 @@ func setupK8sCluster() { gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) gomega.Eventually(session).WithTimeout(600 * time.Second).Should(gexec.Exit(0)) - kindLoadImage("ghcr.io/llm-d/llm-d-inference-sim:" + vllmSimTag) - kindLoadImage("ghcr.io/llm-d/llm-d-inference-scheduler:" + eppTag) - kindLoadImage("ghcr.io/llm-d/llm-d-routing-sidecar:" + routingSideCarTag) + kindLoadImage(vllmSimImage) + kindLoadImage(eppImage) + kindLoadImage(sideCarImage) } func kindLoadImage(image string) { @@ -147,10 +189,11 @@ func kindLoadImage(image string) { } func setupK8sClient() { - k8sCfg := config.GetConfigOrDie() + k8sCfg, err := config.GetConfigWithContext(k8sContext) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.ExpectWithOffset(1, k8sCfg).NotTo(gomega.BeNil()) - err := clientgoscheme.AddToScheme(testConfig.Scheme) + err = clientgoscheme.AddToScheme(testConfig.Scheme) gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = infextv1.Install(testConfig.Scheme) @@ -167,19 +210,59 @@ func setupK8sClient() { k8slog.SetLogger(ginkgo.GinkgoLogr) } +// setupNameSpace sets up the specified namespace if it doesn't exist +func setupNameSpace() { + if nsName == "default" { + return + } + _, err := testConfig.KubeCli.CoreV1().Namespaces().Get(testConfig.Context, nsName, metav1.GetOptions{}) + if err == nil { + return + } + gomega.Expect(errors.IsNotFound(err)).To(gomega.BeTrue()) + + ginkgo.By("Creating namespace " + nsName) + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + }, + } + _, err = testConfig.KubeCli.CoreV1().Namespaces().Create(testConfig.Context, namespace, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + createdNameSpace = true +} + // createCRDs creates the Inference Extension CRDs used for testing. func createCRDs() { crds := runKustomize(gieCrdsKustomize) - testutils.CreateObjsFromYaml(testConfig, crds) + crdObjects = testutils.CreateObjsFromYaml(testConfig, crds) } func createEnvoy() { manifests := testutils.ReadYaml(envoyManifest) + manifests = substituteMany(manifests, map[string]string{"${NAMESPACE}": nsName}) ginkgo.By("Creating envoy proxy resources from manifest: " + envoyManifest) - testutils.CreateObjsFromYaml(testConfig, manifests) + envoyObjects = testutils.CreateObjsFromYaml(testConfig, manifests) + + if k8sContext != "" { + envoyName := "" + for _, obj := range envoyObjects { + splitObj := strings.Split(obj, "/") + if strings.ToLower(splitObj[0]) == "deployment" { + envoyName = splitObj[1] + } + } + gomega.Expect(envoyName).ToNot(gomega.BeEmpty()) + + command := exec.Command("kubectl", "port-forward", "deployment/"+envoyName, port+":8081", + "--context="+k8sContext, "--namespace="+nsName) + var err error + portForwardSession, err = gexec.Start(command, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) + gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) + } } -func createInferencePool(numTargetPorts int, toDelete bool) { +func createInferencePool(numTargetPorts int, toDelete bool) []string { poolName := modelName + "-inference-pool" if toDelete { @@ -198,7 +281,7 @@ func createInferencePool(numTargetPorts int, toDelete bool) { "${TARGET_PORTS}": targetPorts, }) - testutils.CreateObjsFromYaml(testConfig, infPoolYaml) + return testutils.CreateObjsFromYaml(testConfig, infPoolYaml) } const kindClusterConfig = ` diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 6b49475f15..02eb2d0d68 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -43,7 +43,7 @@ var ( var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() { ginkgo.When("Running simple non-PD configuration", func() { ginkgo.It("should run successfully", func() { - createInferencePool(1, true) + infPoolObjects = createInferencePool(1, true) modelServers := createModelServers(false, false, false, 1, 0, 0) @@ -68,7 +68,7 @@ var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() { ginkgo.When("Running a PD configuration", func() { ginkgo.It("should run successfully", func() { - createInferencePool(1, true) + infPoolObjects = createInferencePool(1, true) prefillReplicas := 1 decodeReplicas := 4 @@ -117,7 +117,7 @@ var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() { ginkgo.When("Running simple non-PD KV enabled configuration", func() { ginkgo.It("should run successfully", func() { - createInferencePool(1, true) + infPoolObjects = createInferencePool(1, true) epp := createEndPointPicker(kvConfig) @@ -141,7 +141,7 @@ var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() { ginkgo.When("Scaling up and down the model servers", func() { ginkgo.It("should distribute inference requests across all model servers", func() { - createInferencePool(1, true) + infPoolObjects = createInferencePool(1, true) modelServers := createModelServers(false, false, false, 1, 0, 0) @@ -197,7 +197,7 @@ var _ = ginkgo.Describe("Run end to end tests", ginkgo.Ordered, func() { ginkgo.When("Running a vLLM Data Parallel configuration", func() { ginkgo.It("should schedule inference on all ranks", func() { - createInferencePool(2, true) + infPoolObjects = createInferencePool(2, true) modelServers := createModelServers(false, false, true, 1, 0, 0) @@ -267,11 +267,11 @@ func createModelServers(withPD, withKV, withDP bool, vllmReplicas, prefillReplic "${MODEL_NAME_SAFE}": theSafeModelName, "${POOL_NAME}": poolName, "${KV_CACHE_ENABLED}": strconv.FormatBool(withKV), - "${SIDECAR_TAG}": routingSideCarTag, + "${SIDECAR_IMAGE}": sideCarImage, "${VLLM_REPLICA_COUNT}": strconv.Itoa(vllmReplicas), "${VLLM_REPLICA_COUNT_D}": strconv.Itoa(decodeReplicas), "${VLLM_REPLICA_COUNT_P}": strconv.Itoa(prefillReplicas), - "${VLLM_SIMULATOR_TAG}": vllmSimTag, + "${VLLM_SIMULATOR_IMAGE}": vllmSimImage, }) objects := testutils.CreateObjsFromYaml(testConfig, manifests) @@ -300,7 +300,8 @@ func createEndPointPicker(eppConfig string) []string { eppYamls := testutils.ReadYaml(eppManifest) eppYamls = substituteMany(eppYamls, map[string]string{ - "${EPP_TAG}": eppTag, + "${EPP_IMAGE}": eppImage, + "${NAMESPACE}": nsName, "${POOL_NAME}": modelName + "-inference-pool", }) diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go index 6f5af09b5e..756f292551 100644 --- a/test/e2e/utils_test.go +++ b/test/e2e/utils_test.go @@ -14,9 +14,7 @@ import ( v1 "k8s.io/apimachinery/pkg/apis/meta/v1" apilabels "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/config" ) const ( @@ -24,9 +22,6 @@ const ( ) func scaleDeployment(objects []string, increment int) { - k8sCfg := config.GetConfigOrDie() - client, err := kubernetes.NewForConfig(k8sCfg) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) direction := "up" absIncrement := increment if increment < 0 { @@ -38,11 +33,11 @@ func scaleDeployment(objects []string, increment int) { split := strings.Split(kindAndName, "/") if strings.ToLower(split[0]) == deploymentKind { ginkgo.By(fmt.Sprintf("Scaling the deployment %s %s by %d", split[1], direction, absIncrement)) - scale, err := client.AppsV1().Deployments(nsName).GetScale(testConfig.Context, split[1], v1.GetOptions{}) + scale, err := testConfig.KubeCli.AppsV1().Deployments(nsName).GetScale(testConfig.Context, split[1], v1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) scale.Spec.Replicas += int32(increment) - _, err = client.AppsV1().Deployments(nsName).UpdateScale(testConfig.Context, split[1], scale, v1.UpdateOptions{}) + _, err = testConfig.KubeCli.AppsV1().Deployments(nsName).UpdateScale(testConfig.Context, split[1], scale, v1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } } diff --git a/test/e2e/yaml/deployments.yaml b/test/e2e/yaml/deployments.yaml index 3149e82be0..4d844e32c4 100644 --- a/test/e2e/yaml/deployments.yaml +++ b/test/e2e/yaml/deployments.yaml @@ -18,13 +18,13 @@ spec: terminationGracePeriodSeconds: 130 containers: - name: epp - image: ghcr.io/llm-d/llm-d-inference-scheduler:${EPP_TAG} + image: ${EPP_IMAGE} imagePullPolicy: IfNotPresent args: - --pool-name - ${POOL_NAME} - --pool-namespace - - "default" + - ${NAMESPACE} - --v - "4" - --zap-encoder diff --git a/test/e2e/yaml/envoy.yaml b/test/e2e/yaml/envoy.yaml index 25d488a208..d84b1d25cb 100644 --- a/test/e2e/yaml/envoy.yaml +++ b/test/e2e/yaml/envoy.yaml @@ -78,7 +78,7 @@ data: route_config: name: vllm virtual_hosts: - - name: vllm-default + - name: vllm-${NAMESPACE} domains: ["*"] routes: - match: @@ -100,7 +100,7 @@ data: grpc_service: envoy_grpc: cluster_name: ext_proc - authority: e2e-epp.default:9002 + authority: e2e-epp.${NAMESPACE}:9002 timeout: 10s processing_mode: request_header_mode: SEND @@ -205,7 +205,7 @@ data: - endpoint: address: socket_address: - address: e2e-epp.default + address: e2e-epp.${NAMESPACE} port_value: 9002 load_balancing_weight: 1 --- @@ -234,7 +234,7 @@ spec: image: docker.io/envoyproxy/envoy:distroless-v1.33.2 args: - "--service-cluster" - - "default/inference-gateway" + - "${NAMESPACE}/inference-gateway" - "--service-node" - "$(ENVOY_POD_NAME)" - "--log-level" diff --git a/test/e2e/yaml/vllm-sim-dp.yaml b/test/e2e/yaml/vllm-sim-dp.yaml index c55a11f285..b0452d91eb 100644 --- a/test/e2e/yaml/vllm-sim-dp.yaml +++ b/test/e2e/yaml/vllm-sim-dp.yaml @@ -17,7 +17,7 @@ spec: spec: initContainers: - name: routing-sidecar - image: ghcr.io/llm-d/llm-d-routing-sidecar:${SIDECAR_TAG} + image: ${SIDECAR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8000" @@ -41,7 +41,7 @@ spec: fieldPath: status.podIP containers: - name: vllm - image: ghcr.io/llm-d/llm-d-inference-sim:${VLLM_SIMULATOR_TAG} + image: ${VLLM_SIMULATOR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8200" diff --git a/test/e2e/yaml/vllm-sim-pd.yaml b/test/e2e/yaml/vllm-sim-pd.yaml index ca8009d7fd..0f757c2f2b 100644 --- a/test/e2e/yaml/vllm-sim-pd.yaml +++ b/test/e2e/yaml/vllm-sim-pd.yaml @@ -17,7 +17,7 @@ spec: spec: containers: - name: vllm - image: ghcr.io/llm-d/llm-d-inference-sim:${VLLM_SIMULATOR_TAG} + image: ${VLLM_SIMULATOR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8000" @@ -60,7 +60,7 @@ spec: spec: initContainers: - name: routing-sidecar - image: ghcr.io/llm-d/llm-d-routing-sidecar:${SIDECAR_TAG} + image: ${SIDECAR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8000" @@ -74,7 +74,7 @@ spec: restartPolicy: Always containers: - name: vllm - image: ghcr.io/llm-d/llm-d-inference-sim:${VLLM_SIMULATOR_TAG} + image: ${VLLM_SIMULATOR_IMAGE} imagePullPolicy: IfNotPresent args: - "--port=8200" diff --git a/test/e2e/yaml/vllm-sim.yaml b/test/e2e/yaml/vllm-sim.yaml index ce5a71af1e..36036996c8 100644 --- a/test/e2e/yaml/vllm-sim.yaml +++ b/test/e2e/yaml/vllm-sim.yaml @@ -16,7 +16,7 @@ spec: spec: containers: - name: vllm - image: ghcr.io/llm-d/llm-d-inference-sim:${VLLM_SIMULATOR_TAG} + image: ${VLLM_SIMULATOR_IMAGE} imagePullPolicy: IfNotPresent args: - "--mode=echo"