llm-d · github-actions · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -190,14 +190,14 @@ kubectl config set-context --current --namespace="${NAMESPACE}"
 export HF_TOKEN="<HF_TOKEN>"
 ```
 
-Download the `llm-d-kv-cache-manager` repository (the installation script and Helm chart to install the vLLM environment):
+Clone the `llm-d-kv-cache` repository (it contains the installation script and Helm chart used to install the vLLM environment):
 
 ```bash
-cd .. && git clone git@github.com:llm-d/llm-d-kv-cache-manager.git
+cd .. && git clone git@github.com:llm-d/llm-d-kv-cache.git
 ```
 
 If you prefer to clone it into the `/tmp` directory, make sure to update the `VLLM_CHART_DIR` environment variable:
-`export VLLM_CHART_DIR=<tmp_dir>/llm-d-kv-cache-manager/vllm-setup-helm`
+`export VLLM_CHART_DIR=<tmp_dir>/llm-d-kv-cache/vllm-setup-helm`
 
 Once all this is set up, you can deploy the environment:
 

diff --git a/deploy/components/crds-gie/kustomization.yaml b/deploy/components/crds-gie/kustomization.yaml
@@ -10,4 +10,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
-- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?ref=v1.1.0
+- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?ref=v1.2.1
diff --git a/deploy/components/inference-gateway/httproutes.yaml b/deploy/components/inference-gateway/httproutes.yaml
@@ -2,6 +2,8 @@ apiVersion: gateway.networking.k8s.io/v1
 kind: HTTPRoute
 metadata:
   name: ${POOL_NAME}-inference-route
+  labels:
+    istio.io/rev: llm-d-gateway
 spec:
   parentRefs:
   - name: inference-gateway

diff --git a/deploy/components/inference-gateway/rbac.yaml b/deploy/components/inference-gateway/rbac.yaml
@@ -8,6 +8,7 @@ rules:
   resources:
   - "inferencepools"
   - "inferenceobjectives"
+  - "inferencemodelrewrites"
   verbs:
   - "get"
   - "watch"

diff --git a/deploy/components/istio-control-plane/configmaps.yaml b/deploy/components/istio-control-plane/configmaps.yaml
@@ -17,7 +17,7 @@ metadata:
     app.kubernetes.io/managed-by: Helm
     app.kubernetes.io/name: istiod
     app.kubernetes.io/part-of: istio
-    app.kubernetes.io/version: 1.28.0
+    app.kubernetes.io/version: 1.28.1
     helm.sh/chart: istiod-1.27.1
     install.operator.istio.io/owning-resource: unknown
     istio.io/rev: llm-d-gateway
@@ -865,7 +865,7 @@ data:
         spec:
           initContainers:
             - name: grpc-bootstrap-init
-              image: busybox:1.28
+              image: busybox:1.28.1
               volumeMounts:
                 - mountPath: /var/lib/grpc/data/
                   name: grpc-io-proxyless-bootstrap
@@ -2131,7 +2131,7 @@ data:
         "sts": {
           "servicePort": 0
         },
-        "tag": "1.28.0",
+        "tag": "1.28.1",
         "variant": "",
         "waypoint": {
           "affinity": {},
@@ -2176,7 +2176,7 @@ metadata:
     app.kubernetes.io/managed-by: Helm
     app.kubernetes.io/name: istiod
     app.kubernetes.io/part-of: istio
-    app.kubernetes.io/version: 1.28.0
+    app.kubernetes.io/version: 1.28.1
     helm.sh/chart: istiod-1.27.1
     install.operator.istio.io/owning-resource: unknown
     istio.io/rev: llm-d-gateway

diff --git a/deploy/components/istio-control-plane/deployments.yaml b/deploy/components/istio-control-plane/deployments.yaml
@@ -99,7 +99,7 @@ spec:
           value: ""
         - name: ENABLE_GATEWAY_API_INFERENCE_EXTENSION
           value: "true"
-        image: docker.io/istio/pilot:1.28.0
+        image: docker.io/istio/pilot:1.28.1
         name: discovery
         ports:
         - containerPort: 8080

diff --git a/docs/architecture.md b/docs/architecture.md
@@ -299,7 +299,7 @@ Similarly to the IGW `prefix-cache-scorer`, it provides a score based on the num
  the `precise-prefix-cache-scorer` tracks the real-time KV-cache states across the vLLM instances to
  provide more accurate scoring.
 
-When enabled, the scorer will use the `llm-d-kv-cache-manager` to track the KV-cache states
+When enabled, the scorer will use the `llm-d-kv-cache` library to track the KV-cache states
  across the vLLM instances. It will use the `kvcache.Indexer` to score the pods based on the
  number of matching blocks in the KV-cache. It will also use the `kvevents.Pool` to subscribe
  to the KV-Events emitted by the vLLM instances and update the KV-cache states in near-real-time.
@@ -311,7 +311,7 @@ Configuration:
   - `indexerConfig`: Configuration for the `kvcache.Indexer`.
   - `kvEventsConfig`: Configuration for the `kvevents.Pool`.
 
-See list of parameters at [llm-d-kv-cache-manager/docs/configuration.md](https://github.com/llm-d/llm-d-kv-cache-manager/blob/fa85b60207ba0a09daf23071e10ccb62d7977b40/docs/configuration.md).
+See list of parameters at [llm-d-kv-cache/docs/configuration.md](https://github.com/llm-d/llm-d-kv-cache/blob/fa85b60207ba0a09daf23071e10ccb62d7977b40/docs/configuration.md).
 
 Note that in most cases you will only need to set:
 - HuggingFace token for the `tokenizersPoolConfig` or the `tokenizersCacheDir` to a mounted directory containing the tokenizers.