From d42f2fd19a9922b1fd031b8c5799fbf8df6409d4 Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Tue, 20 Jan 2026 11:41:30 -0800
Subject: [PATCH 1/9] added latency predictor coverage for inferencepool and
 added coverage for epp standalone mode

---
 hack/verify-helm.sh | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/hack/verify-helm.sh b/hack/verify-helm.sh
index e9bb8dd09c..69ddef5a4f 100755
--- a/hack/verify-helm.sh
+++ b/hack/verify-helm.sh
@@ -22,6 +22,7 @@ declare -A test_cases_inference_pool
 test_cases_inference_pool["basic"]="--set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 test_cases_inference_pool["gke-provider"]="--set provider.name=gke --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 test_cases_inference_pool["multiple-replicas"]="--set inferencePool.replicas=3 --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
+test_cases_inference_pool["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true"
 
 # Run the install command in case this script runs from a different bash
 # source (such as in the verify-all script)
@@ -46,5 +47,30 @@ for key in "${!test_cases_inference_pool[@]}"; do
   fi
 done
 
+declare -A test_cases_epp_standalone
 
+# InferencePool Helm Chart test cases
+test_cases_epp_standalone["basic"]="--set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
+test_cases_epp_standalone["gke-provider"]="--set provider.name=gke --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
+test_cases_epp_standalone["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true"
+
+
+echo "Building dependencies for epp-standalone chart..."
+# Test the command directly: a separate `$?` check never runs if errexit is on.
+if ! "${SCRIPT_ROOT}/bin/helm" dependency build "${SCRIPT_ROOT}/config/charts/epp-standalone"; then
+  echo "Helm dependency build failed."
+  exit 1
+fi
+
+# Render the epp-standalone chart once per test case; stop at the first failure.
+echo "Running helm template command for epp-standalone chart..."
+for key in "${!test_cases_epp_standalone[@]}"; do
+  echo "Running test: $key"
+  # The flag string is left unquoted on purpose so it splits into separate helm arguments.
+  # shellcheck disable=SC2086
+  if ! "${SCRIPT_ROOT}/bin/helm" template "${SCRIPT_ROOT}/config/charts/epp-standalone" ${test_cases_epp_standalone[$key]} --output-dir="${SCRIPT_ROOT}/bin"; then
+    echo "Helm template command failed for test: $key"
+    exit 1
+  fi
+done
 

From f45b208a8b94315495e5cf88e3a277a7f4f1afb0 Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Tue, 20 Jan 2026 11:58:52 -0800
Subject: [PATCH 2/9] fixed CI/CD

---
 hack/verify-helm.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hack/verify-helm.sh b/hack/verify-helm.sh
index 69ddef5a4f..ca2f1ad147 100755
--- a/hack/verify-helm.sh
+++ b/hack/verify-helm.sh
@@ -22,7 +22,7 @@ declare -A test_cases_inference_pool
 test_cases_inference_pool["basic"]="--set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 test_cases_inference_pool["gke-provider"]="--set provider.name=gke --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 test_cases_inference_pool["multiple-replicas"]="--set inferencePool.replicas=3 --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
-test_cases_inference_pool["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true"
+test_cases_inference_pool["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
 
 # Run the install command in case this script runs from a different bash
 # source (such as in the verify-all script)
@@ -50,9 +50,9 @@ done
 declare -A test_cases_epp_standalone
 
 # InferencePool Helm Chart test cases
-test_cases_epp_standalone["basic"]="--set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
-test_cases_epp_standalone["gke-provider"]="--set provider.name=gke --set inferencePool.modelServers.matchLabels.app=llm-instance-gateway"
-test_cases_epp_standalone["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true"
+test_cases_epp_standalone["basic"]="--set inferenceExtension.endpointsServer.endpointSelector=app=llm-instance-gateway"
+test_cases_epp_standalone["gke-provider"]="--set provider.name=gke --set inferenceExtension.endpointsServer.endpointSelector=app=llm-instance-gateway"
+test_cases_epp_standalone["latency-predictor"]="--set inferenceExtension.latencyPredictor.enabled=true --set inferenceExtension.endpointsServer.endpointSelector=app=llm-instance-gateway"
 
 
 echo "Building dependencies for epp-standalone chart..."

From 4cf13ac5a99af794242c4074ddca1ae60e554e04 Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Tue, 20 Jan 2026 13:02:27 -0800
Subject: [PATCH 3/9] added existence check

---
 config/charts/inference-extension/templates/_gke.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/charts/inference-extension/templates/_gke.yaml b/config/charts/inference-extension/templates/_gke.yaml
index 9a19e7597a..506f66e35a 100644
--- a/config/charts/inference-extension/templates/_gke.yaml
+++ b/config/charts/inference-extension/templates/_gke.yaml
@@ -1,5 +1,5 @@
 {{- define "inference-extension.gke" -}}
-{{- if eq (lower .Values.provider.name) "gke" }}
+{{- if and .Values.provider (eq (lower .Values.provider.name) "gke") }}
 {{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
 {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
 {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}

From ac0e7cbaca96b0229f8dc5d7184a33a6622ea191 Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Tue, 20 Jan 2026 13:04:07 -0800
Subject: [PATCH 4/9] added existence check

---
 config/charts/epp-standalone/values.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/config/charts/epp-standalone/values.yaml b/config/charts/epp-standalone/values.yaml
index 3cb10dd5ec..4bdc52e97a 100644
--- a/config/charts/epp-standalone/values.yaml
+++ b/config/charts/epp-standalone/values.yaml
@@ -296,3 +296,13 @@ inferenceExtension:
 
   latencyPredictor:
     enabled: false
+
+# Options: ["gke"]
+provider:
+  name: none
+
+  # GKE-specific configuration.
+  # This block is only used if name is "gke".
+  gke:
+    # Set to true if the cluster is an Autopilot cluster.
+    autopilot: false
\ No newline at end of file

From 65712815c03313cc9e0e90bc6dc8e3ef29a7ed04 Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Tue, 20 Jan 2026 13:25:37 -0800
Subject: [PATCH 5/9] added latency predictor value

---
 config/charts/epp-standalone/values.yaml | 83 +++++++++++++++++++++++-
 1 file changed, 82 insertions(+), 1 deletion(-)

diff --git a/config/charts/epp-standalone/values.yaml b/config/charts/epp-standalone/values.yaml
index 4bdc52e97a..5b8cee153f 100644
--- a/config/charts/epp-standalone/values.yaml
+++ b/config/charts/epp-standalone/values.yaml
@@ -297,6 +297,86 @@ inferenceExtension:
   latencyPredictor:
     enabled: false
 
+    # Training Server Configuration
+    trainingServer:
+      image:
+        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
+        name: latencypredictor-training-server
+        tag: latest
+        pullPolicy: Always
+      port: 8000
+      resources:
+        requests:
+          cpu: "2000m"
+          memory: "4Gi"
+        limits:
+          cpu: "4000m"
+          memory: "8Gi"
+      livenessProbe:
+        httpGet:
+          path: /healthz
+          port: 8000
+        initialDelaySeconds: 30
+        periodSeconds: 20
+      readinessProbe:
+        httpGet:
+          path: /readyz
+          port: 8000
+        initialDelaySeconds: 45
+        periodSeconds: 10
+      volumeSize: "20Gi"
+      config:
+        LATENCY_RETRAINING_INTERVAL_SEC: "1"
+        LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
+        LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"
+        LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
+        LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
+        LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
+        LATENCY_MODEL_TYPE: "xgboost"
+        LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "5000"
+        LATENCY_QUANTILE_ALPHA: "0.9"
+
+    # Prediction Server Configuration
+    predictionServers:
+      count: 10
+      startPort: 8001
+      image:
+        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
+        name: latencypredictor-prediction-server
+        tag: latest
+        pullPolicy: Always
+      resources:
+        requests:
+          cpu: "500m"
+          memory: "1Gi"
+        limits:
+          cpu: "1000m"
+          memory: "2Gi"
+      livenessProbe:
+        httpGet:
+          path: /healthz
+        initialDelaySeconds: 15
+        periodSeconds: 15
+      readinessProbe:
+        httpGet:
+          path: /readyz
+        initialDelaySeconds: 10
+        periodSeconds: 5
+        failureThreshold: 10
+      volumeSize: "10Gi"
+      config:
+        LATENCY_MODEL_TYPE: "xgboost"
+        PREDICT_HOST: "0.0.0.0"
+        LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"
+        LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
+        LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
+        LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"
+
+    # EPP Environment Variables for Latency Predictor
+    eppEnv:
+      LATENCY_MAX_SAMPLE_SIZE: "10000"
+
+
 # Options: ["gke"]
 provider:
   name: none
@@ -305,4 +385,5 @@ provider:
   # This block is only used if name is "gke".
   gke:
     # Set to true if the cluster is an Autopilot cluster.
-    autopilot: false
\ No newline at end of file
+    autopilot: false
+

From b4207d54648fa4c94b80f6b7395e8d860a60b42c Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Wed, 21 Jan 2026 10:26:44 -0800
Subject: [PATCH 6/9] added prediction value.yaml

---
 config/charts/epp-standalone/values.yaml      | 80 ------------------
 config/charts/inference-extension/values.yaml | 80 ++++++++++++++++++
 config/charts/inferencepool/Chart.yaml        |  1 +
 config/charts/inferencepool/values.yaml       | 81 +------------------
 4 files changed, 82 insertions(+), 160 deletions(-)
 create mode 100644 config/charts/inference-extension/values.yaml

diff --git a/config/charts/epp-standalone/values.yaml b/config/charts/epp-standalone/values.yaml
index 5b8cee153f..7b3af858a2 100644
--- a/config/charts/epp-standalone/values.yaml
+++ b/config/charts/epp-standalone/values.yaml
@@ -297,86 +297,6 @@ inferenceExtension:
   latencyPredictor:
     enabled: false
 
-    # Training Server Configuration
-    trainingServer:
-      image:
-        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
-        name: latencypredictor-training-server
-        tag: latest
-        pullPolicy: Always
-      port: 8000
-      resources:
-        requests:
-          cpu: "2000m"
-          memory: "4Gi"
-        limits:
-          cpu: "4000m"
-          memory: "8Gi"
-      livenessProbe:
-        httpGet:
-          path: /healthz
-          port: 8000
-        initialDelaySeconds: 30
-        periodSeconds: 20
-      readinessProbe:
-        httpGet:
-          path: /readyz
-          port: 8000
-        initialDelaySeconds: 45
-        periodSeconds: 10
-      volumeSize: "20Gi"
-      config:
-        LATENCY_RETRAINING_INTERVAL_SEC: "1"
-        LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
-        LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"
-        LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
-        LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
-        LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
-        LATENCY_MODEL_TYPE: "xgboost"
-        LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "5000"
-        LATENCY_QUANTILE_ALPHA: "0.9"
-
-    # Prediction Server Configuration
-    predictionServers:
-      count: 10
-      startPort: 8001
-      image:
-        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
-        name: latencypredictor-prediction-server
-        tag: latest
-        pullPolicy: Always
-      resources:
-        requests:
-          cpu: "500m"
-          memory: "1Gi"
-        limits:
-          cpu: "1000m"
-          memory: "2Gi"
-      livenessProbe:
-        httpGet:
-          path: /healthz
-        initialDelaySeconds: 15
-        periodSeconds: 15
-      readinessProbe:
-        httpGet:
-          path: /readyz
-        initialDelaySeconds: 10
-        periodSeconds: 5
-        failureThreshold: 10
-      volumeSize: "10Gi"
-      config:
-        LATENCY_MODEL_TYPE: "xgboost"
-        PREDICT_HOST: "0.0.0.0"
-        LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"
-        LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
-        LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
-        LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"
-
-    # EPP Environment Variables for Latency Predictor
-    eppEnv:
-      LATENCY_MAX_SAMPLE_SIZE: "10000"
-
-
 # Options: ["gke"]
 provider:
   name: none
diff --git a/config/charts/inference-extension/values.yaml b/config/charts/inference-extension/values.yaml
new file mode 100644
index 0000000000..1803829ce7
--- /dev/null
+++ b/config/charts/inference-extension/values.yaml
@@ -0,0 +1,80 @@
+latencyPredictor:
+  enabled: false
+  # Training Server Configuration
+  trainingServer:
+    image:
+      hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
+      name: latencypredictor-training-server
+      tag: latest
+      pullPolicy: Always
+    port: 8000
+    resources:
+      requests:
+        cpu: "2000m"
+        memory: "4Gi"
+      limits:
+        cpu: "4000m"
+        memory: "8Gi"
+    livenessProbe:
+      httpGet:
+        path: /healthz
+        port: 8000
+      initialDelaySeconds: 30
+      periodSeconds: 20
+    readinessProbe:
+      httpGet:
+        path: /readyz
+        port: 8000
+      initialDelaySeconds: 45
+      periodSeconds: 10
+    volumeSize: "20Gi"
+    config:
+      LATENCY_RETRAINING_INTERVAL_SEC: "1"
+      LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
+      LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"
+      LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
+      LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
+      LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
+      LATENCY_MODEL_TYPE: "xgboost"
+      LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "5000"
+      LATENCY_QUANTILE_ALPHA: "0.9"
+
+  # Prediction Server Configuration
+  predictionServers:
+    count: 10
+    startPort: 8001
+    image:
+      hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
+      name: latencypredictor-prediction-server
+      tag: latest
+      pullPolicy: Always
+    resources:
+      requests:
+        cpu: "500m"
+        memory: "1Gi"
+      limits:
+        cpu: "1000m"
+        memory: "2Gi"
+    livenessProbe:
+      httpGet:
+        path: /healthz
+      initialDelaySeconds: 15
+      periodSeconds: 15
+    readinessProbe:
+      httpGet:
+        path: /readyz
+      initialDelaySeconds: 10
+      periodSeconds: 5
+      failureThreshold: 10
+    volumeSize: "10Gi"
+    config:
+      LATENCY_MODEL_TYPE: "xgboost"
+      PREDICT_HOST: "0.0.0.0"
+      LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"
+      LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
+      LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
+      LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"
+
+  # EPP Environment Variables for Latency Predictor
+  eppEnv:
+    LATENCY_MAX_SAMPLE_SIZE: "10000"
diff --git a/config/charts/inferencepool/Chart.yaml b/config/charts/inferencepool/Chart.yaml
index f6eadc3e21..815ba0f2df 100644
--- a/config/charts/inferencepool/Chart.yaml
+++ b/config/charts/inferencepool/Chart.yaml
@@ -11,3 +11,4 @@ dependencies:
   - name: inference-extension
     version: 0.0.0
     repository: "file://../inference-extension"
+    alias: inferenceExtension
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
index 1803e2cbaa..da0488e6cf 100644
--- a/config/charts/inferencepool/values.yaml
+++ b/config/charts/inferencepool/values.yaml
@@ -69,86 +69,7 @@ inferenceExtension:
 
   # Latency Predictor Configuration
   latencyPredictor:
-    enabled: false
-
-    # Training Server Configuration
-    trainingServer:
-      image:
-        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
-        name: latencypredictor-training-server
-        tag: latest
-        pullPolicy: Always
-      port: 8000
-      resources:
-        requests:
-          cpu: "2000m"
-          memory: "4Gi"
-        limits:
-          cpu: "4000m"
-          memory: "8Gi"
-      livenessProbe:
-        httpGet:
-          path: /healthz
-          port: 8000
-        initialDelaySeconds: 30
-        periodSeconds: 20
-      readinessProbe:
-        httpGet:
-          path: /readyz
-          port: 8000
-        initialDelaySeconds: 45
-        periodSeconds: 10
-      volumeSize: "20Gi"
-      config:
-        LATENCY_RETRAINING_INTERVAL_SEC: "1"
-        LATENCY_MIN_SAMPLES_FOR_RETRAIN: "100"
-        LATENCY_TTFT_MODEL_PATH: "/models/ttft.joblib"
-        LATENCY_TPOT_MODEL_PATH: "/models/tpot.joblib"
-        LATENCY_TTFT_SCALER_PATH: "/models/ttft_scaler.joblib"
-        LATENCY_TPOT_SCALER_PATH: "/models/tpot_scaler.joblib"
-        LATENCY_MODEL_TYPE: "xgboost"
-        LATENCY_MAX_TRAINING_DATA_SIZE_PER_BUCKET: "5000"
-        LATENCY_QUANTILE_ALPHA: "0.9"
-
-    # Prediction Server Configuration
-    predictionServers:
-      count: 10
-      startPort: 8001
-      image:
-        hub: path/to/your/docker/repo # NOTE: Update with your Docker repository path for sidecars
-        name: latencypredictor-prediction-server
-        tag: latest
-        pullPolicy: Always
-      resources:
-        requests:
-          cpu: "500m"
-          memory: "1Gi"
-        limits:
-          cpu: "1000m"
-          memory: "2Gi"
-      livenessProbe:
-        httpGet:
-          path: /healthz
-        initialDelaySeconds: 15
-        periodSeconds: 15
-      readinessProbe:
-        httpGet:
-          path: /readyz
-        initialDelaySeconds: 10
-        periodSeconds: 5
-        failureThreshold: 10
-      volumeSize: "10Gi"
-      config:
-        LATENCY_MODEL_TYPE: "xgboost"
-        PREDICT_HOST: "0.0.0.0"
-        LOCAL_TTFT_MODEL_PATH: "/server_models/ttft.joblib"
-        LOCAL_TPOT_MODEL_PATH: "/server_models/tpot.joblib"
-        LOCAL_TTFT_SCALER_PATH: "/server_models/ttft_scaler.joblib"
-        LOCAL_TPOT_SCALER_PATH: "/server_models/tpot_scaler.joblib"
-
-    # EPP Environment Variables for Latency Predictor
-    eppEnv:
-      LATENCY_MAX_SAMPLE_SIZE: "10000"
+    enabled: true
 
 inferencePool:
   targetPorts:

From b2ba12adfcfffc2e7dafce719c0302d07fc3d692 Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Wed, 21 Jan 2026 10:28:23 -0800
Subject: [PATCH 7/9] added comments

---
 config/charts/epp-standalone/values.yaml | 1 +
 config/charts/inferencepool/Chart.yaml   | 1 +
 config/charts/inferencepool/values.yaml  | 3 ++-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/config/charts/epp-standalone/values.yaml b/config/charts/epp-standalone/values.yaml
index 7b3af858a2..9ab4b831bd 100644
--- a/config/charts/epp-standalone/values.yaml
+++ b/config/charts/epp-standalone/values.yaml
@@ -295,6 +295,7 @@ inferenceExtension:
     enabled: false
 
   latencyPredictor:
+    # Shared latencyPredictor defaults are defined in config/charts/inference-extension/values.yaml.
     enabled: false
 
 # Options: ["gke"]
diff --git a/config/charts/inferencepool/Chart.yaml b/config/charts/inferencepool/Chart.yaml
index 815ba0f2df..55eb48c047 100644
--- a/config/charts/inferencepool/Chart.yaml
+++ b/config/charts/inferencepool/Chart.yaml
@@ -11,4 +11,5 @@ dependencies:
   - name: inference-extension
     version: 0.0.0
     repository: "file://../inference-extension"
+    # This is needed to make use of the common values.yaml in ./config/charts/inference-extension/values.yaml
     alias: inferenceExtension
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
index da0488e6cf..92d95ca29a 100644
--- a/config/charts/inferencepool/values.yaml
+++ b/config/charts/inferencepool/values.yaml
@@ -69,7 +69,8 @@ inferenceExtension:
 
   # Latency Predictor Configuration
   latencyPredictor:
-    enabled: true
+    # Shared latencyPredictor defaults are defined in config/charts/inference-extension/values.yaml.
+    enabled: false
 
 inferencePool:
   targetPorts:

From 974aa3ba4f48342828b29fe357493623e87aa831 Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Wed, 21 Jan 2026 10:29:24 -0800
Subject: [PATCH 8/9] added alias in epp-standalone

---
 config/charts/epp-standalone/Chart.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/charts/epp-standalone/Chart.yaml b/config/charts/epp-standalone/Chart.yaml
index 07f84dc1f5..37ccc341e8 100644
--- a/config/charts/epp-standalone/Chart.yaml
+++ b/config/charts/epp-standalone/Chart.yaml
@@ -12,3 +12,4 @@ dependencies:
   - name: inference-extension
     version: 0.0.0
     repository: "file://../inference-extension"
+    alias: inferenceExtension

From b7c0e8a65cc919bcf969f7d36ee316bb78586fcf Mon Sep 17 00:00:00 2001
From: Xiyue Yu <xiyue@google.com>
Date: Wed, 21 Jan 2026 10:30:59 -0800
Subject: [PATCH 9/9] added comment

---
 config/charts/epp-standalone/Chart.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config/charts/epp-standalone/Chart.yaml b/config/charts/epp-standalone/Chart.yaml
index 37ccc341e8..1d88d71c1c 100644
--- a/config/charts/epp-standalone/Chart.yaml
+++ b/config/charts/epp-standalone/Chart.yaml
@@ -12,4 +12,5 @@ dependencies:
   - name: inference-extension
     version: 0.0.0
     repository: "file://../inference-extension"
+    # This is needed to make use of the common values.yaml in ./config/charts/inference-extension/values.yaml
     alias: inferenceExtension