diff --git a/.gitignore b/.gitignore
index 18a475b5e7..7e9674135f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,9 @@ site
 
 # MacOS generated files
 **/.DS_Store
+
+# Ignore all Chart.lock files anywhere under config/charts
+config/charts/**/Chart.lock
+
+# Ignore all .tgz files anywhere under config/charts
+config/charts/**/*.tgz
diff --git a/config/charts/epp-standalone/.helmignore b/config/charts/epp-standalone/.helmignore
new file mode 100644
index 0000000000..0e8a0eb36f
--- /dev/null
+++ b/config/charts/epp-standalone/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/config/charts/epp-standalone/Chart.yaml b/config/charts/epp-standalone/Chart.yaml
new file mode 100644
index 0000000000..07f84dc1f5
--- /dev/null
+++ b/config/charts/epp-standalone/Chart.yaml
@@ -0,0 +1,14 @@
+apiVersion: v2
+name: epp-standalone
+description: A Helm chart for Endpoint Picker
+
+type: application
+
+version: 0.0.0
+
+appVersion: "0.0.0"
+
+dependencies:
+  - name: inference-extension
+    version: 0.0.0
+    repository: "file://../inference-extension"
diff --git a/config/charts/epp-standalone/templates/epp-config.yaml b/config/charts/epp-standalone/templates/epp-config.yaml
new file mode 100644
index 0000000000..c7295a5d54
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-config.yaml
@@ -0,0 +1 @@
+{{- /* Renders the ConfigMaps produced by the "inference-extension.config" named template of the inference-extension library chart. */ -}}{{- include "inference-extension.config" . -}}
diff --git a/config/charts/epp-standalone/templates/epp-deployment.yaml b/config/charts/epp-standalone/templates/epp-deployment.yaml
new file mode 100644
index 0000000000..4eaba71d28
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-deployment.yaml
@@ -0,0 +1 @@
+{{- /* Renders the EPP Deployment produced by the "inference-extension.deployment" named template of the inference-extension library chart. */ -}}{{- include "inference-extension.deployment" . -}}
diff --git a/config/charts/epp-standalone/templates/epp-gke.yaml b/config/charts/epp-standalone/templates/epp-gke.yaml
new file mode 100644
index 0000000000..ee54a8469c
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-gke.yaml
@@ -0,0 +1 @@
+{{- /* Renders the GKE resources from "inference-extension.gke". That template reads .Values.provider.name, so skip it when no provider is configured to avoid a nil-pointer render error. */ -}}{{- if .Values.provider -}}{{- include "inference-extension.gke" . -}}{{- end -}}
diff --git a/config/charts/epp-standalone/templates/epp-leader-election-rbac.yaml b/config/charts/epp-standalone/templates/epp-leader-election-rbac.yaml
new file mode 100644
index 0000000000..6820306788
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-leader-election-rbac.yaml
@@ -0,0 +1 @@
+{{- /* Delegates to the "inference-extension.lead-election-rbac" named template. NOTE(review): the name says "lead-election" while this file is "leader-election"; the define is not visible here, so confirm the names match. */ -}}{{- include "inference-extension.lead-election-rbac" . -}}
diff --git a/config/charts/epp-standalone/templates/epp-rbac.yaml b/config/charts/epp-standalone/templates/epp-rbac.yaml
new file mode 100644
index 0000000000..6c99c9e56d
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-rbac.yaml
@@ -0,0 +1 @@
+{{- /* Delegates to the "inference-extension.rbac" named template (presumably defined in the inference-extension library chart; definition not shown here). */ -}}{{- include "inference-extension.rbac" . -}}
diff --git a/config/charts/epp-standalone/templates/epp-sa-token-secret.yaml b/config/charts/epp-standalone/templates/epp-sa-token-secret.yaml
new file mode 100644
index 0000000000..ec13d9dce8
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-sa-token-secret.yaml
@@ -0,0 +1 @@
+{{- /* Delegates to the "inference-extension.sa-token-secret" named template (presumably defined in the inference-extension library chart; definition not shown here). */ -}}{{- include "inference-extension.sa-token-secret" . -}}
diff --git a/config/charts/epp-standalone/templates/epp-service-monitor.yaml b/config/charts/epp-standalone/templates/epp-service-monitor.yaml
new file mode 100644
index 0000000000..2e5f7a0d3e
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-service-monitor.yaml
@@ -0,0 +1 @@
+{{- /* Delegates to the "inference-extension.service-monitor" named template (presumably defined in the inference-extension library chart; definition not shown here). */ -}}{{- include "inference-extension.service-monitor" . -}}
diff --git a/config/charts/epp-standalone/templates/epp-service.yaml b/config/charts/epp-standalone/templates/epp-service.yaml
new file mode 100644
index 0000000000..fb6ab40573
--- /dev/null
+++ b/config/charts/epp-standalone/templates/epp-service.yaml
@@ -0,0 +1 @@
+{{- /* Delegates to the "inference-extension.service" named template (presumably defined in the inference-extension library chart; definition not shown here). */ -}}{{- include "inference-extension.service" . -}}
diff --git a/config/charts/epp-standalone/values.yaml b/config/charts/epp-standalone/values.yaml
new file mode 100644
index 0000000000..3cb10dd5ec
--- /dev/null
+++ b/config/charts/epp-standalone/values.yaml
@@ -0,0 +1,298 @@
+inferenceExtension:
+  replicas: 1
+  image:
+    name: epp
+    hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension
+    tag: main
+    pullPolicy: Always
+  extProcPort: 9002
+  extraServicePorts:
+    - name: http
+      port: 8081
+      protocol: TCP
+      targetPort: 8081
+  env: []
+  pluginsConfigFile: "default-plugins.yaml"
+
+  endpointsServer:
+    standalone: true
+    # endpointSelector is required when standalone is true; uncomment and set it, e.g.:
+    # endpointSelector: app=vllm-llama3-8b-instruct
+    targetPorts: 8000
+    modelServerType: vllm # vllm, triton-tensorrt-llm
+
+
+  sidecar:
+    enabled: true
+    configMap:
+      name: envoy
+      # Because the template just dumps this section, the keys become filenames.
+      # The values MUST be strings (note the literal block scalar '|')
+      data:
+        envoy.yaml: |
+          admin:
+            address:
+              socket_address:
+                address: 127.0.0.1
+                port_value: 19000
+            access_log:
+              - name: envoy.access_loggers.file
+                typed_config:
+                  "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+                  path: /dev/null
+          static_resources:
+            listeners:
+              - name: envoy-proxy-ready-0.0.0.0-19001
+                address:
+                  socket_address:
+                    address: 0.0.0.0
+                    port_value: 19001
+                filter_chains:
+                  - filters:
+                      - name: envoy.filters.network.http_connection_manager
+                        typed_config:
+                          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                          stat_prefix: envoy-ready-http
+                          route_config:
+                            name: local_route
+                            virtual_hosts:
+                              - name: prometheus_stats
+                                domains: ["*"]
+                                routes:
+                                  - match:
+                                      prefix: "/stats/prometheus"
+                                    route:
+                                      cluster: "prometheus_stats"
+                          http_filters:
+                            - name: envoy.filters.http.health_check
+                              typed_config:
+                                "@type": type.googleapis.com/envoy.extensions.filters.http.health_check.v3.HealthCheck
+                                pass_through_mode: false
+                                headers:
+                                  - name: ":path"
+                                    string_match:
+                                      exact: "/ready"
+                            - name: envoy.filters.http.router
+                              typed_config:
+                                "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+              - name: vllm
+                address:
+                  socket_address:
+                    address: 0.0.0.0
+                    port_value: 8081
+                per_connection_buffer_limit_bytes: 32768
+                access_log:
+                  - name: envoy.access_loggers.file
+                    filter:
+                      response_flag_filter:
+                        flags: ["NR"]
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+                      path: /dev/stdout
+                      log_format:
+                        text_format_source:
+                          inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n"
+                filter_chains:
+                  - name: vllm
+                    filters:
+                      - name: envoy.filters.network.http_connection_manager
+                        typed_config:
+                          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                          stat_prefix: http-8081
+                          route_config:
+                            name: vllm
+                            virtual_hosts:
+                              - name: vllm-default
+                                domains: ["*"]
+                                routes:
+                                  - match:
+                                      prefix: "/"
+                                    route:
+                                      cluster: original_destination_cluster
+                                      timeout: 86400s
+                                      idle_timeout: 86400s
+                                      upgrade_configs:
+                                        - upgrade_type: websocket
+                                    typed_per_filter_config:
+                                      envoy.filters.http.ext_proc:
+                                        "@type": type.googleapis.com/envoy.config.route.v3.FilterConfig
+                                        config: {}
+                          http_filters:
+                            - name: envoy.filters.http.ext_proc
+                              typed_config:
+                                "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+                                grpc_service:
+                                  envoy_grpc:
+                                    cluster_name: ext_proc
+                                    authority: localhost:9002
+                                  timeout: 10s
+                                processing_mode:
+                                  request_header_mode: SEND
+                                  response_header_mode: SEND
+                                  request_body_mode: FULL_DUPLEX_STREAMED
+                                  response_body_mode: FULL_DUPLEX_STREAMED
+                                  request_trailer_mode: SEND
+                                  response_trailer_mode: SEND
+                                message_timeout: 1000s
+                            - name: envoy.filters.http.router
+                              typed_config:
+                                "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+                                suppress_envoy_headers: true
+                          http2_protocol_options:
+                            max_concurrent_streams: 100
+                            initial_stream_window_size: 65536
+                            initial_connection_window_size: 1048576
+                          use_remote_address: true
+                          normalize_path: true
+                          merge_slashes: true
+                          server_header_transformation: PASS_THROUGH
+                          common_http_protocol_options:
+                            headers_with_underscores_action: REJECT_REQUEST
+                          path_with_escaped_slashes_action: UNESCAPE_AND_REDIRECT
+                          access_log:
+                            - name: envoy.access_loggers.file
+                              typed_config:
+                                "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+                                path: /dev/stdout
+                                log_format:
+                                  text_format_source:
+                                    inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n"
+            clusters:
+              - name: prometheus_stats
+                type: STATIC
+                connect_timeout: 0.250s
+                load_assignment:
+                  cluster_name: prometheus_stats
+                  endpoints:
+                    - lb_endpoints:
+                        - endpoint:
+                            address:
+                              socket_address:
+                                address: 127.0.0.1
+                                port_value: 19000
+              - name: original_destination_cluster
+                type: ORIGINAL_DST
+                connect_timeout: 1000s
+                lb_policy: CLUSTER_PROVIDED
+                circuit_breakers:
+                  thresholds:
+                    - max_connections: 40000
+                      max_pending_requests: 40000
+                      max_requests: 40000
+                original_dst_lb_config:
+                  use_http_header: true
+                  http_header_name: x-gateway-destination-endpoint
+              - name: ext_proc
+                type: STATIC
+                connect_timeout: 86400s
+                lb_policy: LEAST_REQUEST
+                circuit_breakers:
+                  thresholds:
+                    - max_connections: 40000
+                      max_pending_requests: 40000
+                      max_requests: 40000
+                      max_retries: 1024
+                health_checks:
+                  - timeout: 2s
+                    interval: 10s
+                    unhealthy_threshold: 3
+                    healthy_threshold: 2
+                    reuse_connection: true
+                    grpc_health_check:
+                      service_name: "envoy.service.ext_proc.v3.ExternalProcessor"
+                    tls_options:
+                      alpn_protocols: ["h2"]
+                transport_socket:
+                  name: "envoy.transport_sockets.tls"
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+                    common_tls_context:
+                      validation_context:
+                typed_extension_protocol_options:
+                  envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
+                    "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
+                    explicit_http_config:
+                      http2_protocol_options:
+                        initial_stream_window_size: 65536
+                        initial_connection_window_size: 1048576
+                load_assignment:
+                  cluster_name: ext_proc
+                  endpoints:
+                    - locality:
+                        region: ext_proc/e2e/0
+                      lb_endpoints:
+                        - endpoint:
+                            address:
+                              socket_address:
+                                address: 127.0.0.1
+                                port_value: 9002
+                          load_balancing_weight: 1
+    name: envoy-sidecar
+    image: docker.io/envoyproxy/envoy:distroless-v1.33.2
+    command: "envoy"
+    args:
+      - "--service-node"
+      - "envoy-sidecar"
+      - "--log-level"
+      - "info"  # "trace" is extremely verbose; override to "debug"/"trace" only while troubleshooting
+      - "--cpuset-threads"
+      - "--drain-strategy"
+      - "immediate"
+      - "--drain-time-s"
+      - "60"
+      - "-c"
+      - "/etc/envoy/envoy.yaml"
+    env:
+      - name: NS_NAME
+        valueFrom:
+          fieldRef:
+            fieldPath: metadata.namespace
+      - name: POD_NAME
+        valueFrom:
+          fieldRef:
+            fieldPath: metadata.name
+    ports:
+      - containerPort: 8081
+        name: http-8081
+      - containerPort: 19001
+        name: metrics-19001
+    resources:
+      requests:
+        cpu: 100m
+        memory: 512Mi
+
+    readinessProbe:
+      failureThreshold: 1
+      httpGet:
+        path: /ready
+        port: 19001
+        scheme: HTTP
+      periodSeconds: 5
+      successThreshold: 1
+      timeoutSeconds: 1
+
+    volumeMounts:
+      - name: config
+        mountPath: /etc/envoy
+        readOnly: true
+    volumes:
+      - name: config
+        configMap:
+          name: envoy
+          items:
+            - key: envoy.yaml
+              path: envoy.yaml
+  monitoring:
+    interval: "10s"
+    # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection
+    prometheus:
+      enabled: false
+      auth:
+        # To allow unauthenticated /metrics access (e.g., for debugging with curl), set to false
+        enabled: true
+
+  tracing:
+    enabled: false
+
+  latencyPredictor:
+    enabled: false
diff --git a/config/charts/inference-extension/.helmignore b/config/charts/inference-extension/.helmignore
new file mode 100644
index 0000000000..0e8a0eb36f
--- /dev/null
+++ b/config/charts/inference-extension/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/config/charts/inference-extension/Chart.yaml b/config/charts/inference-extension/Chart.yaml
new file mode 100644
index 0000000000..f4a7359ba7
--- /dev/null
+++ b/config/charts/inference-extension/Chart.yaml
@@ -0,0 +1,10 @@
+apiVersion: v2
+name: inference-extension
+description: A library Helm chart for Endpoint Picker
+
+type: library
+
+version: 0.0.0
+
+appVersion: "0.0.0"
+
diff --git a/config/charts/inference-extension/templates/_config.yaml b/config/charts/inference-extension/templates/_config.yaml
new file mode 100644
index 0000000000..797d810014
--- /dev/null
+++ b/config/charts/inference-extension/templates/_config.yaml
@@ -0,0 +1,91 @@
+{{- /* inference-extension.config renders the EPP plugins ConfigMap, the sidecar ConfigMap (when sidecar.enabled) and the latency-predictor training/prediction ConfigMaps (when latencyPredictor.enabled). */ -}}
+{{- define "inference-extension.config" -}}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+data:
+  default-plugins.yaml: |
+    apiVersion: inference.networking.x-k8s.io/v1alpha1
+    kind: EndpointPickerConfig
+    plugins:
+    - type: queue-scorer
+    - type: kv-cache-utilization-scorer
+    - type: prefix-cache-scorer
+    {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+    - type: predicted-latency-scorer
+      parameters:
+        {{- /* Bind to a variable instead of `with`: `with` skips its body for an empty dict, which would drop every default below when sloAwareRouting is unset. */}}
+        {{- $slo := .Values.inferenceExtension.latencyPredictor.sloAwareRouting | default dict }}
+        samplingMean: {{ $slo.samplingMean | default 1000.0 }}
+        maxSampledTokens: {{ $slo.maxSampledTokens | default 20 }}
+        sloBufferFactor: {{ $slo.sloBufferFactor | default 1.0 }}
+        negHeadroomTTFTWeight: {{ $slo.negHeadroomTTFTWeight | default 0.8 }}
+        negHeadroomTPOTWeight: {{ $slo.negHeadroomTPOTWeight | default 0.2 }}
+        headroomTTFTWeight: {{ $slo.headroomTTFTWeight | default 0.8 }}
+        headroomTPOTWeight: {{ $slo.headroomTPOTWeight | default 0.2 }}
+        headroomSelectionStrategy: {{ $slo.headroomSelectionStrategy | default "least" | quote }}
+        compositeKVWeight: {{ $slo.compositeKVWeight | default 1.0 }}
+        compositeQueueWeight: {{ $slo.compositeQueueWeight | default 1.0 }}
+        compositePrefixWeight: {{ $slo.compositePrefixWeight | default 1.0 }}
+        epsilonExploreSticky: {{ $slo.epsilonExploreSticky | default 0.01 }}
+        epsilonExploreNeg: {{ $slo.epsilonExploreNeg | default 0.01 }}
+        affinityGateTau: {{ $slo.affinityGateTau | default 0.80 }}
+        affinityGateTauGlobal: {{ $slo.affinityGateTauGlobal | default 0.99 }}
+        selectionMode: {{ $slo.selectionMode | default "linear" | quote }}
+    {{- end }}
+    schedulingProfiles:
+    {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+    - name: default
+      plugins:
+      - pluginRef: predicted-latency-scorer
+    featureGates:
+      - prepareDataPlugins
+    {{- else }}
+    - name: default
+      plugins:
+      - pluginRef: queue-scorer
+        weight: 2
+      - pluginRef: kv-cache-utilization-scorer
+        weight: 2
+      - pluginRef: prefix-cache-scorer
+        weight: 3
+    {{- end }}
+  {{- if (hasKey .Values.inferenceExtension "pluginsCustomConfig") }}
+  {{- .Values.inferenceExtension.pluginsCustomConfig | toYaml | nindent 2 }}
+  {{- end }}
+---
+{{- if .Values.inferenceExtension.sidecar.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.inferenceExtension.sidecar.configMap.name }}
+  namespace: {{ .Release.Namespace }}
+data:
+  {{- .Values.inferenceExtension.sidecar.configMap.data | toYaml | nindent 2 }}
+{{- end }}
+---
+{{- if .Values.inferenceExtension.latencyPredictor.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-training
+  namespace: {{ .Release.Namespace }}
+data:
+  {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.trainingServer.config }}
+  {{ $key }}: {{ $value | quote }}
+  {{- end }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-prediction
+  namespace: {{ .Release.Namespace }}
+data:
+  {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.predictionServers.config }}
+  {{ $key }}: {{ $value | quote }}
+  {{- end }}
+{{- end }}
+---
+{{- end -}}
diff --git a/config/charts/inference-extension/templates/_deployment.yaml b/config/charts/inference-extension/templates/_deployment.yaml
new file mode 100644
index 0000000000..522c6ddf37
--- /dev/null
+++ b/config/charts/inference-extension/templates/_deployment.yaml
@@ -0,0 +1,223 @@
+{{- /* inference-extension.deployment renders the EPP Deployment: an optional sidecar container
+(inferenceExtension.sidecar) followed by the "epp" container with its args, probes, env and volumes. */ -}}
+{{- define "inference-extension.deployment" -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
+  strategy:
+    # The currently recommended EPP deployment pattern is to have a single active replica. This ensures
+    # optimal performance of stateful operations such as the prefix-cache-aware scorer.
+    # With the Recreate strategy the old replica is killed immediately, allowing the new replica(s) to
+    # take over quickly. This is particularly important in the high-availability setup with leader
+    # election, as the rolling update strategy would prevent the old leader from being killed because
+    # otherwise maxUnavailable would be 100%.
+    type: Recreate
+  selector:
+    matchLabels:
+      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
+      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
+      terminationGracePeriodSeconds: 130
+      containers:
+      {{- if .Values.inferenceExtension.sidecar.enabled }}
+        - name: {{ .Values.inferenceExtension.sidecar.name }}
+          image: {{ .Values.inferenceExtension.sidecar.image }}
+          imagePullPolicy: {{ .Values.inferenceExtension.sidecar.imagePullPolicy | default "IfNotPresent" }}
+        {{- with .Values.inferenceExtension.sidecar.command }}
+          command:
+            - {{ . | quote }}
+        {{- end }}
+        {{- with .Values.inferenceExtension.sidecar.args }}
+          args:
+          {{- range . }}
+            - {{ . | quote }}
+          {{- end }}
+        {{- end }}
+        {{- with .Values.inferenceExtension.sidecar.env }}
+          env:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- with .Values.inferenceExtension.sidecar.ports }}
+          ports:
+          {{- toYaml . | nindent 10 }}
+        {{- end }}
+        {{- with .Values.inferenceExtension.sidecar.livenessProbe }}
+          livenessProbe:
+          {{- toYaml . | nindent 12 }}
+        {{- end }}
+        {{- with .Values.inferenceExtension.sidecar.readinessProbe }}
+          readinessProbe:
+          {{- toYaml . | nindent 12 }}
+        {{- end }}
+        {{- with .Values.inferenceExtension.sidecar.resources }}
+          resources:
+          {{- toYaml . | nindent 12 }}
+        {{- end }}
+        {{- with .Values.inferenceExtension.sidecar.volumeMounts }}
+          volumeMounts:
+          {{- toYaml . | nindent 12 }}
+        {{- end }}
+      {{- end }}
+        - name: epp
+          image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
+          imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "IfNotPresent" }}
+          args:
+          {{- /* 1. Determine Model Server Type Logic */ -}}
+          {{- $modelServerType := "vllm" }}
+          {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}}
+            {{- $modelServerType = .Values.inferenceExtension.endpointsServer.modelServerType | default "vllm" }}
+          {{- else }}
+            {{- $modelServerType = .Values.inferencePool.modelServerType | default "vllm" }}
+          {{- end }}
+          {{- /* 2. Mode Specific Flags */ -}}
+          {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone }}
+              - --endpoint-selector
+              - {{ required "inferenceExtension.endpointsServer.endpointSelector is required when endpointsServer.standalone is true" .Values.inferenceExtension.endpointsServer.endpointSelector | quote }}
+              - --endpoint-target-ports
+              - {{ .Values.inferenceExtension.endpointsServer.targetPorts | quote }}
+          {{- else }}
+              - --pool-name
+              - {{ .Release.Name }}
+              # The pool namespace is optional because EPP can default to the NAMESPACE env var.
+              - --pool-namespace
+              - {{ .Release.Namespace }}
+          {{- if ne .Values.inferencePool.apiVersion "inference.networking.k8s.io" }}
+              - --pool-group
+              - "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"
+          {{- end }}
+          {{- end }}
+          {{- if eq $modelServerType "triton-tensorrt-llm" }}
+              - --total-queued-requests-metric
+              - "nv_trt_llm_request_metrics{request_type=waiting}"
+              - --kv-cache-usage-percentage-metric
+              - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}"
+              - --lora-info-metric
+              - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet.
+          {{- end }}
+              - --zap-encoder
+              - "json"
+              - --config-file
+              - "/config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
+          {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
+              - --ha-enable-leader-election
+          {{- end }}
+              # Pass additional flags via the inferenceExtension.flags field in values.yaml.
+          {{- range $key, $value := .Values.inferenceExtension.flags }}
+              - --{{ $key }}
+              - "{{ $value }}"
+          {{- end }}
+              - --tracing={{ if .Values.inferenceExtension.tracing.enabled }}true{{ else }}false{{ end }}
+          {{- if not .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
+              - --metrics-endpoint-auth=false
+          {{- end }}
+          ports:
+            - name: grpc
+              containerPort: 9002
+            - name: grpc-health
+              containerPort: 9003
+            - name: metrics
+              containerPort: 9090
+        {{- if .Values.inferenceExtension.extraContainerPorts }}
+        {{- toYaml .Values.inferenceExtension.extraContainerPorts | nindent 12 }}
+        {{- end }}
+          livenessProbe:
+          {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
+            grpc:
+              port: 9003
+              service: liveness
+          {{- else }}
+            grpc:
+              port: 9003
+              service: inference-extension
+          {{- end }}
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          readinessProbe:
+          {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
+            grpc:
+              port: 9003
+              service: readiness
+          {{- else }}
+            grpc:
+              port: 9003
+              service: inference-extension
+          {{- end }}
+            periodSeconds: 2
+          env:
+            - name: NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+        {{- /* NOTE(review): nindent 12 matches the env items above; assumes the latencyPredictor.env helper emits at column 0 like the labels helper - confirm. */}}
+        {{- include "gateway-api-inference-extension.latencyPredictor.env" . | nindent 12 }}
+        {{- if .Values.inferenceExtension.tracing.enabled }}
+            - name: OTEL_SERVICE_NAME
+              value: "gateway-api-inference-extension"
+            - name: OTEL_EXPORTER_OTLP_ENDPOINT
+              value: {{ .Values.inferenceExtension.tracing.otelExporterEndpoint | quote }}
+            - name: OTEL_TRACES_EXPORTER
+              value: "otlp"
+            - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
+              valueFrom:
+                fieldRef:
+                  apiVersion: v1
+                  fieldPath: spec.nodeName
+            - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
+              valueFrom:
+                fieldRef:
+                  apiVersion: v1
+                  fieldPath: metadata.name
+            - name: OTEL_RESOURCE_ATTRIBUTES
+              value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
+            - name: OTEL_TRACES_SAMPLER
+              value: {{ .Values.inferenceExtension.tracing.sampling.sampler | quote }}
+            - name: OTEL_TRACES_SAMPLER_ARG
+              value: {{ .Values.inferenceExtension.tracing.sampling.samplerArg | quote }}
+        {{- end }}
+        {{- if .Values.inferenceExtension.env }}
+        {{- toYaml .Values.inferenceExtension.env | nindent 12 }}
+        {{- end }}
+          volumeMounts:
+            - name: plugins-config-volume
+              mountPath: "/config"
+        {{- if .Values.inferenceExtension.volumeMounts }}
+        {{- toYaml .Values.inferenceExtension.volumeMounts | nindent 12 }}
+        {{- end }}
+      {{- /* NOTE(review): nindent 8 matches the container items above; assumes the latencyPredictor.containers helper emits at column 0 - confirm. */}}
+      {{- include "gateway-api-inference-extension.latencyPredictor.containers" . | nindent 8 }}
+      volumes:
+      {{- if .Values.inferenceExtension.volumes }}
+      {{- toYaml .Values.inferenceExtension.volumes | nindent 6 }}
+      {{- end }}
+      {{- if .Values.inferenceExtension.sidecar.volumes }}
+      {{- tpl (toYaml .Values.inferenceExtension.sidecar.volumes) $ | nindent 6 }}
+      {{- end }}
+      - name: plugins-config-volume
+        configMap:
+          name: {{ include "gateway-api-inference-extension.name" . }}
+      {{- include "gateway-api-inference-extension.latencyPredictor.volumes" . | nindent 6 }}
+      {{- if .Values.inferenceExtension.affinity }}
+      affinity:
+        {{- toYaml .Values.inferenceExtension.affinity | nindent 8 }}
+      {{- end }}
+      {{- if .Values.inferenceExtension.tolerations }}
+      tolerations:
+        {{- toYaml .Values.inferenceExtension.tolerations | nindent 8 }}
+      {{- end }}
+---
+{{- end }}
diff --git a/config/charts/inference-extension/templates/_gke.yaml b/config/charts/inference-extension/templates/_gke.yaml
new file mode 100644
index 0000000000..9a19e7597a
--- /dev/null
+++ b/config/charts/inference-extension/templates/_gke.yaml
@@ -0,0 +1,125 @@
+{{- /*
+inference-extension.gke renders the GKE-specific metrics-scraping resources:
+a reader ServiceAccount with a token Secret, a PodMonitoring that scrapes
+/metrics with that token, and the RBAC that lets the reader GET /metrics and
+lets the "collector" ServiceAccount read the token Secret.
+Nothing is rendered unless provider.name is "gke" (case-insensitive) and both
+monitoring.prometheus.enabled and monitoring.prometheus.auth.enabled are set.
+*/ -}}
+{{- define "inference-extension.gke" -}}
+{{- if eq (lower .Values.provider.name) "gke" }}
+{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
+{{- /* Resource names are derived from the release; the cluster-scoped ones also embed the release namespace. */ -}}
+{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
+{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
+{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
+{{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}}
+{{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}}
+{{- /* The collector namespace is "gke-gmp-system" when provider.gke.autopilot is set, otherwise "gmp-system". */ -}}
+{{- $gmpNamespace := "gmp-system" -}}
+{{- $isAutopilot := false -}}
+{{- with .Values.provider.gke }}
+  {{- $isAutopilot = .autopilot | default false -}}
+{{- end }}
+{{- if $isAutopilot -}}
+{{-   $gmpNamespace = "gke-gmp-system" -}}
+{{- end -}}
+{{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ $metricsReadSA }}
+  namespace: {{ .Release.Namespace }}
+---
+# Token Secret tied to the reader ServiceAccount through the service-account.name annotation.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ $metricsReadSecretName }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+  annotations:
+    kubernetes.io/service-account.name: {{ $metricsReadSA }}
+type: kubernetes.io/service-account-token
+---
+# Scrapes the "metrics" port of the selected pods, authenticating with the token Secret above.
+apiVersion: monitoring.googleapis.com/v1
+kind: PodMonitoring
+metadata:
+  name: {{ .Release.Name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  endpoints:
+    - port: metrics
+      scheme: http
+      interval: {{ .Values.inferenceExtension.monitoring.interval }}
+      path: /metrics
+      authorization:
+        type: Bearer
+        credentials:
+          secret:
+            name: {{ $metricsReadSecretName }}
+            key: token
+  selector:
+    matchLabels:
+      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
+---
+# Grants GET on the /metrics non-resource URL; bound to the reader ServiceAccount below.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ $metricsReadRoleName }}
+rules:
+  - nonResourceURLs:
+      - /metrics
+    verbs:
+      - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ $metricsReadRoleBindingName }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ $metricsReadSA }}
+    namespace: {{ .Release.Namespace }}
+roleRef:
+  kind: ClusterRole
+  name: {{ $metricsReadRoleName }}
+  apiGroup: rbac.authorization.k8s.io
+---
+# Read access limited to the token Secret. The namespace is set explicitly so the Role always
+# lands next to the RoleBinding that references it, even when applied without Helm's defaulting.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ $secretReadRoleName }}
+  namespace: {{ .Release.Namespace }}
+rules:
+  - resources:
+      - secrets
+    apiGroups: [""]
+    verbs: ["get", "list", "watch"]
+    resourceNames: [{{ $metricsReadSecretName | quote }}]
+---
+# Binds the secret-read Role to the "collector" ServiceAccount in the collector namespace.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ $gmpCollectorRoleBindingName }}
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  name: {{ $secretReadRoleName }}
+  kind: Role
+  apiGroup: rbac.authorization.k8s.io
+subjects:
+  - name: collector
+    namespace: {{ $gmpNamespace }}
+    kind: ServiceAccount
+---
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inference-extension/templates/_helpers.tpl
similarity index 72%
rename from config/charts/inferencepool/templates/_helpers.tpl
rename to config/charts/inference-extension/templates/_helpers.tpl
index fdc9b1a2b7..c8e903fcc7 100644
--- a/config/charts/inferencepool/templates/_helpers.tpl
+++ b/config/charts/inference-extension/templates/_helpers.tpl
@@ -29,5 +29,12 @@ Cluster RBAC unique name
 Selector labels
 */}}
 {{- define "gateway-api-inference-extension.selectorLabels" -}}
+{{- /* Nil-safe lookup: a missing endpointsServer falls back to an empty dict, so .standalone never dereferences nil. */ -}}
+{{- if (.Values.inferenceExtension.endpointsServer | default dict).standalone -}}
+{{- /* Standalone EPP mode: pods are selected by the "epp" label. */ -}}
+epp: {{ include "gateway-api-inference-extension.name" . }}
+{{- else -}}
+{{- /* InferencePool (parent chart) mode: pods are selected by the "inferencepool" label. */ -}}
 inferencepool: {{ include "gateway-api-inference-extension.name" . }}
 {{- end -}}
+{{- end -}}
diff --git a/config/charts/inferencepool/templates/_latency-predictor.tpl b/config/charts/inference-extension/templates/_latency-predictor.tpl
similarity index 100%
rename from config/charts/inferencepool/templates/_latency-predictor.tpl
rename to config/charts/inference-extension/templates/_latency-predictor.tpl
diff --git a/config/charts/inferencepool/templates/leader-election-rbac.yaml b/config/charts/inference-extension/templates/_leader-election-rbac.yaml
similarity index 93%
rename from config/charts/inferencepool/templates/leader-election-rbac.yaml
rename to config/charts/inference-extension/templates/_leader-election-rbac.yaml
index 11b3dd5168..4d44f2f5a2 100644
--- a/config/charts/inferencepool/templates/leader-election-rbac.yaml
+++ b/config/charts/inference-extension/templates/_leader-election-rbac.yaml
@@ -1,3 +1,4 @@
+{{- define "inference-extension.lead-election-rbac" -}}
 {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
 ---
 kind: Role
@@ -27,4 +28,6 @@ roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: Role
   name: {{ include "gateway-api-inference-extension.name" . }}-leader-election
+---
+{{- end }}
 {{- end }}
diff --git a/config/charts/inference-extension/templates/_rbac.yaml b/config/charts/inference-extension/templates/_rbac.yaml
new file mode 100644
index 0000000000..0b77026048
--- /dev/null
+++ b/config/charts/inference-extension/templates/_rbac.yaml
@@ -0,0 +1,83 @@
+{{- /*
+inference-extension.rbac renders the RBAC for the ServiceAccount named by
+"gateway-api-inference-extension.name":
+  - when monitoring.prometheus.enabled is set, a ClusterRole/ClusterRoleBinding
+    allowing tokenreviews and subjectaccessreviews creation plus GET /metrics;
+  - always, a namespaced Role/RoleBinding for reading pods, and the
+    ServiceAccount itself.
+*/ -}}
+{{- define "inference-extension.rbac" -}}
+{{- $saName := include "gateway-api-inference-extension.name" . -}}
+{{- $podReader := printf "%s-sa" $saName -}}
+{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
+{{- $clusterName := include "gateway-api-inference-extension.cluster-rbac-name" . }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ $clusterName }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+rules:
+  - apiGroups: ["authentication.k8s.io"]
+    resources: ["tokenreviews"]
+    verbs: ["create"]
+  - apiGroups: ["authorization.k8s.io"]
+    resources: ["subjectaccessreviews"]
+    verbs: ["create"]
+  - nonResourceURLs: ["/metrics"]
+    verbs: ["get"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ $clusterName }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ $clusterName }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ $saName }}
+    namespace: {{ .Release.Namespace }}
+{{- end }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ $podReader }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - watch
+      - list
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ $podReader }}
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ $podReader }}
+subjects:
+  - kind: ServiceAccount
+    name: {{ $saName }}
+    namespace: {{ .Release.Namespace }}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ $saName }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+---
+{{- end }}
diff --git a/config/charts/inference-extension/templates/_sa-token-secret.yaml b/config/charts/inference-extension/templates/_sa-token-secret.yaml
new file mode 100644
index 0000000000..6c3a1ed74d
--- /dev/null
+++ b/config/charts/inference-extension/templates/_sa-token-secret.yaml
@@ -0,0 +1,22 @@
+{{- /*
+inference-extension.sa-token-secret renders a service-account-token Secret for
+the ServiceAccount named by "gateway-api-inference-extension.name". It is only
+emitted when Prometheus monitoring and its auth are both enabled and
+provider.name is not "gke" (case-insensitive).
+*/ -}}
+{{- define "inference-extension.sa-token-secret" -}}
+{{- $prom := .Values.inferenceExtension.monitoring.prometheus -}}
+{{- if and $prom.enabled $prom.auth.enabled (ne (lower .Values.provider.name) "gke") }}
+apiVersion: v1
+kind: Secret
+type: kubernetes.io/service-account-token
+metadata:
+  name: {{ $prom.auth.secretName }}
+  namespace: {{ .Release.Namespace }}
+  annotations:
+    kubernetes.io/service-account.name: {{ include "gateway-api-inference-extension.name" . }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+---
+{{- end }}
+{{- end }}
diff --git a/config/charts/inference-extension/templates/_service.yaml b/config/charts/inference-extension/templates/_service.yaml
new file mode 100644
index 0000000000..9bc5c47714
--- /dev/null
+++ b/config/charts/inference-extension/templates/_service.yaml
@@ -0,0 +1,30 @@
+{{- /*
+inference-extension.service renders the ClusterIP Service for the pods matched
+by the chart's selector labels. It exposes the ext-proc gRPC port (default
+9002), the metrics port (default 9090) and any inferenceExtension.extraServicePorts.
+*/ -}}
+{{- define "inference-extension.service" -}}
+{{- $epp := .Values.inferenceExtension -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  type: ClusterIP
+  selector:
+    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: grpc-ext-proc
+      protocol: TCP
+      port: {{ default 9002 $epp.extProcPort }}
+    - name: http-metrics
+      protocol: TCP
+      port: {{ default 9090 $epp.metricsPort }}
+    {{- if $epp.extraServicePorts }}
+    {{- toYaml $epp.extraServicePorts | nindent 4 }}
+    {{- end }}
+---
+{{- end }}
diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inference-extension/templates/_servicemonitor.yaml
similarity index 94%
rename from config/charts/inferencepool/templates/epp-servicemonitor.yaml
rename to config/charts/inference-extension/templates/_servicemonitor.yaml
index d58662cbef..17174a2938 100644
--- a/config/charts/inferencepool/templates/epp-servicemonitor.yaml
+++ b/config/charts/inference-extension/templates/_servicemonitor.yaml
@@ -1,3 +1,4 @@
+{{- define "inference-extension.service-monitor" -}}
 {{- if and .Values.inferenceExtension.monitoring.prometheus.enabled (ne (lower .Values.provider.name) "gke") }}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
@@ -27,4 +28,6 @@ spec:
   selector:
     matchLabels:
       {{- include "gateway-api-inference-extension.labels" . | nindent 6 }}
+---
+{{- end }}
 {{- end }}
diff --git a/config/charts/inferencepool/Chart.yaml b/config/charts/inferencepool/Chart.yaml
index f98153c500..f6eadc3e21 100644
--- a/config/charts/inferencepool/Chart.yaml
+++ b/config/charts/inferencepool/Chart.yaml
@@ -7,3 +7,7 @@ type: application
 version: 0.0.0
 
 appVersion: "0.0.0"
+dependencies:
+  - name: inference-extension
+    version: 0.0.0
+    repository: "file://../inference-extension"
diff --git a/config/charts/inferencepool/templates/epp-config.yaml b/config/charts/inferencepool/templates/epp-config.yaml
index 6f947a9295..c7295a5d54 100644
--- a/config/charts/inferencepool/templates/epp-config.yaml
+++ b/config/charts/inferencepool/templates/epp-config.yaml
@@ -1,88 +1 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
-  namespace: {{ .Release.Namespace }}
-data:
-  default-plugins.yaml: |
-    apiVersion: inference.networking.x-k8s.io/v1alpha1
-    kind: EndpointPickerConfig
-    plugins:
-    - type: queue-scorer
-    - type: kv-cache-utilization-scorer
-    - type: prefix-cache-scorer
-    {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
-    - type: predicted-latency-scorer
-      parameters:
-        {{- with .Values.inferenceExtension.latencyPredictor.sloAwareRouting | default dict }}
-        samplingMean: {{ .samplingMean | default 1000.0 }}
-        maxSampledTokens: {{ .maxSampledTokens | default 20 }}
-        sloBufferFactor: {{ .sloBufferFactor | default 1.0 }}
-        negHeadroomTTFTWeight: {{ .negHeadroomTTFTWeight | default 0.8 }}
-        negHeadroomTPOTWeight: {{ .negHeadroomTPOTWeight | default 0.2 }}
-        headroomTTFTWeight: {{ .headroomTTFTWeight | default 0.8 }}
-        headroomTPOTWeight: {{ .headroomTPOTWeight | default 0.2 }}
-        headroomSelectionStrategy: {{ .headroomSelectionStrategy | default "least" | quote }}
-        compositeKVWeight: {{ .compositeKVWeight | default 1.0 }}
-        compositeQueueWeight: {{ .compositeQueueWeight | default 1.0 }}
-        compositePrefixWeight: {{ .compositePrefixWeight | default 1.0 }}
-        epsilonExploreSticky: {{ .epsilonExploreSticky | default 0.01 }}
-        epsilonExploreNeg: {{ .epsilonExploreNeg | default 0.01 }}
-        affinityGateTau: {{ .affinityGateTau | default 0.80 }}
-        affinityGateTauGlobal: {{ .affinityGateTauGlobal | default 0.99 }}
-        selectionMode: {{ .selectionMode | default "linear" | quote }}
-        {{- end }}
-    {{- end }}
-    schedulingProfiles:
-    {{- if .Values.inferenceExtension.latencyPredictor.enabled }}
-    - name: default
-      plugins:
-      - pluginRef: predicted-latency-scorer
-    featureGates:
-      - prepareDataPlugins
-    {{- else }}
-    - name: default
-      plugins:
-      - pluginRef: queue-scorer
-        weight: 2
-      - pluginRef: kv-cache-utilization-scorer
-        weight: 2
-      - pluginRef: prefix-cache-scorer
-        weight: 3
-    {{- end }}
-  {{- if (hasKey .Values.inferenceExtension "pluginsCustomConfig") }}
-  {{- .Values.inferenceExtension.pluginsCustomConfig | toYaml | nindent 2 }}
-  {{- end }}
-  
----
-{{- if and .Values.inferenceExtension.sidecar.enabled .Values.inferenceExtension.sidecar.configMapData }}
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}-sidecar
-  namespace: {{ .Release.Namespace }}
-data:
-  {{- .Values.inferenceExtension.sidecar.configMapData | toYaml | nindent 2 }}
-{{- end }}
----
-{{- if .Values.inferenceExtension.latencyPredictor.enabled }}
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-training
-  namespace: {{ .Release.Namespace }}
-data:
-  {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.trainingServer.config }}
-  {{ $key }}: {{ $value | quote }}
-{{- end }}
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-prediction
-  namespace: {{ .Release.Namespace }}
-data:
-  {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.predictionServers.config }}
-  {{ $key }}: {{ $value | quote }}
-  {{- end }}
-{{- end }}
+{{- include "inference-extension.config" . -}}
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
index bd526ec4e2..4eaba71d28 100644
--- a/config/charts/inferencepool/templates/epp-deployment.yaml
+++ b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -1,207 +1 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-spec:
-  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
-  strategy:
-    # The current recommended EPP deployment pattern is to have a single active replica. This ensures 
-    # optimal performance of the stateful operations such prefix cache aware scorer.
-    # The Recreate strategy the old replica is killed immediately, and allow the new replica(s) to 
-    # quickly take over. This is particularly important in the high availability set up with leader
-    # election, as the rolling update strategy would prevent the old leader being killed because 
-    # otherwise the maxUnavailable would be 100%.
-    type: Recreate
-  selector:
-    matchLabels:
-      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
-  template:
-    metadata:
-      labels:
-        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
-    spec:
-      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
-      # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
-      terminationGracePeriodSeconds: 130
-      containers:
-      {{- if .Values.inferenceExtension.sidecar.enabled }}
-      - name: {{ .Values.inferenceExtension.sidecar.name }}
-        image: {{ .Values.inferenceExtension.sidecar.image }}
-        imagePullPolicy: {{ .Values.inferenceExtension.sidecar.imagePullPolicy | default "IfNotPresent" }}
-        {{- with .Values.inferenceExtension.sidecar.command }}
-        command:
-          - {{ . | quote }}
-        {{- end }}
-        {{- with .Values.inferenceExtension.sidecar.args }}
-        args:
-          {{- range . }}
-              - {{ . | quote }}
-          {{- end }}
-        {{- end }}
-        {{- with .Values.inferenceExtension.sidecar.env }}
-        env:
-          {{- toYaml . | nindent 10 }}
-          {{- end }}
-        {{- with .Values.inferenceExtension.sidecar.ports }}
-        ports:
-          {{- toYaml . | nindent 10 }}
-        {{- end }}
-        {{- with .Values.inferenceExtension.sidecar.livenessProbe }}
-        livenessProbe:
-          {{- toYaml . | nindent 10 }}
-        {{- end }}
-        {{- with .Values.inferenceExtension.sidecar.readinessProbe }}
-        readinessProbe:
-          {{- toYaml . | nindent 10 }}
-        {{- end }}
-        {{- with .Values.inferenceExtension.sidecar.resources }}
-        resources:
-          {{- toYaml . | nindent 10 }}
-        {{- end }}
-        {{- with .Values.inferenceExtension.sidecar.volumeMounts }}
-        volumeMounts:
-          {{- toYaml . | nindent 10 }}
-        {{- end }}
-        {{- end }}
-      - name: epp
-        image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
-        imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "IfNotPresent" }}
-        args:
-        - --pool-name
-        - {{ .Release.Name }}
-        # The pool namespace is optional because EPP can default to the NAMESPACE env var.
-        # We still keep this here so that the template works with older versions of EPP, or other
-        # distros of EPP which may not have implemented the NAMESPACE env var defaulting behavior.
-        - --pool-namespace
-        - {{ .Release.Namespace }}
-        {{- if ne .Values.inferencePool.apiVersion "inference.networking.k8s.io" }}
-        - --pool-group
-        - "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"
-        {{- end }}
-        - --zap-encoder
-        - "json"
-        - --config-file
-        - "/config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
-        {{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
-        - --total-queued-requests-metric
-        - "nv_trt_llm_request_metrics{request_type=waiting}"
-        - --kv-cache-usage-percentage-metric
-        - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}"
-        - --lora-info-metric
-        - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet.
-        {{- end }}
-        {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
-        - --ha-enable-leader-election
-        {{- end }}
-        # Pass additional flags via the inferenceExtension.flags field in values.yaml.
-        {{- range $key, $value := .Values.inferenceExtension.flags }}
-        - --{{ $key }}
-        - "{{ $value }}"
-        {{- end }}
-        {{- if .Values.inferenceExtension.tracing.enabled }}
-        - --tracing=true
-        {{- else }}
-        - --tracing=false
-        {{- end }}
-        {{- if not .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
-        - --metrics-endpoint-auth=false
-        {{- end }}
-        ports:
-        - name: grpc
-          containerPort: 9002
-        - name: grpc-health
-          containerPort: 9003
-        - name: metrics
-          containerPort: 9090
-        {{- if .Values.inferenceExtension.extraContainerPorts }}
-        {{- toYaml .Values.inferenceExtension.extraContainerPorts | nindent 8 }}
-        {{- end }}
-        livenessProbe:
-          {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
-          grpc:
-            port: 9003
-            service: liveness
-          {{- else }}
-          grpc:
-            port: 9003
-            service: inference-extension
-          {{- end }}
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        readinessProbe:
-          {{- if gt (.Values.inferenceExtension.replicas | int) 1 }}
-          grpc:
-            port: 9003
-            service: readiness
-          {{- else }}
-          grpc:
-            port: 9003
-            service: inference-extension
-          {{- end }}
-          periodSeconds: 2
-        env:
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: POD_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        {{- include "gateway-api-inference-extension.latencyPredictor.env" . | nindent 8 }}
-        {{- if .Values.inferenceExtension.tracing.enabled }}
-        - name: OTEL_SERVICE_NAME
-          value: "gateway-api-inference-extension"
-        - name: OTEL_EXPORTER_OTLP_ENDPOINT
-          value: {{ .Values.inferenceExtension.tracing.otelExporterEndpoint | quote }}
-        - name: OTEL_TRACES_EXPORTER
-          value: "otlp"
-        - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME
-          valueFrom:
-            fieldRef:
-              apiVersion: v1
-              fieldPath: spec.nodeName
-        - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME
-          valueFrom:
-            fieldRef:
-              apiVersion: v1
-              fieldPath: metadata.name
-        - name: OTEL_RESOURCE_ATTRIBUTES
-          value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)'
-        - name: OTEL_TRACES_SAMPLER
-          value: {{ .Values.inferenceExtension.tracing.sampling.sampler | quote }}
-        - name: OTEL_TRACES_SAMPLER_ARG
-          value: {{ .Values.inferenceExtension.tracing.sampling.samplerArg | quote }}
-        {{- end }}
-        {{- if .Values.inferenceExtension.env }}
-        {{- toYaml .Values.inferenceExtension.env | nindent 8 }}
-        {{- end }}
-        volumeMounts:
-        - name: plugins-config-volume
-          mountPath: "/config"
-        {{- if .Values.inferenceExtension.volumeMounts }}
-        {{- toYaml .Values.inferenceExtension.volumeMounts | nindent 8 }}
-        {{- end }}
-      {{- include "gateway-api-inference-extension.latencyPredictor.containers" . | nindent 6 }}
-      volumes:
-      {{- if .Values.inferenceExtension.volumes }}
-      {{- toYaml .Values.inferenceExtension.volumes | nindent 6 }}
-      {{- end }}
-      {{- if .Values.inferenceExtension.sidecar.volumes }}
-      {{- tpl (toYaml .Values.inferenceExtension.sidecar.volumes) $ | nindent 6 }}
-      {{- end }}
-      - name: plugins-config-volume
-        configMap:
-          name: {{ include "gateway-api-inference-extension.name" . }}
-      {{- include "gateway-api-inference-extension.latencyPredictor.volumes" . | nindent 6 }}
-      {{- if .Values.inferenceExtension.affinity }}
-      affinity:
-        {{- toYaml .Values.inferenceExtension.affinity | nindent 8 }}
-      {{- end }}
-      {{- if .Values.inferenceExtension.tolerations }}
-      tolerations:
-        {{- toYaml .Values.inferenceExtension.tolerations | nindent 8 }}
-      {{- end }}
+{{- include "inference-extension.deployment" . -}}
diff --git a/config/charts/inferencepool/templates/epp-leader-election-rbac.yaml b/config/charts/inferencepool/templates/epp-leader-election-rbac.yaml
new file mode 100644
index 0000000000..6820306788
--- /dev/null
+++ b/config/charts/inferencepool/templates/epp-leader-election-rbac.yaml
@@ -0,0 +1 @@
+{{- include "inference-extension.lead-election-rbac" . -}}
diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml
index 16d935f965..ec13d9dce8 100644
--- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml
+++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml
@@ -1,12 +1 @@
-{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled (ne (lower .Values.provider.name) "gke") }}
-apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-  annotations:
-    kubernetes.io/service-account.name: {{ include "gateway-api-inference-extension.name" . }}
-type: kubernetes.io/service-account-token
-{{- end }}
\ No newline at end of file
+{{- include "inference-extension.sa-token-secret" . -}}
diff --git a/config/charts/inferencepool/templates/epp-service-monitor.yaml b/config/charts/inferencepool/templates/epp-service-monitor.yaml
new file mode 100644
index 0000000000..2e5f7a0d3e
--- /dev/null
+++ b/config/charts/inferencepool/templates/epp-service-monitor.yaml
@@ -0,0 +1 @@
+{{- include "inference-extension.service-monitor" . -}}
diff --git a/config/charts/inferencepool/templates/epp-service.yaml b/config/charts/inferencepool/templates/epp-service.yaml
index 2d476e1826..fb6ab40573 100644
--- a/config/charts/inferencepool/templates/epp-service.yaml
+++ b/config/charts/inferencepool/templates/epp-service.yaml
@@ -1,21 +1 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-spec:
-  selector:
-    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
-  ports:
-    - name: grpc-ext-proc
-      protocol: TCP
-      port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
-    - name: http-metrics
-      protocol: TCP
-      port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
-    {{- with .Values.inferenceExtension.extraServicePorts }}
-    {{- . | toYaml | nindent 4 }}
-    {{- end }}
-  type: ClusterIP
+{{- include "inference-extension.service" . -}}
diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml
index 2ee2e13fc9..e8b4ab8d91 100644
--- a/config/charts/inferencepool/templates/gke.yaml
+++ b/config/charts/inferencepool/templates/gke.yaml
@@ -20,8 +20,8 @@ spec:
     config:
       type: HTTP
       httpHealthCheck:
-          requestPath: /health
-          port:  {{ .Values.inferencePool.targetPortNumber }}
+        requestPath: /health
+        port:  {{ .Values.inferencePool.targetPortNumber }}
 ---
 apiVersion: networking.gke.io/v1
 kind: GCPBackendPolicy
@@ -40,107 +40,5 @@ spec:
     logging:
       enabled: true    # log all requests by default
 ---
-{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
-{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
-{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
-{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
-{{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}}
-{{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}}
-{{- $gmpNamespace := "gmp-system" -}}
-{{- $isAutopilot := false -}}
-{{- with .Values.provider.gke }}
-  {{- $isAutopilot = .autopilot | default false -}}
-{{- end }}
-{{- if $isAutopilot -}}
-{{-   $gmpNamespace = "gke-gmp-system" -}}
-{{- end -}}
-{{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}}
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: {{ $metricsReadSA }}
-  namespace: {{ .Release.Namespace }}
----
-apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ $metricsReadSecretName }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-  annotations:
-    kubernetes.io/service-account.name: {{ $metricsReadSA }}
-type: kubernetes.io/service-account-token
----
-apiVersion: monitoring.googleapis.com/v1
-kind: PodMonitoring
-metadata:
-  name: {{ .Release.Name }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-spec:
-  endpoints:
-  - port: metrics
-    scheme: http
-    interval: {{ .Values.inferenceExtension.monitoring.interval }}
-    path: /metrics
-    authorization:
-      type: Bearer
-      credentials:
-        secret:
-          name: {{ $metricsReadSecretName }}
-          key: token
-  selector:
-    matchLabels:
-      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: {{ $metricsReadRoleName }}
-rules:
-- nonResourceURLs:
-  - /metrics
-  verbs:
-  - get
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  name: {{ $metricsReadRoleBindingName }}
-subjects:
-- kind: ServiceAccount
-  name: {{ $metricsReadSA }}
-  namespace: {{ .Release.Namespace }}
-roleRef:
-  kind: ClusterRole
-  name: {{ $metricsReadRoleName }}
-  apiGroup: rbac.authorization.k8s.io
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: Role
-metadata:
-  name: {{ $secretReadRoleName }}
-rules:
-- resources:
-  - secrets
-  apiGroups: [""]
-  verbs: ["get", "list", "watch"]
-  resourceNames: [{{ $metricsReadSecretName | quote }}]
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: {{ $gmpCollectorRoleBindingName }}
-  namespace: {{ .Release.Namespace }}
-roleRef:
-  name: {{ $secretReadRoleName }}
-  kind: Role
-  apiGroup: rbac.authorization.k8s.io
-subjects:
-- name: collector
-  namespace: {{ $gmpNamespace }}
-  kind: ServiceAccount
-{{- end }}
 {{- end }}
+{{- include "inference-extension.gke" . -}}
diff --git a/config/charts/inferencepool/templates/httproute.yaml b/config/charts/inferencepool/templates/httproute.yaml
index a280d1581b..c448c7a430 100644
--- a/config/charts/inferencepool/templates/httproute.yaml
+++ b/config/charts/inferencepool/templates/httproute.yaml
@@ -24,6 +24,8 @@ spec:
           name: X-Gateway-Base-Model-Name
           value: {{ .Values.experimentalHttpRoute.baseModel }}
       {{- end }}
+    {{- if ne (lower .Values.provider.name) "gke" }}
     timeouts:
       request: 300s
+    {{- end }}
 {{- end }}
diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml
index 5c973b9983..796c832211 100644
--- a/config/charts/inferencepool/templates/inferencepool.yaml
+++ b/config/charts/inferencepool/templates/inferencepool.yaml
@@ -44,5 +44,3 @@ spec:
     port:
       number: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
 {{- end }}
-
-
diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml
index dc6b3e0c4a..c2fed9ab23 100644
--- a/config/charts/inferencepool/templates/rbac.yaml
+++ b/config/charts/inferencepool/templates/rbac.yaml
@@ -1,46 +1,7 @@
-{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
-kind: ClusterRole
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: {{ include "gateway-api-inference-extension.cluster-rbac-name" . }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-rules:
-- apiGroups:
-  - authentication.k8s.io
-  resources:
-  - tokenreviews
-  verbs:
-  - create
-- apiGroups:
-  - authorization.k8s.io
-  resources:
-  - subjectaccessreviews
-  verbs:
-  - create
-- nonResourceURLs:
-  - "/metrics"
-  verbs:
-  - get
----
-kind: ClusterRoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: {{ include "gateway-api-inference-extension.cluster-rbac-name" . }}
-subjects:
-- kind: ServiceAccount
-  name: {{ include "gateway-api-inference-extension.name" . }}
-  namespace: {{ .Release.Namespace }}
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: {{ include "gateway-api-inference-extension.cluster-rbac-name" . }}
-{{- end }}
----
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
 metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
+  name: {{ printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }}
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
@@ -51,14 +12,11 @@ rules:
 - apiGroups: ["{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"]
   resources: ["inferencepools"]
   verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
 metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
+  name: {{ printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }}
   namespace: {{ .Release.Namespace }}
 subjects:
 - kind: ServiceAccount
@@ -67,12 +25,6 @@ subjects:
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: Role
-  name: {{ include "gateway-api-inference-extension.name" . }}
+  name: {{ printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }}
 ---
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+{{- include "inference-extension.rbac" . -}}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
index c6cc51e8f9..1803e2cbaa 100644
--- a/config/charts/inferencepool/values.yaml
+++ b/config/charts/inferencepool/values.yaml
@@ -43,7 +43,7 @@ inferenceExtension:
   affinity: {}
 
   tolerations: []
-  
+
   # Sidecar configuration for EPP
   sidecar:
     enabled: false
@@ -70,7 +70,7 @@ inferenceExtension:
   # Latency Predictor Configuration
   latencyPredictor:
     enabled: false
-    
+
     # Training Server Configuration
     trainingServer:
       image:
@@ -154,12 +154,12 @@ inferencePool:
   targetPorts:
     - number: 8000
   modelServerType: vllm # vllm, triton-tensorrt-llm
-  apiVersion: inference.networking.k8s.io/v1 
+  apiVersion: inference.networking.k8s.io/v1
   # modelServers: # REQUIRED
   #   matchLabels:
   #     app: vllm-llama3-8b-instruct
 
-  # Should only used if apiVersion is inference.networking.x-k8s.io/v1alpha2, 
+  # Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2,
   # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now.
   targetPortNumber: 8000
 
diff --git a/hack/verify-helm.sh b/hack/verify-helm.sh
index 0388b6e24d..e9bb8dd09c 100755
--- a/hack/verify-helm.sh
+++ b/hack/verify-helm.sh
@@ -27,6 +27,13 @@ test_cases_inference_pool["multiple-replicas"]="--set inferencePool.replicas=3 -
 # source (such as in the verify-all script)
 make helm-install
 
+# Build the chart's dependencies before the helm template test cases below run.
+echo "Building dependencies for inferencePool chart..."
+if ! "${SCRIPT_ROOT}/bin/helm" dependency build "${SCRIPT_ROOT}/config/charts/inferencepool"; then
+  echo "Helm dependency build failed."
+  exit 1
+fi
+
 # Running tests cases
 echo "Running helm template command for inferencePool chart..."
 # Loop through the keys of the associative array
diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md
index 0867537549..8f10292c5c 100644
--- a/site-src/_includes/epp-latest.md
+++ b/site-src/_includes/epp-latest.md
@@ -3,6 +3,7 @@
       ```bash
       export GATEWAY_PROVIDER=gke
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \
@@ -14,6 +15,7 @@
       ```bash
       export GATEWAY_PROVIDER=istio
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \
@@ -25,6 +27,7 @@
       ```bash
       export GATEWAY_PROVIDER=none
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \
@@ -36,6 +39,7 @@
       ```bash
       export GATEWAY_PROVIDER=none
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \
diff --git a/site-src/_includes/epp.md b/site-src/_includes/epp.md
index df0a7b6a89..61ffb9be8d 100644
--- a/site-src/_includes/epp.md
+++ b/site-src/_includes/epp.md
@@ -3,6 +3,7 @@
       ```bash
       export GATEWAY_PROVIDER=gke
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --set experimentalHttpRoute.enabled=true \
@@ -15,6 +16,7 @@
       ```bash
       export GATEWAY_PROVIDER=istio
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --set experimentalHttpRoute.enabled=true \
@@ -27,6 +29,7 @@
       ```bash
       export GATEWAY_PROVIDER=none
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --set experimentalHttpRoute.enabled=true \
@@ -39,6 +42,7 @@
       ```bash
       export GATEWAY_PROVIDER=none
       helm install vllm-llama3-8b-instruct \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
       --set experimentalHttpRoute.enabled=true \
diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md
index 10b0aeb5b8..94b6625e2e 100644
--- a/site-src/guides/getting-started-latest.md
+++ b/site-src/guides/getting-started-latest.md
@@ -106,7 +106,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens
       1. Install NGINX Gateway Fabric with the Inference Extension enabled by setting the `nginxGateway.gwAPIInferenceExtension.enable=true` Helm value
 
          ```bash
-         helm install ngf oci://ghcr.io/nginx/charts/nginx-gateway-fabric --create-namespace -n nginx-gateway --set nginxGateway.gwAPIInferenceExtension.enable=true
+         helm install ngf oci://ghcr.io/nginx/charts/nginx-gateway-fabric --create-namespace -n nginx-gateway --dependency-update --set nginxGateway.gwAPIInferenceExtension.enable=true
          ```
          This enables NGINX Gateway Fabric to watch and manage Inference Extension resources such as InferencePool and InferenceObjective.
 
diff --git a/site-src/guides/serve-multiple-genai-models.md b/site-src/guides/serve-multiple-genai-models.md
index f1b8185d8e..78b5026f8d 100644
--- a/site-src/guides/serve-multiple-genai-models.md
+++ b/site-src/guides/serve-multiple-genai-models.md
@@ -160,6 +160,7 @@ Select a tab to follow the provider-specific instructions.
       ```bash
       export GATEWAY_PROVIDER=gke
       helm install vllm-deepseek-r1 \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \
@@ -171,6 +172,7 @@ Select a tab to follow the provider-specific instructions.
       ```bash
       export GATEWAY_PROVIDER=istio
       helm install vllm-deepseek-r1 \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \
@@ -181,6 +183,7 @@ Select a tab to follow the provider-specific instructions.
       ```bash
       export GATEWAY_PROVIDER=none
       helm install vllm-deepseek-r1 \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \
@@ -192,6 +195,7 @@ Select a tab to follow the provider-specific instructions.
       ```bash
       export GATEWAY_PROVIDER=none
       helm install vllm-deepseek-r1 \
+      --dependency-update \
       --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \
       --set provider.name=$GATEWAY_PROVIDER \
       --version $IGW_CHART_VERSION \