From 8f4c3953cdbfaf4b89202675d21fc6d2732cd654 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Sun, 11 Jan 2026 19:41:52 -0800 Subject: [PATCH 01/11] inferencepool workable --- config/charts/epp-standalone/.helmignore | 23 ++ .../epp-standalone/templates/_helpers.tpl | 0 config/charts/epp-standalone/values.yaml | 298 ++++++++++++++++++ config/charts/inference-extension/.helmignore | 23 ++ config/charts/inference-extension/Chart.yaml | 10 + .../templates/_config.yaml | 91 ++++++ .../templates/_deployment.yaml | 223 +++++++++++++ .../inference-extension/templates/_gke.yaml | 108 +++++++ .../templates/_helpers.tpl | 7 + .../templates/_leader-election-rbac.yaml} | 3 + .../inference-extension/templates/_rbac.yaml | 75 +++++ .../templates/_sa-token-secret.yaml | 15 + .../templates/_service.yaml | 24 ++ .../templates/_servicemonitor.yaml} | 3 + config/charts/inferencepool/Chart.lock | 6 + config/charts/inferencepool/Chart.yaml | 4 + .../charts/inference-extension-0.0.0.tgz | Bin 0 -> 5051 bytes .../inferencepool/templates/epp-config.yaml | 89 +----- .../templates/epp-deployment.yaml | 208 +----------- .../templates/epp-leader-election-rbac.yaml | 1 + .../templates/epp-sa-token-secret.yaml | 13 +- .../templates/epp-service-monitor.yaml | 1 + .../inferencepool/templates/epp-service.yaml | 22 +- .../charts/inferencepool/templates/gke.yaml | 108 +------ .../inferencepool/templates/httproute.yaml | 3 +- .../charts/inferencepool/templates/rbac.yaml | 74 +---- config/charts/inferencepool/values.yaml | 11 +- 27 files changed, 943 insertions(+), 500 deletions(-) create mode 100644 config/charts/epp-standalone/.helmignore create mode 100644 config/charts/epp-standalone/templates/_helpers.tpl create mode 100644 config/charts/epp-standalone/values.yaml create mode 100644 config/charts/inference-extension/.helmignore create mode 100644 config/charts/inference-extension/Chart.yaml create mode 100644 config/charts/inference-extension/templates/_config.yaml create mode 100644 
config/charts/inference-extension/templates/_deployment.yaml create mode 100644 config/charts/inference-extension/templates/_gke.yaml rename config/charts/{inferencepool => inference-extension}/templates/_helpers.tpl (72%) rename config/charts/{inferencepool/templates/leader-election-rbac.yaml => inference-extension/templates/_leader-election-rbac.yaml} (93%) create mode 100644 config/charts/inference-extension/templates/_rbac.yaml create mode 100644 config/charts/inference-extension/templates/_sa-token-secret.yaml create mode 100644 config/charts/inference-extension/templates/_service.yaml rename config/charts/{inferencepool/templates/epp-servicemonitor.yaml => inference-extension/templates/_servicemonitor.yaml} (94%) create mode 100644 config/charts/inferencepool/Chart.lock create mode 100644 config/charts/inferencepool/charts/inference-extension-0.0.0.tgz create mode 100644 config/charts/inferencepool/templates/epp-leader-election-rbac.yaml create mode 100644 config/charts/inferencepool/templates/epp-service-monitor.yaml diff --git a/config/charts/epp-standalone/.helmignore b/config/charts/epp-standalone/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/config/charts/epp-standalone/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/config/charts/epp-standalone/templates/_helpers.tpl b/config/charts/epp-standalone/templates/_helpers.tpl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/config/charts/epp-standalone/values.yaml b/config/charts/epp-standalone/values.yaml new file mode 100644 index 0000000000..3cb10dd5ec --- /dev/null +++ b/config/charts/epp-standalone/values.yaml @@ -0,0 +1,298 @@ +inferenceExtension: + replicas: 1 + image: + name: epp + hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension + tag: main + pullPolicy: Always + extProcPort: 9002 + extraServicePorts: + - name: http + port: 8081 + protocol: TCP + targetPort: 8081 + env: [] + pluginsConfigFile: "default-plugins.yaml" + + endpointsServer: + standalone: true + # Required when standalone is true + # endpointSelector: app=vllm-llama3-8b-instruct + targetPorts: 8000 + modelServerType: vllm # vllm, triton-tensorrt-llm + + + sidecar: + enabled: true + configMap: + name: envoy + # Because the template just dumps this section, the keys become filenames. 
+ # The values MUST be strings (note the literal block scalar '|') + data: + envoy.yaml: | + admin: + address: + socket_address: + address: 127.0.0.1 + port_value: 19000 + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/null + static_resources: + listeners: + - name: envoy-proxy-ready-0.0.0.0-19001 + address: + socket_address: + address: 0.0.0.0 + port_value: 19001 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: envoy-ready-http + route_config: + name: local_route + virtual_hosts: + - name: prometheus_stats + domains: ["*"] + routes: + - match: + prefix: "/stats/prometheus" + route: + cluster: "prometheus_stats" + http_filters: + - name: envoy.filters.http.health_check + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.health_check.v3.HealthCheck + pass_through_mode: false + headers: + - name: ":path" + string_match: + exact: "/ready" + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + - name: vllm + address: + socket_address: + address: 0.0.0.0 + port_value: 8081 + per_connection_buffer_limit_bytes: 32768 + access_log: + - name: envoy.access_loggers.file + filter: + response_flag_filter: + flags: ["NR"] + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/stdout + log_format: + text_format_source: + inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n" + filter_chains: + - name: vllm + filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + 
stat_prefix: http-8081 + route_config: + name: vllm + virtual_hosts: + - name: vllm-default + domains: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: original_destination_cluster + timeout: 86400s + idle_timeout: 86400s + upgrade_configs: + - upgrade_type: websocket + typed_per_filter_config: + envoy.filters.http.ext_proc: + "@type": type.googleapis.com/envoy.config.route.v3.FilterConfig + config: {} + http_filters: + - name: envoy.filters.http.ext_proc + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor + grpc_service: + envoy_grpc: + cluster_name: ext_proc + authority: localhost:9002 + timeout: 10s + processing_mode: + request_header_mode: SEND + response_header_mode: SEND + request_body_mode: FULL_DUPLEX_STREAMED + response_body_mode: FULL_DUPLEX_STREAMED + request_trailer_mode: SEND + response_trailer_mode: SEND + message_timeout: 1000s + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + suppress_envoy_headers: true + http2_protocol_options: + max_concurrent_streams: 100 + initial_stream_window_size: 65536 + initial_connection_window_size: 1048576 + use_remote_address: true + normalize_path: true + merge_slashes: true + server_header_transformation: PASS_THROUGH + common_http_protocol_options: + headers_with_underscores_action: REJECT_REQUEST + path_with_escaped_slashes_action: UNESCAPE_AND_REDIRECT + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: /dev/stdout + log_format: + text_format_source: + inline_string: "{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",...}\n" + clusters: + - name: prometheus_stats + type: STATIC + connect_timeout: 0.250s + load_assignment: + cluster_name: prometheus_stats + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + 
port_value: 19000 + - name: original_destination_cluster + type: ORIGINAL_DST + connect_timeout: 1000s + lb_policy: CLUSTER_PROVIDED + circuit_breakers: + thresholds: + - max_connections: 40000 + max_pending_requests: 40000 + max_requests: 40000 + original_dst_lb_config: + use_http_header: true + http_header_name: x-gateway-destination-endpoint + - name: ext_proc + type: STATIC + connect_timeout: 86400s + lb_policy: LEAST_REQUEST + circuit_breakers: + thresholds: + - max_connections: 40000 + max_pending_requests: 40000 + max_requests: 40000 + max_retries: 1024 + health_checks: + - timeout: 2s + interval: 10s + unhealthy_threshold: 3 + healthy_threshold: 2 + reuse_connection: true + grpc_health_check: + service_name: "envoy.service.ext_proc.v3.ExternalProcessor" + tls_options: + alpn_protocols: ["h2"] + transport_socket: + name: "envoy.transport_sockets.tls" + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + common_tls_context: + validation_context: + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: + initial_stream_window_size: 65536 + initial_connection_window_size: 1048576 + load_assignment: + cluster_name: ext_proc + endpoints: + - locality: + region: ext_proc/e2e/0 + lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 9002 + load_balancing_weight: 1 + name: envoy-sidecar + image: docker.io/envoyproxy/envoy:distroless-v1.33.2 + command: "envoy" + args: + - "--service-node" + - "envoy-sidecar" + - "--log-level" + - "trace" + - "--cpuset-threads" + - "--drain-strategy" + - "immediate" + - "--drain-time-s" + - "60" + - "-c" + - "/etc/envoy/envoy.yaml" + env: + - name: NS_NAME + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: 
metadata.name + ports: + - containerPort: 8081 + name: http-8081 + - containerPort: 19001 + name: metrics-19001 + resources: + requests: + cpu: 100m + memory: 512Mi + + readinessProbe: + failureThreshold: 1 + httpGet: + path: /ready + port: 19001 + scheme: HTTP + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + + volumeMounts: + - name: config + mountPath: /etc/envoy + readOnly: true + volumes: + - name: config + configMap: + name: envoy + items: + - key: envoy.yaml + path: envoy.yaml + monitoring: + interval: "10s" + # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection + prometheus: + enabled: false + auth: + # To allow unauthenticated /metrics access (e.g., for debugging with curl), set to false + enabled: true + + tracing: + enabled: false + + latencyPredictor: + enabled: false diff --git a/config/charts/inference-extension/.helmignore b/config/charts/inference-extension/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/config/charts/inference-extension/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/config/charts/inference-extension/Chart.yaml b/config/charts/inference-extension/Chart.yaml new file mode 100644 index 0000000000..2081320c2e --- /dev/null +++ b/config/charts/inference-extension/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +name: inference-extension +description: A Helm chart for Kubernetes + +type: library + +version: 0.0.0 + +appVersion: "0.0.0" + diff --git a/config/charts/inference-extension/templates/_config.yaml b/config/charts/inference-extension/templates/_config.yaml new file mode 100644 index 0000000000..38bba5aec9 --- /dev/null +++ b/config/charts/inference-extension/templates/_config.yaml @@ -0,0 +1,91 @@ +{{- define "inference-extension.config" -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "gateway-api-inference-extension.name" . 
}} + namespace: {{ .Release.Namespace }} +data: + default-plugins.yaml: | + apiVersion: inference.networking.x-k8s.io/v1alpha1 + kind: EndpointPickerConfig + plugins: + - type: queue-scorer + - type: kv-cache-utilization-scorer + - type: prefix-cache-scorer + {{- if .Values.inferenceExtension.latencyPredictor.enabled }} + - type: predicted-latency-scorer + parameters: + {{- with .Values.inferenceExtension.latencyPredictor.sloAwareRouting | default dict }} + samplingMean: {{ .samplingMean | default 1000.0 }} + maxSampledTokens: {{ .maxSampledTokens | default 20 }} + sloBufferFactor: {{ .sloBufferFactor | default 1.0 }} + negHeadroomTTFTWeight: {{ .negHeadroomTTFTWeight | default 0.8 }} + negHeadroomTPOTWeight: {{ .negHeadroomTPOTWeight | default 0.2 }} + headroomTTFTWeight: {{ .headroomTTFTWeight | default 0.8 }} + headroomTPOTWeight: {{ .headroomTPOTWeight | default 0.2 }} + headroomSelectionStrategy: {{ .headroomSelectionStrategy | default "least" | quote }} + compositeKVWeight: {{ .compositeKVWeight | default 1.0 }} + compositeQueueWeight: {{ .compositeQueueWeight | default 1.0 }} + compositePrefixWeight: {{ .compositePrefixWeight | default 1.0 }} + epsilonExploreSticky: {{ .epsilonExploreSticky | default 0.01 }} + epsilonExploreNeg: {{ .epsilonExploreNeg | default 0.01 }} + affinityGateTau: {{ .affinityGateTau | default 0.80 }} + affinityGateTauGlobal: {{ .affinityGateTauGlobal | default 0.99 }} + selectionMode: {{ .selectionMode | default "linear" | quote }} + {{- end }} + {{- end }} + schedulingProfiles: + {{- if .Values.inferenceExtension.latencyPredictor.enabled }} + - name: default + plugins: + - pluginRef: predicted-latency-scorer + featureGates: + - prepareDataPlugins + {{- else }} + - name: default + plugins: + - pluginRef: queue-scorer + weight: 2 + - pluginRef: kv-cache-utilization-scorer + weight: 2 + - pluginRef: prefix-cache-scorer + weight: 3 + {{- end }} + {{- if (hasKey .Values.inferenceExtension "pluginsCustomConfig") }} + {{- 
.Values.inferenceExtension.pluginsCustomConfig | toYaml | nindent 2 }} + {{- end }} + +--- +{{- if and .Values.inferenceExtension.sidecar.enabled .Values.inferenceExtension.sidecar.configMapData }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "gateway-api-inference-extension.name" . }}-sidecar + namespace: {{ .Release.Namespace }} +data: + {{- .Values.inferenceExtension.sidecar.configMapData | toYaml | nindent 2 }} +{{- end }} +--- +{{- if .Values.inferenceExtension.latencyPredictor.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-training + namespace: {{ .Release.Namespace }} +data: + {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.trainingServer.config }} + {{ $key }}: {{ $value | quote }} +{{- end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-prediction + namespace: {{ .Release.Namespace }} +data: + {{- range $key, $value := .Values.inferenceExtension.latencyPredictor.predictionServers.config }} + {{ $key }}: {{ $value | quote }} + {{- end }} +{{- end }} +--- +{{- end -}} \ No newline at end of file diff --git a/config/charts/inference-extension/templates/_deployment.yaml b/config/charts/inference-extension/templates/_deployment.yaml new file mode 100644 index 0000000000..d1729bbda4 --- /dev/null +++ b/config/charts/inference-extension/templates/_deployment.yaml @@ -0,0 +1,223 @@ +{{- define "inference-extension.deployment" -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.inferenceExtension.replicas | default 1 }} + strategy: + # The current recommended EPP deployment pattern is to have a single active replica. 
This ensures + # optimal performance of stateful operations such as the prefix-cache-aware scorer. + # With the Recreate strategy the old replica is killed immediately, allowing the new replica(s) to + # quickly take over. This is particularly important in the high-availability setup with leader + # election, as the rolling update strategy would prevent the old leader being killed because + # otherwise the maxUnavailable would be 100%. + type: Recreate + selector: + matchLabels: + {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "gateway-api-inference-extension.name" . }} + # Conservatively, this timeout should mirror the longest grace period of the pods within the pool + terminationGracePeriodSeconds: 130 + containers: + {{- if .Values.inferenceExtension.sidecar.enabled }} + - name: {{ .Values.inferenceExtension.sidecar.name }} + image: {{ .Values.inferenceExtension.sidecar.image }} + imagePullPolicy: {{ .Values.inferenceExtension.sidecar.imagePullPolicy | default "IfNotPresent" }} + {{- with .Values.inferenceExtension.sidecar.command }} + command: + - {{ . | quote }} + {{- end }} + {{- with .Values.inferenceExtension.sidecar.args }} + args: + {{- range . }} + - {{ . | quote }} + {{- end }} + {{- end }} + {{- with .Values.inferenceExtension.sidecar.env }} + env: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.inferenceExtension.sidecar.ports }} + ports: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.inferenceExtension.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.inferenceExtension.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.inferenceExtension.sidecar.resources }} + resources: + {{- toYaml . 
| nindent 10 }} + {{- end }} + {{- with .Values.inferenceExtension.sidecar.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- end }} + - name: epp + image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }} + imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "IfNotPresent" }} + args: + {{- /* 1. Determine Model Server Type Logic: standalone reads endpointsServer.modelServerType, otherwise inferencePool.modelServerType; both default to vllm */ -}} + {{- $modelServerType := "vllm" }} + {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}} + {{- $modelServerType = .Values.inferenceExtension.endpointsServer.modelServerType | default "vllm" }} + {{- else }} + {{- $modelServerType = .Values.inferencePool.modelServerType | default "vllm" }} + {{- end }} + {{- /* 2. Mode Specific Flags. The if below must NOT right-trim: everything above is already trimmed back to args, so trimming here would glue the first flag onto the args key and render invalid YAML. */ -}} + {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone }} + - --endpoint-selector + - {{ .Values.inferenceExtension.endpointsServer.endpointSelector | quote }} + - --endpoint-target-ports + - {{ .Values.inferenceExtension.endpointsServer.targetPorts | quote }} + {{- else }} + - --pool-name + - {{ .Release.Name }} + # The pool namespace is optional because EPP can default to the NAMESPACE env var. + - --pool-namespace + - {{ .Release.Namespace }} + {{- if ne .Values.inferencePool.apiVersion "inference.networking.k8s.io" }} + - --pool-group + - "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}" + {{- end }} + {{- end }} + {{- if eq $modelServerType "triton-tensorrt-llm" }} + - --total-queued-requests-metric + - "nv_trt_llm_request_metrics{request_type=waiting}" + - --kv-cache-usage-percentage-metric + - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}" + - --lora-info-metric + - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet. 
+ {{- end }} + - --zap-encoder + - "json" + - --config-file + - "/config/{{ .Values.inferenceExtension.pluginsConfigFile }}" + {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} + - --ha-enable-leader-election + {{- end }} + # Pass additional flags via the inferenceExtension.flags field in values.yaml. + {{- range $key, $value := .Values.inferenceExtension.flags }} + - --{{ $key }} + - "{{ $value }}" + {{- end }} + {{- if .Values.inferenceExtension.tracing.enabled }} + - --tracing=true + {{- else }} + - --tracing=false + {{- end }} + {{- if not .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} + - --metrics-endpoint-auth=false + {{- end }} + ports: + - name: grpc + containerPort: 9002 + - name: grpc-health + containerPort: 9003 + - name: metrics + containerPort: 9090 + {{- if .Values.inferenceExtension.extraContainerPorts }} + {{- toYaml .Values.inferenceExtension.extraContainerPorts | nindent 8 }} + {{- end }} + livenessProbe: + {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} + grpc: + port: 9003 + service: liveness + {{- else }} + grpc: + port: 9003 + service: inference-extension + {{- end }} + initialDelaySeconds: 5 + periodSeconds: 10 + readinessProbe: + {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} + grpc: + port: 9003 + service: readiness + {{- else }} + grpc: + port: 9003 + service: inference-extension + {{- end }} + periodSeconds: 2 + env: + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + {{- include "gateway-api-inference-extension.latencyPredictor.env" . 
| nindent 8 }} + {{- if .Values.inferenceExtension.tracing.enabled }} + - name: OTEL_SERVICE_NAME + value: "gateway-api-inference-extension" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.inferenceExtension.tracing.otelExporterEndpoint | quote }} + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_RESOURCE_ATTRIBUTES_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: OTEL_RESOURCE_ATTRIBUTES_POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: OTEL_RESOURCE_ATTRIBUTES + value: 'k8s.namespace.name=$(NAMESPACE),k8s.node.name=$(OTEL_RESOURCE_ATTRIBUTES_NODE_NAME),k8s.pod.name=$(OTEL_RESOURCE_ATTRIBUTES_POD_NAME)' + - name: OTEL_TRACES_SAMPLER + value: {{ .Values.inferenceExtension.tracing.sampling.sampler | quote }} + - name: OTEL_TRACES_SAMPLER_ARG + value: {{ .Values.inferenceExtension.tracing.sampling.samplerArg | quote }} + {{- end }} + {{- if .Values.inferenceExtension.env }} + {{- toYaml .Values.inferenceExtension.env | nindent 8 }} + {{- end }} + volumeMounts: + - name: plugins-config-volume + mountPath: "/config" + {{- if .Values.inferenceExtension.volumeMounts }} + {{- toYaml .Values.inferenceExtension.volumeMounts | nindent 8 }} + {{- end }} + {{- include "gateway-api-inference-extension.latencyPredictor.containers" . | nindent 6 }} + volumes: + {{- if .Values.inferenceExtension.volumes }} + {{- toYaml .Values.inferenceExtension.volumes | nindent 6 }} + {{- end }} + {{- if .Values.inferenceExtension.sidecar.volumes }} + {{- tpl (toYaml .Values.inferenceExtension.sidecar.volumes) $ | nindent 6 }} + {{- end }} + - name: plugins-config-volume + configMap: + name: {{ include "gateway-api-inference-extension.name" . }} + {{- include "gateway-api-inference-extension.latencyPredictor.volumes" . 
| nindent 6 }} + {{- if .Values.inferenceExtension.affinity }} + affinity: + {{- toYaml .Values.inferenceExtension.affinity | nindent 8 }} + {{- end }} + {{- if .Values.inferenceExtension.tolerations }} + tolerations: + {{- toYaml .Values.inferenceExtension.tolerations | nindent 8 }} + {{- end }} +--- +{{- end }} \ No newline at end of file diff --git a/config/charts/inference-extension/templates/_gke.yaml b/config/charts/inference-extension/templates/_gke.yaml new file mode 100644 index 0000000000..3f2b424986 --- /dev/null +++ b/config/charts/inference-extension/templates/_gke.yaml @@ -0,0 +1,108 @@ +{{- define "inference-extension.gke" -}} +{{- if eq (lower .Values.provider.name) "gke" }} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} +{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} +{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} +{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} +{{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}} +{{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}} +{{- $gmpNamespace := "gmp-system" -}} +{{- $isAutopilot := false -}} +{{- with .Values.provider.gke }} + {{- $isAutopilot = .autopilot | default false -}} +{{- end }} +{{- if $isAutopilot -}} +{{- $gmpNamespace = "gke-gmp-system" -}} +{{- end -}} +{{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ $metricsReadSA }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ $metricsReadSecretName }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ $metricsReadSA }} +type: kubernetes.io/service-account-token +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +spec: + endpoints: + - port: metrics + scheme: http + interval: {{ .Values.inferenceExtension.monitoring.interval }} + path: /metrics + authorization: + type: Bearer + credentials: + secret: + name: {{ $metricsReadSecretName }} + key: token + selector: + matchLabels: + {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ $metricsReadRoleName }} +rules: + - nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ $metricsReadRoleBindingName }} +subjects: + - kind: ServiceAccount + name: {{ $metricsReadSA }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ $metricsReadRoleName }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ $secretReadRoleName }} +rules: + - resources: + - secrets + apiGroups: [""] + verbs: ["get", "list", "watch"] + resourceNames: [{{ $metricsReadSecretName | quote }}] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ $gmpCollectorRoleBindingName }} + namespace: {{ .Release.Namespace }} +roleRef: + name: {{ $secretReadRoleName }} + kind: Role + apiGroup: rbac.authorization.k8s.io +subjects: + - name: collector + namespace: {{ $gmpNamespace }} + kind: ServiceAccount +--- +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inference-extension/templates/_helpers.tpl 
similarity index 72% rename from config/charts/inferencepool/templates/_helpers.tpl rename to config/charts/inference-extension/templates/_helpers.tpl index fdc9b1a2b7..1e81f1ff23 100644 --- a/config/charts/inferencepool/templates/_helpers.tpl +++ b/config/charts/inference-extension/templates/_helpers.tpl @@ -29,5 +29,12 @@ Cluster RBAC unique name Selector labels */}} {{- define "gateway-api-inference-extension.selectorLabels" -}} +{{- /* Check if endpointsServer exists AND if standalone is true */ -}} +{{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}} +{{- /* LOGIC FOR STANDALONE EPP MODE */ -}} +epp: {{ include "gateway-api-inference-extension.name" . }} +{{- else -}} +{{- /* LOGIC FOR PARENT (INFERENCEPOOL) MODE */ -}} inferencepool: {{ include "gateway-api-inference-extension.name" . }} {{- end -}} +{{- end -}} \ No newline at end of file diff --git a/config/charts/inferencepool/templates/leader-election-rbac.yaml b/config/charts/inference-extension/templates/_leader-election-rbac.yaml similarity index 93% rename from config/charts/inferencepool/templates/leader-election-rbac.yaml rename to config/charts/inference-extension/templates/_leader-election-rbac.yaml index 11b3dd5168..0ce57f7616 100644 --- a/config/charts/inferencepool/templates/leader-election-rbac.yaml +++ b/config/charts/inference-extension/templates/_leader-election-rbac.yaml @@ -1,3 +1,4 @@ +{{- define "inference-extension.lead-election-rbac" -}} {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} --- kind: Role @@ -27,4 +28,6 @@ roleRef: apiGroup: rbac.authorization.k8s.io kind: Role name: {{ include "gateway-api-inference-extension.name" . 
}}-leader-election +--- {{- end }} +{{- end }} \ No newline at end of file diff --git a/config/charts/inference-extension/templates/_rbac.yaml b/config/charts/inference-extension/templates/_rbac.yaml new file mode 100644 index 0000000000..c0b6b3f417 --- /dev/null +++ b/config/charts/inference-extension/templates/_rbac.yaml @@ -0,0 +1,75 @@ +{{- define "inference-extension.rbac" -}} +{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "gateway-api-inference-extension.cluster-rbac-name" . }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create + - nonResourceURLs: + - "/metrics" + verbs: + - get +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "gateway-api-inference-extension.cluster-rbac-name" . }} +subjects: + - kind: ServiceAccount + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "gateway-api-inference-extension.cluster-rbac-name" . }} +{{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ printf "%s-sa" (include "gateway-api-inference-extension.name" .) }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +rules: + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ printf "%s-sa" (include "gateway-api-inference-extension.name" .) 
}} + namespace: {{ .Release.Namespace }} +subjects: + - kind: ServiceAccount + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ printf "%s-sa" (include "gateway-api-inference-extension.name" .) }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +--- +{{- end }} \ No newline at end of file diff --git a/config/charts/inference-extension/templates/_sa-token-secret.yaml b/config/charts/inference-extension/templates/_sa-token-secret.yaml new file mode 100644 index 0000000000..eb7ffec5f1 --- /dev/null +++ b/config/charts/inference-extension/templates/_sa-token-secret.yaml @@ -0,0 +1,15 @@ +{{- define "inference-extension.sa-token-secret" -}} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled (ne (lower .Values.provider.name) "gke") }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ include "gateway-api-inference-extension.name" . 
}}
+type: kubernetes.io/service-account-token  # Secret type for ServiceAccount tokens
+---
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/config/charts/inference-extension/templates/_service.yaml b/config/charts/inference-extension/templates/_service.yaml
new file mode 100644
index 0000000000..7bb28950f7
--- /dev/null
+++ b/config/charts/inference-extension/templates/_service.yaml
@@ -0,0 +1,24 @@
+{{- define "inference-extension.service" -}}  # ClusterIP Service for the extension: ext-proc gRPC port, metrics port, optional extra ports.
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  selector:
+    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: grpc-ext-proc
+      protocol: TCP
+      port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}  # 9002 when extProcPort is unset or empty
+    - name: http-metrics
+      protocol: TCP
+      port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}  # 9090 when metricsPort is unset or empty
+    {{- with .Values.inferenceExtension.extraServicePorts }}
+    {{- . | toYaml | nindent 4 }}
+    {{- end }}
+  type: ClusterIP  # no targetPort is set on the ports above, so each targets its own port number
+---
+{{- end }}
\ No newline at end of file
diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inference-extension/templates/_servicemonitor.yaml
similarity index 94%
rename from config/charts/inferencepool/templates/epp-servicemonitor.yaml
rename to config/charts/inference-extension/templates/_servicemonitor.yaml
index d58662cbef..17174a2938 100644
--- a/config/charts/inferencepool/templates/epp-servicemonitor.yaml
+++ b/config/charts/inference-extension/templates/_servicemonitor.yaml
@@ -1,3 +1,4 @@
+{{- define "inference-extension.service-monitor" -}}
 {{- if and .Values.inferenceExtension.monitoring.prometheus.enabled (ne (lower .Values.provider.name) "gke") }}
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
@@ -27,4 +28,6 @@ spec:
   selector:
     matchLabels:
       {{- include "gateway-api-inference-extension.labels" .
| nindent 6 }} +--- +{{- end }} {{- end }} diff --git a/config/charts/inferencepool/Chart.lock b/config/charts/inferencepool/Chart.lock new file mode 100644 index 0000000000..1fd39d5865 --- /dev/null +++ b/config/charts/inferencepool/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: inference-extension + repository: file://../inference-extension + version: 0.0.0 +digest: sha256:4a8aeb7ac929d73eab47276d786fd8e49c9c83856d70b612ce020608b83f1160 +generated: "2026-01-11T18:29:35.481137-08:00" diff --git a/config/charts/inferencepool/Chart.yaml b/config/charts/inferencepool/Chart.yaml index f98153c500..31fc2f057b 100644 --- a/config/charts/inferencepool/Chart.yaml +++ b/config/charts/inferencepool/Chart.yaml @@ -7,3 +7,7 @@ type: application version: 0.0.0 appVersion: "0.0.0" +dependencies: + - name: inference-extension + version: 0.0.0 + repository: "file://../inference-extension" \ No newline at end of file diff --git a/config/charts/inferencepool/charts/inference-extension-0.0.0.tgz b/config/charts/inferencepool/charts/inference-extension-0.0.0.tgz new file mode 100644 index 0000000000000000000000000000000000000000..cd1196de96efd4b205b14c5d079e86274cf0b775 GIT binary patch literal 5051 zcmV;s6GZGEiwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc zVQyr3R8em|NM&qo0PH>QbKAI%``Ld5&h4AmbSlO9(RS;Ze#j+ixgJvLs8EoTSO_UicxgB@zHZ5ClMwrhJMLxrYvZ)yNeUeEV=l zq;?l1WIx?W84L!4SNr?P|G{8T{Xf_p?EJLza%XR6@M{0HdA{>r1RO zkv|P?jjP(ZKS?1G{fyEK`w-^4JrYIv*MU252R$E^CuyXUM+fi^WFdG)Kwv5){4<`Q z*BC~OXjI!5p5Rj&+)@{`%>P%f zURLM->z9Ma`F}6v+cyV%oKlX^uaO1U3P2y6>+4=gSM2mIDffqPm{j;gqF#ua_(YRo z55OveZ{L7&kHtPhKOh>f$ig8Jb?OkA7JYEx`Z{f{BH~#X?kO@P3f<%Ev8k1z@^MOH zrk#kz0p%)@)ggTE0RR*!&XKwt^_7s9ln3rt=kjOeQn5YXAuO7aourxV*@*j*pj?ls zcZt&WGkKk28YW*HuO~T#^lMtT@ZO9N zor3$Buo#t_cVm>Z&dgQrEyfak>S-Zee)u<6%z+nMB=H4HRS<(&&CK$g^Vgw6bU0ndGHAdw?kcgQfRm{7!2G&v1v%Yo|(Gn zpNmW6$~Lb)Er8f9HCIgh5>KZn-w{J|ioE==h`)@SWAG0oz7!%nKYw@rUr2+QwlHd6 
z6(9}VpPL|!PZ}ZRuS$@1i;!jw1g$=Are@Ya%IfoWI?j-xXN3JsOQJDYltHb$E8|_m9lpduV-dP1!bm8p@t>cI1g<=89=p8if0`Lv3#9O-1DtU(accn;Uo}I*NKq#E z=xfA;#4}C3%amDlZ%UjW>@)*9#-I+$F$QZukm;0isu%Bx#&Z%|0F{Sj`5CNf{+@{m zVYNW)s}iU;Z%Q*X6Od2BPuGFcqmsN(jzrc9jL~4ob6r^Zs=OKcu~9fDo0gr7_d7#ihekvJXyi+M=GPNOUaGFt6BJVnie!5lzMx^0Jdm;mv7TSR$v60o7I*23ocUNteg zz~%yvZ#ZS|i*MP~jxr(~EeYk62RC4)Bs@TPdWnk{@N{na4a2t^aUer}hH{P><1;Bv zAj9=_LQM+4pkXWge>e`NpQzvu!owwc%SE*r8%z44MyAbI=lc2w`F}0`zmE|UixBy3 z;(_b^|J}X8t3lcS-yQ7l>^=Jb_fgvX|D^z|#sEkZsqK8KALTXgI}!lE$OKu@bnA-C zHZ<6li_5-&sUq|YJc$vb9!cHHX04FbD_Z%MHkG+Q33X54e1_n~QW{heJrRb;ee_{8 z9>bE`5D~3Wa-a&dfEk%15Kxo{41su>&M~9Sh4UFzfLz5ARfwZS zuF^0>pAwC1@ggyP2xHFERBl*#b1VL=a|8vc01UA|f^QVn!re z01YD{HQ^d4PXNwnFat6tl#vN#R4+iGhB&g}3x>o`?a3#Qc@ZywsHCkF%(ws$M}Dbi zS0ZM6zBwDXE6Ai<*W_Q zWSs3~zK@VrWlnzrt(vSh52g=E8)IF?0N7Zn{L+elXL3f zf#->s>*L$$4?7@-f*YvBOdBJM+K7~yiwHx) z{W7iT!(ri(VW6S7L-b%%5{L{`8NYc_M7}gZ-EzfsR5j9d3sRBK%P5e~tAwV0SaAmL z3_20kl?tQOD;!H6ZA{56>0oq@9F-bNF{!Y=^ymrv2bOf zXpgvIy2Ynb%-yTHN)zrACODQTY@l&%M4<}0+PJ{v+I|*8y_?Z67J_Y#ZdLrZpTn-3 z3?iIG=+P`Z9b`2zL|{0&F@!zmaKy6me8})&KSl zo~ejY4gKwYE9Nct!YnEM4cC;kYxre#X7{zET5!imijYz})oGh`K?_ZolWgwzj>Io9 zDy^IlwWMCPQ+$4*rM_S+yhv+a*ox{~_TBicZ?6c|lm~_WWZ0HlLW%_FL?}Jv+I(!q zcsajFnqEwp@Gf)2mB$JGsU(RvUqb*Bk{B0JG$8E56F5T+gad|=UVyD;q}8V?@zy_z z(}V1>@+66j({IBD3m_7K3k@oc%ydQ{CJQ)E#sU_o-F3s6(fDr?ImkWXV^tURe^-L9 zcFG2qohfD2+0oy&?^?`;Y%?pV`;IbP5k@+NK*O_c;V#Pc7VOk@ZAKjHtvWVj;$(rB zHoiQ8F;NPL@6%MTOq0cAPD!FhY6!9wrWBbEl*8Ooz91nhkHW^`ehav!cgw(jjXFp{ zcbMhty?!l;XVg-S*PqdmR(Y#sjGdH!)?iAQ!Vcsa3135EC^*$Z8Y&|xLew*il}lnh zTSs*|qnFE~Y0yqmjXSuswgVZJr+6XKVwTAvycrC38=E>aBuvj*A?(#cNITTl`psa2 z(Z{b^lEWehRUMzqVc85wO$4ry;cNZfJJ1w{^}`C=qIzEEvB6+}5+Z?-E&eGfftx%Gv>vvLw zRocO>DW^DHeUrW$6FnQ|F>|-ccC`eIPmV4O5kD4xS;utW3|q;Mm^Z}h;ciwh%afDy z(Z`Fk(dp+8hoeU33|r~1no0%WqyHYCoSu(PFHX)sjxR>XN8^(Z$LIBE)*DY*mxM+( z`kGjMGD{S0cK-^boSz;Xj?Qw58oDYpi`Mk#baZy|>*?X>;^6%J^usT|o{!Ehj!%x7 zxwpA!Rnj?DD->i;Md-O)_;|ePQnpZixAS2WqU$G27ozLO?C-{P%qM^H>Fv{Jxx(3c 
zk-Qgv{;GW>ZJS8=txYmEZvDMg^3D!EjXyRCUN_;-lCqnX9$8<|y2QRpStQa>$o_-um;&qCAQ%5&!Zt+;Y)h;9m!ck6Pgb=SV% zZbe$|oYrn*FB!YR&T87AOk&sGt9l~pyWMGnx1OAc!LxRnDloR-X-D$9OI@CuSr*z` zrup1nGSV(LibeYXWC`laHrbQmh7%))R68kdARAh+JXI%eR(QB6fkk+2G#2v^e^?sg zV|k#`lK&Z8;_VNDx1Imoef7F}{&Vn{|9L;9E&nsPM4R}z+lYJDh;%7Iw57p*A#kio`!VfZMfT9JNb2h6F1 zf}X@AGpz+$Qm+fdQ^BwX-U`Tm-TqfM@V`**Qyz4|;z+^Jnb-!KptK!c)1g`hCEpsk z6okb(k%bH%ZGy5J>|7SH}tY*RU=Z%J>ele>{!RNzL69hm2TD5uX53(`uG6&5< zp2kX7Io^TKNl5j*6mhhMKR4mznjx<-BFZDal2huWQ^yWgzxw^(3d0Zo>IbO% zFQCt;O8#G&Ns-pcz$OeF!oQj)-7?Vl+fDnvUVPRW0G;)3VeH$On@LA17G1`?EW;K- zZjIE)XW9BlMYvY<%K?Y_j}PrO{bfpv|35<(p;WGpe%$>}yZfvD|I7W|NB{pm%C~RZ z&wGa=3fv(n?^%x?zY^mNVGMt4*?UjTF`R#O2iz>fb&gjIICiO?x z3mN{_h5gJ zLQDZ(Upp8@B_GoEBVFm?^p}G}h&lZd;|=@aSxAmq44fa;sSF@jE8y&su?n*TxN?*O zqo|*f{FE<~rn6>0??c5Kx17w=yKg!{s-|EvjX(S}iUtC^3*cuNWC z_~3MOd=Ae(9KRc#j*bsUy&&} z=zBs)KRrpFpGf(wS3FH=swG4!`zCX6q29xN0eyS9fca#vIWV81?4^$kH5yP=TIvud z_aUqxULvqy;kU|AXFl(Pk9fAV(Q)LLu5M$n=2k{-!BOwxc~_kEJDm!RzV}1xsmJnT zO3V6xqxHU8TGs#lotL{+|9@xyasU5bO56Isvf|%(FaP?wCg^`pOF@kX>;B+19iX_9 z=7DO{KjfNv6_s9dQ-WhRawN_vUe)emE6su^Bp&g;BR6WH0<%*nl;~=FMPb!j!z*-qevf~ZOYQEhuxE|o$$-8 zr=scHkA$xpwxn`HJ+a(>%*N6^a{4bYkE#>sPPn%w!=*i~=FfG9;$!CUAHTHN|BBem zqBN82X0AYs{lBw2*sH|S`Ghwk zP5tRJ4{x%%Y?hn&7+U(Hl{Wovhw^qIpf>%#_iC>i|9SQL<)i+;kJ76DtpKd~0kG6= z{VN!(S$1(barVx#WK;EAxWT4${S1E9P9#Nkg-s-d7M@^3I6oX`&ypai5w zSjghpg*Ssi{wqxx)|u9kQ_f<(Dbf_m3eq?E<(XaX?o6(#N#&_@+l^`L*8LF3ez8V= zsM0P Date: Sun, 11 Jan 2026 20:06:22 -0800 Subject: [PATCH 02/11] inferncepool still work --- config/charts/epp-standalone/Chart.lock | 6 ++++++ config/charts/epp-standalone/Chart.yaml | 14 ++++++++++++++ .../charts/inference-extension-0.0.0.tgz | Bin 0 -> 5396 bytes .../epp-standalone/templates/_helpers.tpl | 0 .../epp-standalone/templates/epp-config.yaml | 1 + .../templates/epp-deployment.yaml | 1 + .../epp-standalone/templates/epp-gke.yaml | 1 + .../templates/epp-leader-election-rbac.yaml | 1 + .../templates/epp-sa-token-secret.yaml | 1 + 
.../templates/epp-service-monitor.yaml | 1 + .../epp-standalone/templates/epp-service.yaml | 1 + config/charts/inference-extension/Chart.yaml | 2 +- .../templates/_latency-predictor.tpl | 0 .../charts/inference-extension-0.0.0.tgz | Bin 5051 -> 5797 bytes 14 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 config/charts/epp-standalone/Chart.lock create mode 100644 config/charts/epp-standalone/Chart.yaml create mode 100644 config/charts/epp-standalone/charts/inference-extension-0.0.0.tgz delete mode 100644 config/charts/epp-standalone/templates/_helpers.tpl create mode 100644 config/charts/epp-standalone/templates/epp-config.yaml create mode 100644 config/charts/epp-standalone/templates/epp-deployment.yaml create mode 100644 config/charts/epp-standalone/templates/epp-gke.yaml create mode 100644 config/charts/epp-standalone/templates/epp-leader-election-rbac.yaml create mode 100644 config/charts/epp-standalone/templates/epp-sa-token-secret.yaml create mode 100644 config/charts/epp-standalone/templates/epp-service-monitor.yaml create mode 100644 config/charts/epp-standalone/templates/epp-service.yaml rename config/charts/{inferencepool => inference-extension}/templates/_latency-predictor.tpl (100%) diff --git a/config/charts/epp-standalone/Chart.lock b/config/charts/epp-standalone/Chart.lock new file mode 100644 index 0000000000..b14ce74b2d --- /dev/null +++ b/config/charts/epp-standalone/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: inference-extension + repository: file://../inference-extension + version: 0.0.0 +digest: sha256:4a8aeb7ac929d73eab47276d786fd8e49c9c83856d70b612ce020608b83f1160 +generated: "2026-01-11T19:49:00.634395-08:00" diff --git a/config/charts/epp-standalone/Chart.yaml b/config/charts/epp-standalone/Chart.yaml new file mode 100644 index 0000000000..490c262bad --- /dev/null +++ b/config/charts/epp-standalone/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +name: epp-standalone +description: A Helm chart for Endpoint Picker + 
+type: application + +version: 0.0.0 + +appVersion: "0.0.0" + +dependencies: + - name: inference-extension + version: 0.0.0 + repository: "file://../inference-extension" \ No newline at end of file diff --git a/config/charts/epp-standalone/charts/inference-extension-0.0.0.tgz b/config/charts/epp-standalone/charts/inference-extension-0.0.0.tgz new file mode 100644 index 0000000000000000000000000000000000000000..86dc42fd120ed71be2e6f3d02421a85c0cf61333 GIT binary patch literal 5396 zcmV+v73=CBiwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc zVQyr3R8em|NM&qo0PH<&bK5qP{j6VsV{-GvnUH$%OIpq3gKH~!Jdb1RBsF)>bUGfG zTv4bgfCV8XuIu~lHz4>{&7Es?{Mfm6xdE}O3evkeM-W=IdP|%cd zG@y>p2o-2Z>^tBsis#Tr8sTb*dms8SnAS3n?76w||F7ugzks>#VF5w=#%6R%+%0uM z&HO((Iqnwb|NG<4$;0vV6Z^!}A(AZ&WTD5$sCnRHTfwkK-~kT6x{4pm zS}`gIrg-5A)AtrGp+P9Cee_2QA%wCNCrB*{;w$5Kgu2!@^X~h=BCNgaV$Yx9ZrDuq zEY@jCrKx^~uOm$T@QaC*M*D363orwlap1WZcT3a8_6*F0ARhT0OHHb)=X02nZ&BUM zdpSa6imWf#TR>nX-54aSlXI2Y_d^a2u?6E6P<-owqj;IaAS-}lMlG^v5b`m{a}dCz zp2f(&5;0o~ec-Xvub9J#Ed-%1`Xe4ABrC*J$r9kX?-A;L229mZXP;ymb~~Mp)yX!U z<8NbG7n}*Z0~)C2#itn%y1u5m{^>mEE16vCFSa2-Bz0ZRx zz0N>1)7nlAuFi|Bbe5x=P6*Z;2@coZOy^Q9L}-8Lj^hQq!+;Ew3Jpe zS-wUh-qW?HTjpv3RIPv6hKJ=bj7I9s3Clu&mAR&q>TK(FuM{vCR~l>J8i4{LECMm`3< zgqZQM6o-(}<6}ro1V5u;3;cgR4o05{qtC*_6ushNupS$8`a&bq@~ipy_(c1^n*F~6 zzQ^w8K)3M+uHOImx`*!%^ZWl^=cxPsW&gjAQn&w41)%H$z`h@}lc|25)ZDix03i?G zf|oVjn&MIoCAM^NIg&5~A8ZMagYOX=N88J4tq|ACTKS5$Am09j)Ty2L2y6~mKr1$L z8CS38SlF}6OmEigPYmr+)2|anPdvLLgqfj|#mnK|A@*dS8FNDq!+pvyc8pvxkmktv*=S>0YE!kSXy zGcF1o3m>gb$(D3@WC;|4V942Rf%Ump`=*Nnc1Q(b`FH!LEMN<6Lr^3tdc1FXmdsns z;j>ij1(LFFwx>!%W~`6F_t(|3VI^y}xJ8eT?Iu+tZQgHES73K5#jT+D>uKzw%9H$R zzeQaOor7910exl;ct~wv(B$3+bje&|qqptQ?8`QNJ(qP=)v#v&12UGLH!m#{^)3&H z&RBv7bMvZJATV{X$0+0|te|n_maYi8*f_&vZ9lWVw)JQjGUlz1t~@2%Z&A+*2N8{Z zu*sCz=%a_-KsSKTGc0manr2+xjBQ2DdfOaX{Cyk&)34M8%mQ;4gc&YsTVYp#hT(cy zWw?|iLlNUM=9yA)Xb8yeaaOhsH$XK2GMl8F08pq9#&m<1*$`%rOx1y@QAwH0*}^Zp zG9Dh#o51&oKt|iB#e87hbkO6YQ7849M`ViNxAN381Sf(~Qw}BLT$sgq6?Z`}fju*H 
ze>*0J-xd&vz?_5N#4dJSp>AV@sXp0d~Rk zf!jcZ{8)?eZg~?ny}9+6eU~6EJPz?sIS$=Z8Uj4VvD{g)3WNrFg~lKdrU>S~xJRlQ z5)3&cklP@1$wsH~2y;M`2^1`RIp4rRxA$lgjs?00VXYd@n8x3+ zZvwTM14U(F{2DM??v!%BnN#8wXNS>N?`qcd*xeu2{YX402_u;zSD-h|90G)j1Ju=Z zZH7&?nKV7X4sbI*;HnezwY`?pa52!3koqSzb2b9qU@ETOISY<$sXH_~=a`PD$_K$0 z&`5>G4W`%&paFSO3|EjiXOsxWC6zvBa}YCF1QuS1*($1|X_qc8vOzsbnk`jov%1{R z&%=zjLM}Fa^h2l9t88k{0DEFq3*k@;A?i?F>mNF6j6Qr59G_)D1jX@*jsEq3tg&yY z*);B8K1<&B3w-nS%uXC%{e-IG{#_&Vn<3D}U>0zd)h?k#5bT|Uhwl^T*>SFlU+}DT z8gGN2jn@fTe=cShIN7Uq0HxFP>h$wqJUl%c6etPx^+(R;McNUX67ZZ6Ov`VgcS9^@ z{lp<`RauydfZ_G|jU?h}@#mFHbEkgkoOfx|I25*AzZ6rhCxgqI@nH1j;%rdKoqlZx ziWW@<_~3tr*Q3c`baOqq9NrAB&WG0*R}(#&)izGtB__Zdd<%^;oVvRgYnJP^?5b-)|aiqxpcWgM)(3BY>P4n zuF6}p8mf0YA=V*UKVzB@tsk?0$Q@oX`@>HkUcX5+&cVCzJ#&&*^&_d;_{^zo60`B( zAGMMTje^813=5+LFmn58Wca2k!8pA7ppwQ6JqRaIRt14%1 z%C=}f6>rQ`g96j$vb_@Mc$Ze%a;>1c%Gcbi&$-rX`JFdkl+qJO?J;f2*)>imqXu~r zn|43Y6QS>Rqk~dCIlhP9)YDXeae!VoB(J&HC1GZHsogSSVSCAlx?Cw1^#c&c+NN#d zC;c^NMgmE5lE0B`2<8C~59l*;k$Jc-fmwKUG-gApo-Cs3uT^T|zuY_6J`uDo|EqWM zzL@{j>GodYzxGk;;=kNGP=Q~mbN!9SzGlvZe9o4{0TtD9fQ+z3+Mwp7N$+-R5CPrH zU#HIDluv5}*cqRq{s-i9Ld6sr{~06>Jg)q&0X7N;Fs2R)*c`%er5b2qy(SPx%mWR) z5=cXLy3-8&kAymex=pZ{oOxj0ss`(zR2^Q?AuWT#Z!K)^he z%-1A1T?pnAj|o{R^uQ)>a{&v9yt;Rp_}d(S4@e49^d&uw%>&Iu?2>7|eOv_<*w>)Ua)zIGgQ{LRzSXi&8=-Q!%)o<7*KFy|VFa#jYCt0~t2 zSvobJHGUK!Oeqsto91qD3!H+0FyS=v8JbwJ(OSW}D35eib5pQ8pe6QYl%&g;>jCyj zVA*V*hV2fS^Es)d=db2O`nkh$lcV@J30L`M^QRYqJpGP&|F9wk1dMywjU9|336nQRWSO9Xql4YQi96`PS&LY|g8xbGt+3;PT$ z{$4kC`7O4s99yg;**Z=S@fLxAP{-9z?PN)Ld~ESWl&WVU%IGMHaQkU=nMr5M4bT$! 
zZRVW|VoQ1>o!pGGQ*F4He3+KJ7jny2>*YTlOn1UgAEzj zM?Y6jy3{}S%cgx_Ek3IZfX4baGxl}Njink>+?nR zBiYs6;y^`d<;>|!uy1mc4P%~x{y@sl@&^)}%)@^RYNMl01Tg~m_-KOf=k}1QAJIyW zMn9gOp#>$sEnvfb*b0&<7X$ODIu!$?K?$5%GRiO;fXjP1!1s-a2TwHw|j0RU{ zgW>h{o^yts$OA<%}fZt z-)?)%#@>ttqW|!Ku;XOA5Vp`pDH;jqC!^DgtBb2odcNzBY;g&RFR7B)Hwttw3zF{- zR2)nuNjnXBPlI(<-f*(YVk}Reu?k*5Nxbq#R`rU2k?kkdD>RAN9%ucPh)x(550Dvz zHCqicDeDG$jZDjCpqgc3HiMO9YOa}~*$jFngEK^uqyw16So@vKrHnf<8B6)9r45U3 z7I;yII-kE?Iv&x*i0=k*AwgS%Ip;py71B8Ox`YqtG7bPW)0m@7;OEm_C>6zzYlF5+ zk*Vc))D=OdG012^nEZQ~f!aq#OoY)Y2MAWE6*CZwV^>9JWx@g5VKQIP(l3W^cC^kP zD5W?|-d3tb6tAH~B_<@{W=YeyLxqZ2wfTBGl8dTyC*U&bb0_dts?eQaD5}x;Y7z9a zu~-tDaT@ye!}NFbvypMScqJA-6i}p;VFbQ#_8lGV?-{E2M0t z-rqgW%sLR&4f|LYRL!a&t-Md^@uWHg-F_f*n5+niC81{yT%VXD+H|K~cTIyN#$U() z+BN1J`0Ca7i3T{2>hCqeg}&``hzqDSV~o%g6K}y`GUE`;nCCnJ-Xe-R6KiYi`!};* zH@J1OXlN-<#m(?~G|8DoPqUIdkBOJDck=5;mL9$5V72dHE$&n~530@1Iy4pp^p zGUn(;&Ver~ynhECfh192cRsdin>@V#;hgY33KM|CLg;WB+fR|37~%MR)|bc0xei`G4oQQ}F-yx+gF9zwV{f#s5nI$Pa%` z+ia4^Z<=N{2&{4N)b4JOF5R{AT;Z%yP@9>HSwn`+7D_6`~9###Ak;XBbMKYrU&#k9k%F~pZ^?#%FzF2D3|D*14uNeQ=J$lLi z+e@ii|4S?W#<_ftkD8$WJuL;A2sZ19*L1nxOjpt*P+j&9s33MhrE9L~HB}-9hb4ip z+AN0T68TD0n#DtlDO%#NZ9@<=!&sSjVZ@0S#;VMV%7lt%7KwFpGd8^>V9f~rbqs@w z9FMJ8^HBYRFI#1NWP&sUY{Nl6U#&~ks8#F9O7 z`fo6giW6uj+*^_1TAx5f2igxSY!C;MV7s+3ov9nt;jta5fg?roK zn=pOO;=A8{&IGfW*GH4HVKGc<^+oAJ&g?KeB+Z%pq0>n&vy8FMgz(M$Ehd{Hji8i} z{*e4ScfpeSE`!cJ-1HPm_d_n)E4#FJbLFnpDnyg+X17>a z!G2}8;b85;h#Hr#0aI5TMaIAiDcCw`SnYiAx!Wo#3ch z|2xNr$A$gh@!`?S`oEX*3JtLkz-fR4Lu$~`*BMZByC9xJsEd4T-(eR5tMv*^W+aGz z%q>8{3_K6H9=lb#fl&7y;^1LHmJsUi%yUeg)+AxxApdaTd&X;%n4fr=;C}3 yw4_e|6>QP6hy%E-s&n>h%UT9DbD;gAyzm<=AaClMk+~yg#md#2Oz zz~qWT4FW6x%26HP-+l)KNs$yOQZJU`rqkvivK&^gX8ZGjt&kFI^Q33x_{m8I{HiJJJi_@`sG(B z6&BxhHpW#P++XCv0r>(#PkRqdyDc09>DP|ku{$jnq$5b6qFX13l3Rhr4E+nJj~p#1 zXv_ubGdJLbDKsF?9f%ggKJ-wDaJAGdfL;QoHO(V?Zf^YFyUNzT0zaTwf!x1wI2#jh zLtRj_{*RB34u1;k|NEm(_htR>;(B6;=4+^_j;a&?Nv;H3*5Md4tT|jLQE4arq3kSq1L!^7Ok&PZ7gn9N?>+T0>6TUw^z%-cP zgQ%MsS)$X7N;Bh(9>JssCT!O_Bgtprx! 
z&GMiY9%mr7^>SBj^bP$Uas^rF4)+5tiGqKB$r8A5yL#@@rvG2B*is;wCk@~XqJ>)u zDEp`y5(JV^&iY>i$^~32;@ri|DtnUSeCMDV&?R_fP%gn+0s@c6gb_9SgcXc%XaE#$ z=6~ghe*ZiOZS@|nA_9mw6v$qQkCMXA7O4F)XLg@v!unSdK= zg)^M8{@Lhi+tU3}IZOp2vVyC_pnW+gm_@1)O1p2L|$dkW|9@ zv@BRNeci_*-Yx4>JT!)(cnp>{mr(@-kz8++c~n?qSL z*(E*EH>CJ~u&+g!5#|C@s5=Li($}&qs}&Q8nY#>&BrZ63E=diVI*IqCMF8#2N3&uA zR`k_s*jzrUDn@76oZ<1BOLkX$OO|$!5Me2S2_wu~gOvg^576s7n7u=>j)khKYz^5kUj)Q z09KHWD}u?Xn~Iukh+>JQ#~WeI1Max3%W}@(pzwFC?R3Knt-?`5~4mn z{`xbEDWebry#;j8>HNcR)}&_=&bE@+hYKS!K32};A|v7}3V8zUqD0t_jgTfPKSr2Y zzxtv%+*`z+CLfaJW&%GkB@Uc84iA~S+)RI%2|49VL&bX8XjxQRq<<9gfrkpoNi-uq z5rT_o#wceVNQFG1HB114a5q^!0e592Q?c9yoYNSO0^t)Dt*B4B=OF5M4Gw3n?4g6h zPAf?rFog-rWZLST*J5yx3308tl5GG$x6SrOi1^rR+&yYs(ti+AI^dKzvy~vtyJh3~ z^YJBD0|7F!JB4VzE`J|6k0N{@GdE9be6yFir5UIxZje1#l>{t2na8j1WRcHZP&?mn z8C8{ZZ9popX&wczX_3%44$H3KgH9vDno^;adVyont(7U+kPb?wz(C4@;I{?V=N|2v zE&KLmxhC6H*{4`(#^G6@P9e8X1CMRXrOuvM1p! zdVJh((lplRy(VoH_BK-821>A+&MqoF&9D8p=)gv2U`!^U&)fxyO#F;SvjETq_lSeu z?nhBaw(DzOw>3?pmc0+CJ*CuNFckF}H;7;H$ry9 zFu8P`*;MapG=B^@r>mn|6#xCVsB1@)h^_;0$e1|jBgLL9o51Ip7QHAfD`{>eUTKNN zZH_GYejNiVx77vA3JVXE6`8e-up2;w$YNHxE(?-jh|x}~r4>gB0of3{*w=)By7xRV z*P$7Y>Vq&%;-Dy)buddMN%6>8zC8KdzaE^N_O*9`rhhn&EK5m?fY;EMNde^Ki$R5~ z&(jx~bVs%zz?1{DE1BjA9tMR+?S}{SMh289)ZTB`V&1cFv`J~NXj9^$!S9PJyR8IK zoLO2@xDd)JF56@XiYrX5=;e-U3HUt(siftD62vJEicN2nP&btNH*w1w(@;Jn-*w#j z;T{u3n17cUPl9d!N=RZ4tN?@qOzDrc81JSxQP-PW%ALCuap87^e=Kkm&6f~BxxhL^ zQOlWa+wD--k8n+>NnH9DDNGL1_ zBN-!4p*PLaU4*GUbWk?53ARkMYMGRYl_Xy3`0@%3u#^b9E{XNZI5H+vf+ICjLXc@N zCO}<87@8W&XV|CtSy(yUZvdB!Zl2gLQ3o;T2CIB|)UO0~v|1{O`V%_hCLfdtp^@@W zI)98YmC%4ZE#XT@^f@Dn3r%GpxDRRqp|o+RCd;UfSM=O0>JIfJm4t(HZ`%_=eu-x? 
zE!s@>(2t!?x3a4>0Zi4T7Q$gEgm^%8y?^YiF#GUT34EFbA&c{q1T3ooDapW9GJI*g zy9G_5S>G$LEhy)8nj7q;R25SXB{IJrf`78w@G!ajWKd)n&zYJTbkC;~E>(O?sm*L6PsDE*D-5-8AKkZjCr&mjV(N)R-?|=XI z;A%MP4{xqU7lWJr<=Nos{Bl%|X1Vi}42dhC{jZVNC(_LimEm88l+p0yw11saR54Vp zXs~2F!~XTv&%@LH&Bi{rE; z;4>WxJo9ybBhN|nw-CxLA-XC_-fYOZ-c858nMInvIW2vSJ!k9+Z&u?Dd4Cd{j$W1% zQ9kTOFL=wz2`G9~Pg4QL9(vu7yyjAu)@J5~cEd8C%_Soqa-~?*Pe4+jKJSy<>8-dh zQb?tXVh!0)oTjBZX}8SHRSC?(tD`YnhxlY=h%eVOT{ZQe-W_bd5xj2yXZQH~;{MOh zOa13=uDbe9?+#4i=lu1<8-L30GcV-=!KcIpQ&_Zz+EI^rhniP?<=MjqJ0O~+9nFhS zlbRa>cCSxR?*j@1VQP%p|Bz`OQkV?2#O=ZxFslp-I0B-|v>IqpyCx8aoI(k_1(5Bs z_g~Gx|3sKenAZf0B{&7^)^u0}r5W&&0hKZ+`c}y$&kyE!dmm8S^M3eEyuF`CrY#Q_@y5rwKW^skC&1b5jE(?3B!X!!^f#-uB7d%0Cgk($4rV=MWMP+8 z1eG>zJEJM=Tai<~%RM20`kLl*nh^`-nxjRP^%+ zhw_il9X9=KuA2CN0yF?2Z52G-`A^;Vi}C-_;n7R{zkiGC;bH%6>y-OGXR*81db=Ml zwInal)LSzVLty7U)r~U4Nov#_NqCZ;PKQcSrKibhow(4YeCvAVV=H3Dc|s9NDT-9t z>U=TdNTa%2EKRXl&YjLwTc;;kaZcOl8!~y8Z%7H^NB=_Rp!b~^Vhr%{(Sjhz#gJwk zv86}DpMOqHQOL;eA*?wL$3U{QF|eL$PzgXL7r+^lu?VvPxO|oaf}kCf{F=>+rju?z zyhnv-+;B22?_PI-6kX?pd;4$E=>(j+D6v+kd55n=Dv3@m&-BwwSv!%MTu3VGcU@5* z(|vLE>HHLZyc(kG(aGi6$;H)WKdSove0A1OXn%ts*l6WLHs6v%Iyf2jFGuLj`Q^v{ zuzz{lA6#8s?B$TB@U`S^Mn)z#^R%Zi?Eb2;{|kq0tm6qRxBmy-RAvw70mxnck8{B(4Fbs1HQ^@le<4=-BtqN&J! 
zM1L`aHX(#9En9AMjEe?Uvb#%*OtkT4J9y^c7IDS(>O^A-nIbe9VR3g;nukV@kMABH zVxMNBRM6Ysr`*AG!lmjxJRt14*&#$dN~~-%TMKeFGI4yAw+?uN@HKE zUVl-Lf}n2-dNTXu=au{Os-vYR94+%07k`(}wW-5EY1#E!UiFgNm(3T|D>RDPo@~2Z zAUb7OGC`J%TDBT)Wv!d&6}GxI1J!IRY%^HtR@oJ|$TowX*+Ls3Nq03^#VrF{SxcF8 zVmg=lQLD7PzgZ1Q9qRlR`i0jcWwq|xL0s4)UxGQ8KHL`4Wc&L9K48u5#xfOUz zRp?eQ6xC>Qx(oWH9ZreE1&ea~(SP{^=$G~!%#E3$9u9)`JJhx;?YHPp(eDu09dlEw zNc}09$8EWJk;@fPwp8zLA7{o6#BHN7E(&UiRnT7Er}^ti^AdFPiOgZLVhPwe@|X3C^SXd(CiRY`dJ|0_u`EMre!)4SxlUCIaMy zQ}+q*7E#o>SQ}&Cy_@yA!Kt%FN6Ta?ZU$GwQO+%T8cXs#E?&mp$=i=CJ>{8$<++2E zxKoupsCGN+{Ip6x)U>|IxT9+&2fmn{3R}nsq@h_FCE$4sh^8fjRJEB*BD{ly>d;i}#DxUw` zJvcsm$^Y--s;mFk0+65noVHo#KmyCMvPEExi>J1Cg8ag1G|yGe8W*)$xtcZ9b~qPq ze3EQ>BGs=sze+cYh?M3@Qs9QlE8O0pwmDo_e=^4$>rX-Q&;<$#G=HcfEoF$K^N<%1 z&k-1~=-0wjCpB-+KI6&OhK4ggFLkRd*4(JbB~z%^r1Gv%thd#rP+5ET+A@EBIG&3@Y|`Y|NU6>K}aCXvasUNK@jr4)#}L>E841Nh@9O z^Q|XY*SQ^VS5|CENgegbbAOtRB|GHw-(VdT7tmI?w`7NF{eQOFzt(Rky__8K=f7(F ze~Hb`;&>?Q^Cjas$53FNdRy&*ECAIWtyXy46naa&lKy~{6@c6Kp|2h8tW&g)cu3G(X1Yq&U zKZ}0r-@#!`PJb84ADpqZQ!?HcYP}1`&%;+`1%f9>*aw0u?r_>eqtijLOltXg?*qY| zs61r8XY$8Rr<;UK3D${Hft5eSWL=~&lm(E?9NUQC=L5|Trt6$$tiB_;(t5#!_)>5ja2X9&1I8V!%mv+_|nHL8^ ze5`fu=DKg8%6^Zg^vJH#v$=A|Y89$Uw-YTEEZA@SHkzz$I8oP&$AFn5j$&tEi4<@5 z1hF#FK7U?J6h2|K+sjqF%Iv>A0qPTvaRFPwQDgr*M~6p+`0wcO{fqtI$@K~iuu>qH zM2aIb>F9m}4Bdu=x`cTsz|I}^K-#TW=C}CcpSzPtPJmM6QGRQ517Y4fB!EW0vmWX1 z%zez<)+@xoi_|T869^cSuizSk_W$;5bj9claeo%|)Fhz*1fqn2-LlWFZ>|*=(0Ya9 z(~7>FUL%(X*|I&N_M^Xs{FZ(DhuDw)CO0PDzW$SZl~cAq2Xu>_yD&gwLP56P+VVbV zy|r)gUF)r_eEpdV;f(7BBMCWIH+0r`wH*i$TCNAK8(_HZ1TJ}^r+=cxY c?HBcOy<9KXpZWUV00030|6|mxumGL_00HhhQUCw| delta 5038 zcmV;f6H)A?ExRX>JAXazbKAI%``Ld5&h4AmbSlO9(RS;Ze#j+ixgJvLs8EoTSO_UicxgB@zHZ5ClMwrhJMLxrYvZ)yNeUeEV=l zq;?l1WIx?W84L!4SNr?P|G{8T{Xf_p?EJLza%XR6@M{0Hd4In1a%cDTPcXO}^y^Ek zG?70IZjGzjxj#uE5&ewP4Eqq~yFC&``PYFva0fjfl_zPWlSc>e4`d;DMnGUHB>Xd; zpya4g^*9M}2sQH5(~Ag)!01F0x#-Qy@E@81|IbU^{MQ&pjA&Hb7oOl#8r)JBw9Nlk zuU=N?|Ld27$A9^MFXh`e2Yj4Tj?k}>1=k8dADrv!UP)K%^e!p)hj5rw_(Y;!h?@9B 
zlVK0QDuZv|fO3z;K0-er8n4L0ArW=z5SbQzaN+toZLT8XSs3mqG9(J!q?zs6i2ISC zT#u=DiPH8nd7WY!CSM$|>g`L6F*?c<5@qSd<=pXzH$x}Zl+k|^qe<)PMG~jKZn-w{J|ioE==h`)@SWAG0oz7!%nKYw@rUr2+Q zwlHd66(9}VpPL|!PZ}ZRuS$@1i;!jw1g$=ArhjJEK+5X#b~?_Gp=X5sOiQ9MSd>An zye>fOo9V9m@EyLyLSqrSC&EZ5s_~zniv+GbZXUb5>VKLUTMMM{rURUDGI47G6<;+& z!bnjj_~>iIgv2vVy~~tYb#F?XAM7*(I>w+5$}t9OK#=K_a;g{aiNH87+!&I^lPI20zo~KB3EYUEp*ji#lBpwk>#uoCjlT6{wX<1p# zOu0%$e7CMe+cMV*plSUpHr!hs!*ryc&3}mcCoa~E81&N)AI3_H(5~nGttFH-gI&`S zzJnJ3Lqa3MoN^zzhTSEwoW7ppIK7le!u@qv6!p;~OG)Zb)yu3eBLZ-LHky+Xu%WHi z!shy3H8Hxt<^qpzIA!mPZ`st2G9nx;3FVXrH(;eCJV1DQiHjHTbZ+_$!?zo8Ab&%C zhH{P><1;BvAj9=_LQM+4pkXWge>e`NpQzvu!owwc%SE*r8%z44MyAbI=lc2w`F}0` zzmE|UixBy3;(_b^|J}X8t3lcS-yQ7l>^=Jb_fgvX|D^z|#sEkZsqK8KALTXgI}!lE z$OKu@bnA-CHZ<6li_5-&sUq|YJb#H1qaI1!%Vw>R)hk;0mNu2SKM8eD;CzPQ#Znqn z5<*tM4E<# zL4 zreJvf&Gc#82(XCy;p&FMz0| ztrX0-01!uhsb^OrX1-y|+<%Nn)=vuyCTN7I``D+R*;l4xc zdGH~dZr1UWbL!!N=ZTo>qxg8zYO_h?J%widci1C4VClQYnQ@MvMs_ zpwbXXqhTVHRQTEKiG;5bnM&m@5`v|8G|G_jWJbL=Eyqd2GxP-a)ev^}2E8nFz%`+~ z5NT^SUaQVQ!Nql!O11+4Q@7X}fQBUK+&rmV(|#N?HWrL}i;W;Hn-%@}57T3z#}ZYt z?o_<_ws_>yiwHx){eLp8>BC{+kzt^rxI^?{Qxb>_R2jc{QbfKqLEUo2byPLdbqi9F z&&w!~&#Q!{epqn^?+iK-)|CpQ)GHiI9&Jp?E$Lu%jvSR5OEIaizVzz8>5@o%+CW(O z-ThOhL@Yg2Rie_z_f5}SusFm|W(lc~RD5$+R8<~j8X2Q#Q-3|1RB~tW1g@{QyHri} z`LIh}O@dpgZVM&aOlMb>p5fQ_bJ%g=2*eQBAi9&(JA%d9V3CN8^DJd7BeXwC#&vcR%y+O+Z|c<{VWAm zzgAZ;Ya#>GPO_}s3cCd~PFBn&(`7|6EHTDtb&TSuA)x4DSKC?_(6nAU=EgM>Nq!Jv znIDumvmW8OBx#Wt%f|`Z9b`2zL|{ z0&F@!zmaKy6me8})&KSlo~ejY4gKwYE9Nct!YnEM4cC;kYxre#X7{zET5!imijYz} z)oGh`K?_ZolWgwzj>Io9Dy^IlwWMCPQ+$4*rM_S+yhv+a*ox{~_TBicZ?6c|lm~_W zWZ0HlLVt<`=tL+z6vK6KjnGcl1+)}N|?e9AQ&`wf~JGivA0~wX4cp=hamdPQ!84PwC zn>sTjOwU>&?A1a@JJi+}5+Z?-E&eGfftx%Gv>vvLwRocO>DW^DHeUrW$6FnQ|F>|-ccC`eIPmV4O5kD4x zS;utW3|q;Mm^Z}h;ciwh%afDy(Z`Fk(dp+8hoeU33|r~1no0%WqyHYCoSu(PFMm$X zKaMX($4BFn569>AXx1A~S(k)HHu{=aeKJcFZg&3)q@14~9FER%iW<5qG>g{s=X7*- z^6Tm0=;Gk~{Pe>wzn+iIE{;!*nz^^RXjRfVRx1=_PetgtT=;mr>Qc5)eYf*r6Qb)U zOc$c-$L#OMb<8J!^6BlV#eI#w0NcgQyGB$4gy;btg4nB=PHVIxg 
z;m?w^?FU>F_h>|aad7(nE=f3$!3O&`EoN7FE2N{D#pfHDRZUUoCD&3vEpzy6g9^_= z)85K+=KZa>a%+fg3X*s0a;bIKzTa*|TJD_IZeuSQyTQ(C+MrBg*WRmoB7f?;-D!ij zo}7rmvv!&)Ft*@nNAkK$U7nj+7TQ~;`P^PI(k?fOMf(6`3F^x>*^}Xh6C;OIJ1K4; z8(OeDRVQy&c(^HnMR;vA7V{8)SQ_GEd7#pg{~283?GJ*to&Vf@^}2fgbMTn|c|WBs z|1-EmoA|lhet5>jRbf*ul7C`OeYA;1ThLEhEE}|J_0>lYJDh;%7Iw57p*A#kio`!V zfZMfT9JNb2h6F1f}X@AGpz+$Qm+fdQ^BwX-U`Tm-TqfM@V`** zQyz4|;z+^Jnb-!KptK!c)1g`hCEpsk6oks)JaIh%%u~ zr6iD?zb)mA0Ekg-pv<@Q+>s8IhlnH^4D2huG6yvv7z z**)d8LpSuYYQx%bEb+HYOQ%7tJ=1Yk%FnD%_O8si&Z*pu2TAOd>ws)IH6Jv89Dr~x zbkRJ!%=3?B;acPg+vHJx{@i?O=(R7ItR&xFAY;EwX)(QEtT;gH~R(}@mGmHG^y16SS#B)n* zar4U-vg2VaRvHr>*FLrLCFT0MC*$;}t^-`~(=^HO*VB)Mgzxw^(3d0Zo>IbO%FQCt;O8#G&Ns-pcz$OeF!oQj)-7?Vl z+fDnvUVPRW0G;)3VeH$On@LA17G1`?EW;K-ZjIE)XW9BlMYvY<%K?Y_j}PrO{bfpv z|35<(p;WGpe%$>}yZfvD|I7W|NB{pm%C~RZ&wGa=41Wbr&9&b1?R2VTae=PZn)MhW zw`{52Dl(jfM(ve^2l?%EP>U+RTF&Uil_vE^*9#we2{S%q6hTf=qRKYsi|t45)lG=9 z$z~;Ux{z(1-()2O>%(`jahC6(B@L5*G5279kU~rWUSB&HMI|57_9I>C;q;e-Lx?&3 z65|c~;eT03j#&(xAJwT0AXh8k?2@qxvjeztlmnxvpOXBPFO#OTW_3fOowwo^>H65V1lIDSG(?8^zdKHykb5nw2H*zG- zDPGm?V&wM)HKM9nJZdqeOC0e$RH_@s#(kH5w(-(mxAW4tq4K~YvFYB-P4gW$jNor# z7&Pqh+?qAtnlF63)s9agNTWy}o^8s~t%u!{uAT79t*4^t+>eB>8n&czLp`zFe}Bxz z(miteFEEd)6X;I3w?GO(caDo?t^bKOASzk}T(zYvR7@u@v4|$l}?BH-ka`D@_^Jnbwh0 z&SJhP(iF-H(l`0#nO*MgOs=X)<*9VrjcM%G{Se50u||HV(k@DA805qF(SH$XF5UEh zYWCc48ED)8-`#s%wg2}9uO9XPeUy&+-$^Cl?(F}YTJQy^o{%Ui$5~SCksfKeyB*{d z?StC&-IQnp+l^|qi_(phYVE-r=Pu6E*yW{PT#W3-L6lx=QF?gOU8uVI=xUGb)Sk^L z9jmDv%()wHv9f~wE+)xf-G7Aje#{%e77fv>O}k1YM`+A@uAh@U%u3> z|LzP~NCPe;-U*JD^?&ek?`8G(-}m+(*Z+GdPhf0+XejxaAv@@BHA4=Qm@=R803zaD zk^q(4dt!f!C%x`gftn#>5HK;Zx`FcG1xRGc@2n*HyYQTFzxM<<27igVg=hBHhS0YQ z+J9`haKhOF1jqchk0O-7C`Y&F9-UpBX(6%q1k%$ApAXN#r&9IYfa>k!uO+|dPW~;o zlfT)6S+H&XWM9>sZ!ZB&h<6!Bko@{*@42h4qTX|NLN0sHT^*XwLeil3{C|5-;4_g_ z#0owfja1L*^xx6bJtvp?NVaWtDSq#{bL9yiw;#3FV|gr(<&RwcF8~1l|LL`|c>sn0 E0E9&)SO5S3 From 0a48a9c17ebb2a5c78d74e7887955da327a94976 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Sun, 11 Jan 2026 21:12:06 
-0800 Subject: [PATCH 03/11] fixed part --- .../templates/_deployment.yaml | 112 +++++++++--------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/config/charts/inference-extension/templates/_deployment.yaml b/config/charts/inference-extension/templates/_deployment.yaml index d1729bbda4..cfc69415ce 100644 --- a/config/charts/inference-extension/templates/_deployment.yaml +++ b/config/charts/inference-extension/templates/_deployment.yaml @@ -52,77 +52,77 @@ spec: {{- end }} {{- with .Values.inferenceExtension.sidecar.livenessProbe }} livenessProbe: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} {{- with .Values.inferenceExtension.sidecar.readinessProbe }} readinessProbe: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} {{- with .Values.inferenceExtension.sidecar.resources }} resources: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} {{- with .Values.inferenceExtension.sidecar.volumeMounts }} volumeMounts: - {{- toYaml . | nindent 10 }} + {{- toYaml . | nindent 12 }} {{- end }} {{- end }} - name: epp image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }} imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "IfNotPresent" }} args: - {{- /* 1. Determine Model Server Type Logic */ -}} - {{- $modelServerType := "vllm" }} - {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}} - {{- $modelServerType = .Values.inferenceExtension.endpointsServer.modelServerType | default "vllm" }} - {{- else }} - {{- $modelServerType = .Values.inferencePool.modelServerType | default "vllm" }} - {{- end }} - {{- /* 2. 
Mode Specific Flags */ -}} - {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}} - - --endpoint-selector - - {{ .Values.inferenceExtension.endpointsServer.endpointSelector | quote }} - - --endpoint-target-ports - - {{ .Values.inferenceExtension.endpointsServer.targetPorts | quote }} - {{- else }} - - --pool-name - - {{ .Release.Name }} - # The pool namespace is optional because EPP can default to the NAMESPACE env var. - - --pool-namespace - - {{ .Release.Namespace }} - {{- if ne .Values.inferencePool.apiVersion "inference.networking.k8s.io" }} - - --pool-group - - "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}" - {{- end }} - {{- end }} - {{- if eq $modelServerType "triton-tensorrt-llm" }} - - --total-queued-requests-metric - - "nv_trt_llm_request_metrics{request_type=waiting}" - - --kv-cache-usage-percentage-metric - - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}" - - --lora-info-metric - - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet. - {{- end }} - - --zap-encoder - - "json" - - --config-file - - "/config/{{ .Values.inferenceExtension.pluginsConfigFile }}" - {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} - - --ha-enable-leader-election - {{- end }} - # Pass additional flags via the inferenceExtension.flags field in values.yaml. - {{- range $key, $value := .Values.inferenceExtension.flags }} - - --{{ $key }} - - "{{ $value }}" - {{- end }} - {{- if .Values.inferenceExtension.tracing.enabled }} - - --tracing=true - {{- else }} - - --tracing=false - {{- end }} - {{- if not .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} - - --metrics-endpoint-auth=false - {{- end }} + {{- /* 1. 
Determine Model Server Type Logic */ -}} + {{- $modelServerType := "vllm" }} + {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}} + {{- $modelServerType = .Values.inferenceExtension.endpointsServer.modelServerType | default "vllm" }} + {{- else }} + {{- $modelServerType = .Values.inferencePool.modelServerType | default "vllm" }} + {{- end }} + {{- /* 2. Mode Specific Flags */ -}} + {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}} + - --endpoint-selector + - {{ .Values.inferenceExtension.endpointsServer.endpointSelector | quote }} + - --endpoint-target-ports + - {{ .Values.inferenceExtension.endpointsServer.targetPorts | quote }} + {{- else }} + - --pool-name + - {{ .Release.Name }} + # The pool namespace is optional because EPP can default to the NAMESPACE env var. + - --pool-namespace + - {{ .Release.Namespace }} + {{- if ne .Values.inferencePool.apiVersion "inference.networking.k8s.io" }} + - --pool-group + - "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}" + {{- end }} + {{- end }} + {{- if eq $modelServerType "triton-tensorrt-llm" }} + - --total-queued-requests-metric + - "nv_trt_llm_request_metrics{request_type=waiting}" + - --kv-cache-usage-percentage-metric + - "nv_trt_llm_kv_cache_block_metrics{kv_cache_block_type=fraction}" + - --lora-info-metric + - "" # Set an empty metric to disable LoRA metric scraping as they are not supported by Triton yet. + {{- end }} + - --zap-encoder + - "json" + - --config-file + - "/config/{{ .Values.inferenceExtension.pluginsConfigFile }}" + {{- if gt (.Values.inferenceExtension.replicas | int) 1 }} + - --ha-enable-leader-election + {{- end }} + # Pass additional flags via the inferenceExtension.flags field in values.yaml. 
+ {{- range $key, $value := .Values.inferenceExtension.flags }} + - --{{ $key }} + - "{{ $value }}" + {{- end }} + {{- if .Values.inferenceExtension.tracing.enabled }} + - --tracing=true + {{- else }} + - --tracing=false + {{- end }} + {{- if not .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} + - --metrics-endpoint-auth=false + {{- end }} ports: - name: grpc containerPort: 9002 From 8e809af7aa7ae02bed1ea506a8ea23b0c7167773 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Sun, 11 Jan 2026 21:44:24 -0800 Subject: [PATCH 04/11] epp standalone also works --- config/charts/epp-standalone/templates/epp-rbac.yaml | 1 + config/charts/inference-extension/templates/_config.yaml | 6 +++--- .../charts/inference-extension/templates/_deployment.yaml | 8 ++++---- 3 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 config/charts/epp-standalone/templates/epp-rbac.yaml diff --git a/config/charts/epp-standalone/templates/epp-rbac.yaml b/config/charts/epp-standalone/templates/epp-rbac.yaml new file mode 100644 index 0000000000..3bf3469642 --- /dev/null +++ b/config/charts/epp-standalone/templates/epp-rbac.yaml @@ -0,0 +1 @@ +{{- include "inference-extension.rbac" . -}} \ No newline at end of file diff --git a/config/charts/inference-extension/templates/_config.yaml b/config/charts/inference-extension/templates/_config.yaml index 38bba5aec9..a2248abb14 100644 --- a/config/charts/inference-extension/templates/_config.yaml +++ b/config/charts/inference-extension/templates/_config.yaml @@ -56,14 +56,14 @@ data: {{- end }} --- -{{- if and .Values.inferenceExtension.sidecar.enabled .Values.inferenceExtension.sidecar.configMapData }} +{{- if and .Values.inferenceExtension.sidecar.enabled }} apiVersion: v1 kind: ConfigMap metadata: - name: {{ include "gateway-api-inference-extension.name" . 
}}-sidecar + name: {{ .Values.inferenceExtension.sidecar.configMap.name }} namespace: {{ .Release.Namespace }} data: - {{- .Values.inferenceExtension.sidecar.configMapData | toYaml | nindent 2 }} + {{- .Values.inferenceExtension.sidecar.configMap.data | toYaml | nindent 2 }} {{- end }} --- {{- if .Values.inferenceExtension.latencyPredictor.enabled }} diff --git a/config/charts/inference-extension/templates/_deployment.yaml b/config/charts/inference-extension/templates/_deployment.yaml index cfc69415ce..e99a69df2e 100644 --- a/config/charts/inference-extension/templates/_deployment.yaml +++ b/config/charts/inference-extension/templates/_deployment.yaml @@ -79,7 +79,7 @@ spec: {{- $modelServerType = .Values.inferencePool.modelServerType | default "vllm" }} {{- end }} {{- /* 2. Mode Specific Flags */ -}} - {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone -}} + {{- if and .Values.inferenceExtension.endpointsServer .Values.inferenceExtension.endpointsServer.standalone }} - --endpoint-selector - {{ .Values.inferenceExtension.endpointsServer.endpointSelector | quote }} - --endpoint-target-ports @@ -207,9 +207,9 @@ spec: {{- if .Values.inferenceExtension.sidecar.volumes }} {{- tpl (toYaml .Values.inferenceExtension.sidecar.volumes) $ | nindent 6 }} {{- end }} - - name: plugins-config-volume - configMap: - name: {{ include "gateway-api-inference-extension.name" . }} + - name: plugins-config-volume + configMap: + name: {{ include "gateway-api-inference-extension.name" . }} {{- include "gateway-api-inference-extension.latencyPredictor.volumes" . 
| nindent 6 }} {{- if .Values.inferenceExtension.affinity }} affinity: From f3b1992a6bbdfc7781ff6ba0545112723fe3067e Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Sun, 11 Jan 2026 21:52:27 -0800 Subject: [PATCH 05/11] fixed format --- config/charts/epp-standalone/Chart.yaml | 2 +- .../charts/inference-extension-0.0.0.tgz | Bin 5396 -> 0 bytes .../epp-standalone/templates/epp-config.yaml | 2 +- .../epp-standalone/templates/epp-deployment.yaml | 2 +- .../charts/epp-standalone/templates/epp-gke.yaml | 2 +- .../templates/epp-leader-election-rbac.yaml | 2 +- .../epp-standalone/templates/epp-rbac.yaml | 2 +- .../templates/epp-sa-token-secret.yaml | 2 +- .../templates/epp-service-monitor.yaml | 2 +- .../epp-standalone/templates/epp-service.yaml | 2 +- .../inference-extension/templates/_config.yaml | 2 +- .../templates/_deployment.yaml | 2 +- .../inference-extension/templates/_gke.yaml | 2 +- .../inference-extension/templates/_helpers.tpl | 2 +- .../templates/_leader-election-rbac.yaml | 2 +- .../inference-extension/templates/_rbac.yaml | 2 +- .../templates/_sa-token-secret.yaml | 2 +- .../inference-extension/templates/_service.yaml | 2 +- config/charts/inferencepool/Chart.yaml | 2 +- .../charts/inference-extension-0.0.0.tgz | Bin 5797 -> 0 bytes .../inferencepool/templates/epp-config.yaml | 2 +- .../inferencepool/templates/epp-deployment.yaml | 2 +- .../templates/epp-leader-election-rbac.yaml | 2 +- .../templates/epp-sa-token-secret.yaml | 2 +- .../templates/epp-service-monitor.yaml | 2 +- .../inferencepool/templates/epp-service.yaml | 2 +- config/charts/inferencepool/templates/gke.yaml | 2 +- .../inferencepool/templates/httproute.yaml | 1 - .../inferencepool/templates/inferencepool.yaml | 2 -- config/charts/inferencepool/templates/rbac.yaml | 2 +- config/charts/inferencepool/values.yaml | 3 --- 31 files changed, 26 insertions(+), 32 deletions(-) delete mode 100644 config/charts/epp-standalone/charts/inference-extension-0.0.0.tgz delete mode 100644 
config/charts/inferencepool/charts/inference-extension-0.0.0.tgz diff --git a/config/charts/epp-standalone/Chart.yaml b/config/charts/epp-standalone/Chart.yaml index 490c262bad..07f84dc1f5 100644 --- a/config/charts/epp-standalone/Chart.yaml +++ b/config/charts/epp-standalone/Chart.yaml @@ -11,4 +11,4 @@ appVersion: "0.0.0" dependencies: - name: inference-extension version: 0.0.0 - repository: "file://../inference-extension" \ No newline at end of file + repository: "file://../inference-extension" diff --git a/config/charts/epp-standalone/charts/inference-extension-0.0.0.tgz b/config/charts/epp-standalone/charts/inference-extension-0.0.0.tgz deleted file mode 100644 index 86dc42fd120ed71be2e6f3d02421a85c0cf61333..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5396 zcmV+v73=CBiwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc zVQyr3R8em|NM&qo0PH<&bK5qP{j6VsV{-GvnUH$%OIpq3gKH~!Jdb1RBsF)>bUGfG zTv4bgfCV8XuIu~lHz4>{&7Es?{Mfm6xdE}O3evkeM-W=IdP|%cd zG@y>p2o-2Z>^tBsis#Tr8sTb*dms8SnAS3n?76w||F7ugzks>#VF5w=#%6R%+%0uM z&HO((Iqnwb|NG<4$;0vV6Z^!}A(AZ&WTD5$sCnRHTfwkK-~kT6x{4pm zS}`gIrg-5A)AtrGp+P9Cee_2QA%wCNCrB*{;w$5Kgu2!@^X~h=BCNgaV$Yx9ZrDuq zEY@jCrKx^~uOm$T@QaC*M*D363orwlap1WZcT3a8_6*F0ARhT0OHHb)=X02nZ&BUM zdpSa6imWf#TR>nX-54aSlXI2Y_d^a2u?6E6P<-owqj;IaAS-}lMlG^v5b`m{a}dCz zp2f(&5;0o~ec-Xvub9J#Ed-%1`Xe4ABrC*J$r9kX?-A;L229mZXP;ymb~~Mp)yX!U z<8NbG7n}*Z0~)C2#itn%y1u5m{^>mEE16vCFSa2-Bz0ZRx zz0N>1)7nlAuFi|Bbe5x=P6*Z;2@coZOy^Q9L}-8Lj^hQq!+;Ew3Jpe zS-wUh-qW?HTjpv3RIPv6hKJ=bj7I9s3Clu&mAR&q>TK(FuM{vCR~l>J8i4{LECMm`3< zgqZQM6o-(}<6}ro1V5u;3;cgR4o05{qtC*_6ushNupS$8`a&bq@~ipy_(c1^n*F~6 zzQ^w8K)3M+uHOImx`*!%^ZWl^=cxPsW&gjAQn&w41)%H$z`h@}lc|25)ZDix03i?G zf|oVjn&MIoCAM^NIg&5~A8ZMagYOX=N88J4tq|ACTKS5$Am09j)Ty2L2y6~mKr1$L z8CS38SlF}6OmEigPYmr+)2|anPdvLLgqfj|#mnK|A@*dS8FNDq!+pvyc8pvxkmktv*=S>0YE!kSXy zGcF1o3m>gb$(D3@WC;|4V942Rf%Ump`=*Nnc1Q(b`FH!LEMN<6Lr^3tdc1FXmdsns z;j>ij1(LFFwx>!%W~`6F_t(|3VI^y}xJ8eT?Iu+tZQgHES73K5#jT+D>uKzw%9H$R 
zzeQaOor7910exl;ct~wv(B$3+bje&|qqptQ?8`QNJ(qP=)v#v&12UGLH!m#{^)3&H z&RBv7bMvZJATV{X$0+0|te|n_maYi8*f_&vZ9lWVw)JQjGUlz1t~@2%Z&A+*2N8{Z zu*sCz=%a_-KsSKTGc0manr2+xjBQ2DdfOaX{Cyk&)34M8%mQ;4gc&YsTVYp#hT(cy zWw?|iLlNUM=9yA)Xb8yeaaOhsH$XK2GMl8F08pq9#&m<1*$`%rOx1y@QAwH0*}^Zp zG9Dh#o51&oKt|iB#e87hbkO6YQ7849M`ViNxAN381Sf(~Qw}BLT$sgq6?Z`}fju*H ze>*0J-xd&vz?_5N#4dJSp>AV@sXp0d~Rk zf!jcZ{8)?eZg~?ny}9+6eU~6EJPz?sIS$=Z8Uj4VvD{g)3WNrFg~lKdrU>S~xJRlQ z5)3&cklP@1$wsH~2y;M`2^1`RIp4rRxA$lgjs?00VXYd@n8x3+ zZvwTM14U(F{2DM??v!%BnN#8wXNS>N?`qcd*xeu2{YX402_u;zSD-h|90G)j1Ju=Z zZH7&?nKV7X4sbI*;HnezwY`?pa52!3koqSzb2b9qU@ETOISY<$sXH_~=a`PD$_K$0 z&`5>G4W`%&paFSO3|EjiXOsxWC6zvBa}YCF1QuS1*($1|X_qc8vOzsbnk`jov%1{R z&%=zjLM}Fa^h2l9t88k{0DEFq3*k@;A?i?F>mNF6j6Qr59G_)D1jX@*jsEq3tg&yY z*);B8K1<&B3w-nS%uXC%{e-IG{#_&Vn<3D}U>0zd)h?k#5bT|Uhwl^T*>SFlU+}DT z8gGN2jn@fTe=cShIN7Uq0HxFP>h$wqJUl%c6etPx^+(R;McNUX67ZZ6Ov`VgcS9^@ z{lp<`RauydfZ_G|jU?h}@#mFHbEkgkoOfx|I25*AzZ6rhCxgqI@nH1j;%rdKoqlZx ziWW@<_~3tr*Q3c`baOqq9NrAB&WG0*R}(#&)izGtB__Zdd<%^;oVvRgYnJP^?5b-)|aiqxpcWgM)(3BY>P4n zuF6}p8mf0YA=V*UKVzB@tsk?0$Q@oX`@>HkUcX5+&cVCzJ#&&*^&_d;_{^zo60`B( zAGMMTje^813=5+LFmn58Wca2k!8pA7ppwQ6JqRaIRt14%1 z%C=}f6>rQ`g96j$vb_@Mc$Ze%a;>1c%Gcbi&$-rX`JFdkl+qJO?J;f2*)>imqXu~r zn|43Y6QS>Rqk~dCIlhP9)YDXeae!VoB(J&HC1GZHsogSSVSCAlx?Cw1^#c&c+NN#d zC;c^NMgmE5lE0B`2<8C~59l*;k$Jc-fmwKUG-gApo-Cs3uT^T|zuY_6J`uDo|EqWM zzL@{j>GodYzxGk;;=kNGP=Q~mbN!9SzGlvZe9o4{0TtD9fQ+z3+Mwp7N$+-R5CPrH zU#HIDluv5}*cqRq{s-i9Ld6sr{~06>Jg)q&0X7N;Fs2R)*c`%er5b2qy(SPx%mWR) z5=cXLy3-8&kAymex=pZ{oOxj0ss`(zR2^Q?AuWT#Z!K)^he z%-1A1T?pnAj|o{R^uQ)>a{&v9yt;Rp_}d(S4@e49^d&uw%>&Iu?2>7|eOv_<*w>)Ua)zIGgQ{LRzSXi&8=-Q!%)o<7*KFy|VFa#jYCt0~t2 zSvobJHGUK!Oeqsto91qD3!H+0FyS=v8JbwJ(OSW}D35eib5pQ8pe6QYl%&g;>jCyj zVA*V*hV2fS^Es)d=db2O`nkh$lcV@J30L`M^QRYqJpGP&|F9wk1dMywjU9|336nQRWSO9Xql4YQi96`PS&LY|g8xbGt+3;PT$ z{$4kC`7O4s99yg;**Z=S@fLxAP{-9z?PN)Ld~ESWl&WVU%IGMHaQkU=nMr5M4bT$! 
zZRVW|VoQ1>o!pGGQ*F4He3+KJ7jny2>*YTlOn1UgAEzj zM?Y6jy3{}S%cgx_Ek3IZfX4baGxl}Njink>+?nR zBiYs6;y^`d<;>|!uy1mc4P%~x{y@sl@&^)}%)@^RYNMl01Tg~m_-KOf=k}1QAJIyW zMn9gOp#>$sEnvfb*b0&<7X$ODIu!$?K?$5%GRiO;fXjP1!1s-a2TwHw|j0RU{ zgW>h{o^yts$OA<%}fZt z-)?)%#@>ttqW|!Ku;XOA5Vp`pDH;jqC!^DgtBb2odcNzBY;g&RFR7B)Hwttw3zF{- zR2)nuNjnXBPlI(<-f*(YVk}Reu?k*5Nxbq#R`rU2k?kkdD>RAN9%ucPh)x(550Dvz zHCqicDeDG$jZDjCpqgc3HiMO9YOa}~*$jFngEK^uqyw16So@vKrHnf<8B6)9r45U3 z7I;yII-kE?Iv&x*i0=k*AwgS%Ip;py71B8Ox`YqtG7bPW)0m@7;OEm_C>6zzYlF5+ zk*Vc))D=OdG012^nEZQ~f!aq#OoY)Y2MAWE6*CZwV^>9JWx@g5VKQIP(l3W^cC^kP zD5W?|-d3tb6tAH~B_<@{W=YeyLxqZ2wfTBGl8dTyC*U&bb0_dts?eQaD5}x;Y7z9a zu~-tDaT@ye!}NFbvypMScqJA-6i}p;VFbQ#_8lGV?-{E2M0t z-rqgW%sLR&4f|LYRL!a&t-Md^@uWHg-F_f*n5+niC81{yT%VXD+H|K~cTIyN#$U() z+BN1J`0Ca7i3T{2>hCqeg}&``hzqDSV~o%g6K}y`GUE`;nCCnJ-Xe-R6KiYi`!};* zH@J1OXlN-<#m(?~G|8DoPqUIdkBOJDck=5;mL9$5V72dHE$&n~530@1Iy4pp^p zGUn(;&Ver~ynhECfh192cRsdin>@V#;hgY33KM|CLg;WB+fR|37~%MR)|bc0xei`G4oQQ}F-yx+gF9zwV{f#s5nI$Pa%` z+ia4^Z<=N{2&{4N)b4JOF5R{AT;Z%yP@9>HSwn`+7D_6`~9###Ak;XBbMKYrU&#k9k%F~pZ^?#%FzF2D3|D*14uNeQ=J$lLi z+e@ii|4S?W#<_ftkD8$WJuL;A2sZ19*L1nxOjpt*P+j&9s33MhrE9L~HB}-9hb4ip z+AN0T68TD0n#DtlDO%#NZ9@<=!&sSjVZ@0S#;VMV%7lt%7KwFpGd8^>V9f~rbqs@w z9FMJ8^HBYRFI#1NWP&sUY{Nl6U#&~ks8#F9O7 z`fo6giW6uj+*^_1TAx5f2igxSY!C;MV7s+3ov9nt;jta5fg?roK zn=pOO;=A8{&IGfW*GH4HVKGc<^+oAJ&g?KeB+Z%pq0>n&vy8FMgz(M$Ehd{Hji8i} z{*e4ScfpeSE`!cJ-1HPm_d_n)E4#FJbLFnpDnyg+X17>a z!G2}8;b85;h#Hr#0aI5TMaIAiDcCw`SnYiAx!Wo#3ch z|2xNr$A$gh@!`?S`oEX*3JtLkz-fR4Lu$~`*BMZByC9xJsEd4T-(eR5tMv*^W+aGz z%q>8{3_K6H9=lb#fl&7y;^1LHmJsUi%yUeg)+AxxApdaTd&X;%n4fr=;C}3 yw4_e|6>QP6hy%E-s&n>h%UT9DbD;gAyDc zVQyr3R8em|NM&qo0PKBjbKADEa6ju;;8Aks#F>zm<=AaClMk+~yg#md#2Oz zz~qWT4FW6x%26HP-+l)KNs$yOQZJU`rqkvivK&^gX8ZGjt&kFI^Q33y4~+O`b+0K)Y%UDudk;;!EgS^t*N)w>J1rNaBS@g4TPKK;TY<$4{R^m%94#nl z%mwN*H{gURG$76$h!(>>^iYX#wbU$tUIL~y%_DnmZv5Z7%GSREKcHBF+`n-+8xwCs 
zT~M?BkB^TI3hV#-qfYl_{qN#>c(9NQW5NKnOJu<|0?yEZ4WEB$1@8D#41Cid$f^_9v|cGGQf_3VGn@61Z@?dhXJu|6i}zQXrWp4d4u-gqB`d>rrRsY5`Q)zih|D#W{>;>dgepf5U9ajG=Zs;L}hl?i+jF-kU>NGT9|P(Kn>{ zf3UAbm=WdzQ>Z%!meSX&p zswzfj*qq_RzM6ozClaDQKK}YMiz%ZJ1HA=w z(CPfcaMq+}63(`g*oO-vGd@<%5frkpoNi-uq5rT_o#wceVNQFG1 zHB114a5q^!0e592Q?c9yoYNSO0^t)Dt*B4B=OF5M4Gw3n?4g6hPAf?rFog-rWZLST z*J5yx3308tl5GG$x6SrOi1^rR+&yYs(ti+AI^dKzvy~vtyJh3~^YJBD0|7F!JB4Vz zE+09MB77e+H&1JPvzNK08K^04kUdzH1S~w6$FJ{Xk72n=9JyT9YA3kdnQXnb&WLs1fZY3HSf?!oW8#S^g;VpW6+;7q}*5|z@Z58%5 zQr!khu$s;;Dm~4w{kQ1AMrUA5CZNyU1&U1kj7GBn&;|F1gWm2(QAf7xYhSlDO{12* z52!t*)L$?Z^%*yaU-8KpbMvOQRG7J#at1jHD`;GrQ7D2gcFr)lbe!2#?`kv*IH#+l zTNMBOx2S7JlZdVZaLAZA=p)6REStdRnHIe$Eh}kmC0=QX#chr(`F5sJ-@1{3V*PC0)ox2oq;dX?7EN~Rfmk>a?z&b?a6$snt6}kq6Fhk%6YKBZR zEv+t*QM7)+hbPHx=?EO?px>koW(W&Fj4LF=Krd%-(d`V4qPajbQ1-IvOlbTA2Np1g zyHM0c?cXG4i-R)BWot}madou!&9fS}A$ge&C*?jsXcU1Hna)0 zOtfm5l!=ujUh4Sr3JtK72)izc^~yLhCR2hVHBv&5X)q>0T|^j~8p>zbr}~Vs3yy(j#u>DEb0#RB$b4Nb8p)dL4Jv6GA-In_Rx==PPekFH33Z3q!z+q zDTH`Hb-jP=tT6lVRSA5W1tE*`lLRcQ0V&DARWf{Oyt@TWp;_N6uq`O(b($OOrBoGD z4<$0c9)hyk@G!ajWKd)n&zYJTbkC;~E z>(O?sm*L6PsDE*D-5-8AKkZjCr&mjV(N)R-@BjDUYB=f-Z>~lcgPZ>4+2HE@a#W6H zx$~3^i7TM}uaVa$(#;Q*;a`T7(eUK7f1OfPF;uQ-uw*>L{`J+*!_)rF$!Ih@|LNyZ z|N7?g>a3c3tBY2loMW*;M)qU?j?K6Wmy01~4b|IS533MezF?XVT|Q_3)S+X#_@hrB zUcX5d&fdG|Id{`X^)qSu1l+CdlCW{_pS6;Aee!v5Q6+fIgg+_LHXmR~-lG!z&B^f7 zHc2=U-U{z;YTT~;tB{6n7Mrf*R#j!8=Uj{9v?Sm&9SS`2b$=tzN%Xf6$}J(fDoNgK z$hqE4$G(|Gn!hcCSxR?*j@1VQP%p|Bz`OQkV?2 z#O=ZxFslp-I0B-|v>IqpyCx8aoI(k_1(5Bs_g~Gx|3sKenAZf0B{&7^)^u0}r5W&& z0hKZ+`c}y$&kyE!dmm8S^8-uHqym2~nXidF2^9|rkei;WMpBNRpdXEtzB`89XrPsJ3VY6*&aqisUXsE^-DW7 zDUXjW5yo3}EyOq*#zlrd4=*y?S#uLK1#z2s=7HLf-dHC$BRi|WwG@Z9zk#FT0hDUh z>bXBj=Df)o)P_9Hl`e9;0iUCk>L1eG>zJEJ zM=Tai<~%RM20`kLl*nh^`-nxjRP^%+hw_il9X9=KuA2CN0yF?2Z52G-`A^;Vi}C-_ z;n7R{zl-bPVgGIGl>0tsvAfoKyB{yLBrnj^TQdw4y6D`LiZLJ>+Sid5O^d@%(@a@Ak(yjcD(iP$Q6JNNarNo^6n(rJqU+Jg<=M%_)nz}b`u==%)=y}I zAlPW-LpI-%LpnGa_Af{1&H3fW{;+>}+8 
z?EG|eesvjDi}i;$KMyZj^P;K9enc^ZHX(#9En9AMjEe?Uvb#%*OtkT4J9y^c7IDS( z>O^A-nIbe9VR3g;nukV@kMABHVxMNBRM6Ysr`*AG!lmjxJRt14*&#$dN~~-%TMKeFGI4yAw+?uN@HKEUVl-Lf}n2-dNTXu=au{Os-vYR94+%07njeqslz~N z+4Wjp^^)3`%@@@xG>X}tY`a__I%QZgL6(eKwi<3_t()i-wz@V0)od$lGg#?X*%i0Q zHiMqoLK`7TcQshWEdyIwOPO?HI+yxUtF*koSq(`Y>iib^h1VlxweH(NT-YODf;pEy z+!oSg`}+buVAG_M)tZ-2Whz*o?m)S?4!*Sbe4)g4v9NYYwQK@1HVC8t3^GuAsGShe zZY`ENR;U$ADlV{Rip?^Gjr-AdN7*Vjhi|rI=l6_K9HdVx)gnr4C@~u>(#pBCYcilh z!>rnTy&1_x)wvaL8TGjpcuQ62RxlLRXmYv>`lTICiNghpa{JNw1L&9b9L$ZGp&kx` z_B+(JEbX`GPtorX*d23It4RGRna6Fpd6CN%QMOd?Zy#sI4#aJvF)j*fiB-^E-lzHN zN%InP^NGx1vSTE+hMsxh`otV5-L|#up_L#>@E3M6Z5wkAe6{s`q6yBU`g_fAVQjmc z;sWZDIYwxV2@M5|CIaMyQ}+q*7E#o>SQ}&Cy_@yA!Kt%FN6Ta?ZU$GwQO+%T8cXs# zE?&mp$=i=CJ>{8$<++2ExKoupsCGN+{Ip6x)U>|IxT9+&2fmn{3R}nsqg9TxtH%DXW$%kujs1UraMUf<{~a8@?El-zRcHSfEdJVi`5qrjivD+G z3Q8*2>?gMAI^LXH(zH-rj}Mq4PC=!YoRn;tjT{1|1nx_BF{Hm2P>HI?Q^Cjas$53FNdRy&*ECAIWtyXy46naa&lKy~{6@c6Kp|2h8t zW&g)cu3G(X1Yq&UKZ}0r-@#!`P8Z1^oUyf2GTs+zy$i?B!&hYmf+t7V2ZAf^aN0wo z(?PLJYWaEZ1HqlBJY>FS^2bi6n}kdW)`?Pql|RK~U8FIT1*AWwKc2O44pY@p^|3tV zPP^Bg?Xn*Nf1hoU9;#@JQk(`kAG`!2)mJn9AG%nKBI$QuR9A_14kN8Nd?Y%*c(LSqR-%N>CuwAQGW0clXs-*{SS=%^I z%b1sT))<)=2SI$Sb?)Z6Z=uS5kEZmr^Fs!6vKEfy@;Z~QiztZg_^*Nex1 znIn#3XJCmGZ}$YTGSNO>OcXw0wcE>8yvppqJpt+yk8uH8!BJ!XJ4c5{h4}C2@coPZ z-^uj~4X{!mm_&*rGwJAl0u0@Tgt~-zD8SAg_CVULSLV0)Cx^dJ_m3lds?!gZBUSY;?uw3~?6q)Fhz*1fqn2-LlWF zZ>|*=(0Ya9(~7>FUL%(X*|I&N_M^Xs{FZ(DhuDw)CO0PDzW$SZl~cAq2Xu>_yD&gw zLP56P+VVbVy|r)gUF)r_eEpdV;f(7BBMCWIH+0r`wH*i$TCNAK8(_HZ1 jTJ}^r+=cxY?e%iKTrbz3`TE}g00960W7Ms%0G Date: Sun, 11 Jan 2026 21:55:09 -0800 Subject: [PATCH 06/11] fixed format --- config/charts/inferencepool/values.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index d21801a7fe..bce5c8e798 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -12,11 +12,11 @@ inferenceExtension: 
extraContainerPorts: [] # Define additional service ports extraServicePorts: [] -# extraServicePorts: -# - name: http -# port: 8081 -# protocol: TCP -# targetPort: 8081 + # extraServicePorts: + # - name: http + # port: 8081 + # protocol: TCP + # targetPort: 8081 # This is the plugins configuration file. # pluginsCustomConfig: @@ -183,7 +183,7 @@ provider: trafficPolicy: {} # connectionPool: # http: - # maxRequestsPerConnection: 256000 + # maxRequestsPerConnection: 256000 inferenceGateway: name: inference-gateway @@ -197,4 +197,4 @@ istio: trafficPolicy: {} # connectionPool: # http: - # maxRequestsPerConnection: 256000 + # maxRequestsPerConnection: 256000 \ No newline at end of file From ec37461d0ab18cf058551a09ce12efe27bdab018 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Sun, 11 Jan 2026 22:00:03 -0800 Subject: [PATCH 07/11] fixed format --- config/charts/inferencepool/values.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index bce5c8e798..d21801a7fe 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -12,11 +12,11 @@ inferenceExtension: extraContainerPorts: [] # Define additional service ports extraServicePorts: [] - # extraServicePorts: - # - name: http - # port: 8081 - # protocol: TCP - # targetPort: 8081 +# extraServicePorts: +# - name: http +# port: 8081 +# protocol: TCP +# targetPort: 8081 # This is the plugins configuration file. 
# pluginsCustomConfig: @@ -183,7 +183,7 @@ provider: trafficPolicy: {} # connectionPool: # http: - # maxRequestsPerConnection: 256000 + # maxRequestsPerConnection: 256000 inferenceGateway: name: inference-gateway @@ -197,4 +197,4 @@ istio: trafficPolicy: {} # connectionPool: # http: - # maxRequestsPerConnection: 256000 \ No newline at end of file + # maxRequestsPerConnection: 256000 From 5f59002460bd2f87c6f9df88d0b1e99740acb7e5 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Sun, 11 Jan 2026 22:45:22 -0800 Subject: [PATCH 08/11] fixed ci cd --- hack/verify-helm.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hack/verify-helm.sh b/hack/verify-helm.sh index 0388b6e24d..e9bb8dd09c 100755 --- a/hack/verify-helm.sh +++ b/hack/verify-helm.sh @@ -27,6 +27,13 @@ test_cases_inference_pool["multiple-replicas"]="--set inferencePool.replicas=3 - # source (such as in the verify-all script) make helm-install +echo "Building dependencies for inferencePool chart..." +${SCRIPT_ROOT}/bin/helm dependency build ${SCRIPT_ROOT}/config/charts/inferencepool +if [ $? -ne 0 ]; then + echo "Helm dependency build failed." + exit 1 +fi + # Running tests cases echo "Running helm template command for inferencePool chart..." 
# Loop through the keys of the associative array From ca777959e182bb456073baccc4333d8114833870 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 12 Jan 2026 09:55:19 -0800 Subject: [PATCH 09/11] fixed comments --- .gitignore | 6 +++ config/charts/epp-standalone/Chart.lock | 6 --- .../inference-extension/templates/_rbac.yaml | 50 +++++++++---------- config/charts/inferencepool/Chart.lock | 6 --- .../charts/inferencepool/templates/rbac.yaml | 18 +++---- 5 files changed, 40 insertions(+), 46 deletions(-) delete mode 100644 config/charts/epp-standalone/Chart.lock delete mode 100644 config/charts/inferencepool/Chart.lock diff --git a/.gitignore b/.gitignore index 18a475b5e7..7f36fdaa53 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,9 @@ site # MacOS generated files **/.DS_Store + +# Ignore all Chart.lock files anywhere under config/charts +config/charts/**/Chart.lock + +# Ignore all .tgz files anywhere under config/charts +config/charts/**/*.tgz \ No newline at end of file diff --git a/config/charts/epp-standalone/Chart.lock b/config/charts/epp-standalone/Chart.lock deleted file mode 100644 index b14ce74b2d..0000000000 --- a/config/charts/epp-standalone/Chart.lock +++ /dev/null @@ -1,6 +0,0 @@ -dependencies: -- name: inference-extension - repository: file://../inference-extension - version: 0.0.0 -digest: sha256:4a8aeb7ac929d73eab47276d786fd8e49c9c83856d70b612ce020608b83f1160 -generated: "2026-01-11T19:49:00.634395-08:00" diff --git a/config/charts/inference-extension/templates/_rbac.yaml b/config/charts/inference-extension/templates/_rbac.yaml index e7ce36f5a1..0b77026048 100644 --- a/config/charts/inference-extension/templates/_rbac.yaml +++ b/config/charts/inference-extension/templates/_rbac.yaml @@ -7,31 +7,31 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} rules: - - apiGroups: - - authentication.k8s.io - resources: - - tokenreviews - verbs: - - create - - apiGroups: - - authorization.k8s.io - resources: - - subjectaccessreviews - verbs: - - create - - nonResourceURLs: - - "/metrics" - verbs: - - get +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- nonResourceURLs: + - "/metrics" + verbs: + - get --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: name: {{ include "gateway-api-inference-extension.cluster-rbac-name" . }} subjects: - - kind: ServiceAccount - name: {{ include "gateway-api-inference-extension.name" . }} - namespace: {{ .Release.Namespace }} +- kind: ServiceAccount + name: {{ include "gateway-api-inference-extension.name" . }} + namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole @@ -46,9 +46,9 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} rules: - - apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding @@ -56,9 +56,9 @@ metadata: name: {{ printf "%s-sa" (include "gateway-api-inference-extension.name" .) }} namespace: {{ .Release.Namespace }} subjects: - - kind: ServiceAccount - name: {{ include "gateway-api-inference-extension.name" . }} - namespace: {{ .Release.Namespace }} +- kind: ServiceAccount + name: {{ include "gateway-api-inference-extension.name" . 
}} + namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role diff --git a/config/charts/inferencepool/Chart.lock b/config/charts/inferencepool/Chart.lock deleted file mode 100644 index 1fd39d5865..0000000000 --- a/config/charts/inferencepool/Chart.lock +++ /dev/null @@ -1,6 +0,0 @@ -dependencies: -- name: inference-extension - repository: file://../inference-extension - version: 0.0.0 -digest: sha256:4a8aeb7ac929d73eab47276d786fd8e49c9c83856d70b612ce020608b83f1160 -generated: "2026-01-11T18:29:35.481137-08:00" diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml index 212725f1c7..c2fed9ab23 100644 --- a/config/charts/inferencepool/templates/rbac.yaml +++ b/config/charts/inferencepool/templates/rbac.yaml @@ -6,12 +6,12 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} rules: - - apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferenceobjectives", "inferencemodelrewrites"] - verbs: ["get", "watch", "list"] - - apiGroups: ["{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferenceobjectives", "inferencemodelrewrites"] + verbs: ["get", "watch", "list"] +- apiGroups: ["{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"] + resources: ["inferencepools"] + verbs: ["get", "watch", "list"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding @@ -19,9 +19,9 @@ metadata: name: {{ printf "%s-non-sa" (include "gateway-api-inference-extension.name" .) }} namespace: {{ .Release.Namespace }} subjects: - - kind: ServiceAccount - name: {{ include "gateway-api-inference-extension.name" . }} - namespace: {{ .Release.Namespace }} +- kind: ServiceAccount + name: {{ include "gateway-api-inference-extension.name" . 
}} + namespace: {{ .Release.Namespace }} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role From 3827f7b66cf92a3fb7254d8e6f1968209d87509e Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 12 Jan 2026 10:15:13 -0800 Subject: [PATCH 10/11] fixed format Signed-off-by: Xiyue Yu --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7f36fdaa53..7e9674135f 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,4 @@ site config/charts/**/Chart.lock # Ignore all .tgz files anywhere under config/charts -config/charts/**/*.tgz \ No newline at end of file +config/charts/**/*.tgz From e115a9838c9cbd21a4fec3c1fa3b977f42fc447f Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 12 Jan 2026 12:45:31 -0800 Subject: [PATCH 11/11] fixed httproute timeouts Signed-off-by: Xiyue Yu --- config/charts/inferencepool/templates/httproute.yaml | 4 ++++ site-src/_includes/epp-latest.md | 4 ++++ site-src/_includes/epp.md | 4 ++++ site-src/guides/getting-started-latest.md | 2 +- site-src/guides/serve-multiple-genai-models.md | 4 ++++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/config/charts/inferencepool/templates/httproute.yaml b/config/charts/inferencepool/templates/httproute.yaml index e63d555d38..c448c7a430 100644 --- a/config/charts/inferencepool/templates/httproute.yaml +++ b/config/charts/inferencepool/templates/httproute.yaml @@ -24,4 +24,8 @@ spec: name: X-Gateway-Base-Model-Name value: {{ .Values.experimentalHttpRoute.baseModel }} {{- end }} + {{- if ne (lower .Values.provider.name) "gke" }} + timeouts: + request: 300s + {{- end }} {{- end }} diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md index 0867537549..8f10292c5c 100644 --- a/site-src/_includes/epp-latest.md +++ b/site-src/_includes/epp-latest.md @@ -3,6 +3,7 @@ ```bash export GATEWAY_PROVIDER=gke helm install vllm-llama3-8b-instruct \ + --dependency-update \ --set 
inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ @@ -14,6 +15,7 @@ ```bash export GATEWAY_PROVIDER=istio helm install vllm-llama3-8b-instruct \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ @@ -25,6 +27,7 @@ ```bash export GATEWAY_PROVIDER=none helm install vllm-llama3-8b-instruct \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ @@ -36,6 +39,7 @@ ```bash export GATEWAY_PROVIDER=none helm install vllm-llama3-8b-instruct \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ diff --git a/site-src/_includes/epp.md b/site-src/_includes/epp.md index df0a7b6a89..61ffb9be8d 100644 --- a/site-src/_includes/epp.md +++ b/site-src/_includes/epp.md @@ -3,6 +3,7 @@ ```bash export GATEWAY_PROVIDER=gke helm install vllm-llama3-8b-instruct \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --set experimentalHttpRoute.enabled=true \ @@ -15,6 +16,7 @@ ```bash export GATEWAY_PROVIDER=istio helm install vllm-llama3-8b-instruct \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --set experimentalHttpRoute.enabled=true \ @@ -27,6 +29,7 @@ ```bash export GATEWAY_PROVIDER=none helm install vllm-llama3-8b-instruct \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --set experimentalHttpRoute.enabled=true \ @@ -39,6 +42,7 @@ ```bash export GATEWAY_PROVIDER=none helm install 
vllm-llama3-8b-instruct \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ --set experimentalHttpRoute.enabled=true \ diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index 10b0aeb5b8..94b6625e2e 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -106,7 +106,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens 1. Install NGINX Gateway Fabric with the Inference Extension enabled by setting the `nginxGateway.gwAPIInferenceExtension.enable=true` Helm value ```bash - helm install ngf oci://ghcr.io/nginx/charts/nginx-gateway-fabric --create-namespace -n nginx-gateway --set nginxGateway.gwAPIInferenceExtension.enable=true + helm install ngf oci://ghcr.io/nginx/charts/nginx-gateway-fabric --create-namespace -n nginx-gateway --dependency-update --set nginxGateway.gwAPIInferenceExtension.enable=true ``` This enables NGINX Gateway Fabric to watch and manage Inference Extension resources such as InferencePool and InferenceObjective. diff --git a/site-src/guides/serve-multiple-genai-models.md b/site-src/guides/serve-multiple-genai-models.md index f1b8185d8e..78b5026f8d 100644 --- a/site-src/guides/serve-multiple-genai-models.md +++ b/site-src/guides/serve-multiple-genai-models.md @@ -160,6 +160,7 @@ Select a tab to follow the provider-specific instructions. ```bash export GATEWAY_PROVIDER=gke helm install vllm-deepseek-r1 \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ @@ -171,6 +172,7 @@ Select a tab to follow the provider-specific instructions. 
```bash export GATEWAY_PROVIDER=istio helm install vllm-deepseek-r1 \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ @@ -181,6 +183,7 @@ Select a tab to follow the provider-specific instructions. ```bash export GATEWAY_PROVIDER=none helm install vllm-deepseek-r1 \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \ @@ -192,6 +195,7 @@ Select a tab to follow the provider-specific instructions. ```bash export GATEWAY_PROVIDER=none helm install vllm-deepseek-r1 \ + --dependency-update \ --set inferencePool.modelServers.matchLabels.app=vllm-deepseek-r1 \ --set provider.name=$GATEWAY_PROVIDER \ --version $IGW_CHART_VERSION \