Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions config/charts/inferencepool/templates/epp-monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
{{- /*
Resources for authenticated metrics scraping. Everything up to the matching
"end" is rendered only when Prometheus monitoring and its auth option are
both enabled. Every action in this preamble trims surrounding whitespace, so
it contributes nothing to the rendered manifest.
*/ -}}
{{- /* Namespace of the "collector" ServiceAccount; Autopilot uses a different one. */ -}}
{{- $isAutopilot := false -}}
{{- with .Values.provider.gke }}
{{- $isAutopilot = .autopilot | default false -}}
{{- end }}
{{- $gmpNamespace := "gmp-system" -}}
{{- if $isAutopilot -}}
{{- $gmpNamespace = "gke-gmp-system" -}}
{{- end -}}
{{- /* Names of namespaced objects: prefixed with the release name only. */ -}}
{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
{{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}}
{{- /* Names of cluster-scoped objects: also prefixed with the release namespace. */ -}}
{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
{{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}}
{{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}}
# ServiceAccount that the metrics-reader token Secret is issued for and that
# the ClusterRoleBinding in this file grants GET on /metrics.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ $metricsReadSA }}
  namespace: {{ .Release.Namespace }}
  # Carry the standard chart labels, like the other namespaced objects here.
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
---
# Service-account token Secret tied to the metrics-reader ServiceAccount
# through the kubernetes.io/service-account.name annotation.
apiVersion: v1
kind: Secret
type: kubernetes.io/service-account-token
metadata:
  name: {{ $metricsReadSecretName }}
  namespace: {{ .Release.Namespace }}
  annotations:
    kubernetes.io/service-account.name: {{ $metricsReadSA }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
---
{{- if eq (.Values.provider.name | default "") "gke" }}
{{- /*
PodMonitoring belongs to the monitoring.googleapis.com API group, so it is
rendered only when the chart targets GKE; on other providers this document is
skipped instead of failing the install on an unknown kind.

Review thread on this resource (kept for context):
  - "note this is only used in GKE (and therefore it was in gke.yaml)"
  - @nirrozenbaum, Nov 27, 2025: "+1. For resources that are GKE-specific, it's
    preferred to have them in a separate gke.yaml file. Now we just need to
    change the conditional, so instead of checking gke.enabled, we might check
    for provider.name == gke."
*/}}
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: {{ .Release.Name }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
  endpoints:
  - port: metrics
    scheme: http
    interval: {{ .Values.inferenceExtension.monitoring.interval }}
    path: /metrics
    # Scrapes authenticate with the bearer token stored in the reader Secret.
    authorization:
      type: Bearer
      credentials:
        secret:
          name: {{ $metricsReadSecretName }}
          key: token
  selector:
    matchLabels:
      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
{{- end }}
---
# Cluster-wide permission to GET the /metrics non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ $metricsReadRoleName }}
rules:
  - verbs: ["get"]
    nonResourceURLs: ["/metrics"]
---
# Grants the metrics-reader ClusterRole to the metrics-reader ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $metricsReadRoleBindingName }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ $metricsReadRoleName }}
subjects:
  - kind: ServiceAccount
    namespace: {{ .Release.Namespace }}
    name: {{ $metricsReadSA }}
---
# Read access limited to the single metrics-reader token Secret.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ $secretReadRoleName }}
  # Set explicitly so the Role sits in the same namespace as the RoleBinding
  # that references it, matching the other namespaced objects in this file.
  namespace: {{ .Release.Namespace }}
rules:
  - apiGroups: [""]
    resources: ["secrets"]
    resourceNames: [{{ $metricsReadSecretName | quote }}]
    verbs: ["get", "list", "watch"]
---
# Lets the "collector" ServiceAccount in the GMP namespace use the
# secret-read Role, i.e. read the metrics-reader token Secret.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ $gmpCollectorRoleBindingName }}
  namespace: {{ .Release.Namespace }}
subjects:
  - kind: ServiceAccount
    name: collector
    namespace: {{ $gmpNamespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ $secretReadRoleName }}
{{- end }}
103 changes: 0 additions & 103 deletions config/charts/inferencepool/templates/gke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,107 +40,4 @@ spec:
logging:
enabled: true # log all requests by default
---
{{- /*
Removed hunk: metrics-reader resources as they previously lived in gke.yaml.
Rendered when either the deprecated monitoring.gke.enabled flag is set or
Prometheus monitoring and its auth option are both enabled. The variables
below hold the object names shared by the documents that follow; the first
document is the metrics-reader ServiceAccount.
*/ -}}
{{- if or .Values.inferenceExtension.monitoring.gke.enabled (and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled) }}
{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
{{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}}
{{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}}
{{- $gmpNamespace := "gmp-system" -}}
{{- $isAutopilot := false -}}
{{- with .Values.provider.gke }}
{{- $isAutopilot = .autopilot | default false -}}
{{- end }}
{{- if $isAutopilot -}}
{{- $gmpNamespace = "gke-gmp-system" -}}
{{- end -}}
{{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ $metricsReadSA }}
namespace: {{ .Release.Namespace }}
---
# Service-account token Secret tied to the metrics-reader ServiceAccount
# through the kubernetes.io/service-account.name annotation.
apiVersion: v1
kind: Secret
metadata:
name: {{ $metricsReadSecretName }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
annotations:
kubernetes.io/service-account.name: {{ $metricsReadSA }}
type: kubernetes.io/service-account-token
---
# PodMonitoring that scrapes /metrics on the "metrics" port of the selected
# pods, authenticating with the bearer token from the Secret above.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
name: {{ .Release.Name }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
endpoints:
- port: metrics
scheme: http
interval: {{ .Values.inferenceExtension.monitoring.interval }}
path: /metrics
authorization:
type: Bearer
credentials:
secret:
name: {{ $metricsReadSecretName }}
key: token
selector:
matchLabels:
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
---
# Cluster-wide permission to GET the /metrics non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ $metricsReadRoleName }}
rules:
- nonResourceURLs:
- /metrics
verbs:
- get
---
# Grants the ClusterRole above to the metrics-reader ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ $metricsReadRoleBindingName }}
subjects:
- kind: ServiceAccount
name: {{ $metricsReadSA }}
namespace: {{ .Release.Namespace }}
roleRef:
kind: ClusterRole
name: {{ $metricsReadRoleName }}
apiGroup: rbac.authorization.k8s.io
---
# Read access limited to the single metrics-reader token Secret.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ $secretReadRoleName }}
rules:
- resources:
- secrets
apiGroups: [""]
verbs: ["get", "list", "watch"]
resourceNames: [{{ $metricsReadSecretName | quote }}]
---
# Binds the secret-read Role to the "collector" ServiceAccount in the GMP
# namespace selected above (gmp-system, or gke-gmp-system on Autopilot).
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ $gmpCollectorRoleBindingName }}
namespace: {{ .Release.Namespace }}
roleRef:
name: {{ $secretReadRoleName }}
kind: Role
apiGroup: rbac.authorization.k8s.io
subjects:
- name: collector
namespace: {{ $gmpNamespace }}
kind: ServiceAccount
{{- end }}
{{- end }}
4 changes: 0 additions & 4 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ inferenceExtension:
secretName: inference-gateway-sa-metrics-reader-secret
# additional labels for the ServiceMonitor
extraLabels: {}

# DEPRECATED: The 'gke' configuration will be removed in the next release.
gke:
enabled: false
tracing:
enabled: false
otelExporterEndpoint: "http://localhost:4317"
Expand Down