Skip to content

Commit

Permalink
feat(workers): Enable kube-state-metrics with managed prometheus (goo…
Browse files Browse the repository at this point in the history
…gle#2886)

This is basically the configuration documented at
https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/kube_state_metrics
with the following changes:

- Split into a file per resource
- Updated version to kube-state-metrics 2.14.0
- Changed scrape interval from 30s to 5s
- Updated to only look at cronjob and pod resources; we can expand this
later if we wish

---------

Co-authored-by: Andrew Pollock <[email protected]>
  • Loading branch information
rjerrems and andrewpollock authored Nov 25, 2024
1 parent baec3a9 commit 60b4ec8
Show file tree
Hide file tree
Showing 9 changed files with 329 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Managed-Prometheus scrape config for the kube-state-metrics `metrics` port.
# Selects pods by the app.kubernetes.io/name label and keeps only the
# kube_cronjob* / kube_pod* metric families.
apiVersion: monitoring.googleapis.com/v1
kind: ClusterPodMonitoring
metadata:
  name: kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/part-of: google-cloud-managed-prometheus
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  endpoints:
    - port: metrics
      # Customized from 30s to 5s.
      interval: 5s
      metricRelabeling:
        - action: keep
          # Curated subset of metrics to reduce costs, currently just
          # targeting cronjob and pod resources.
          # Quoted so the regex metacharacters are always read as a string.
          regex: 'kube_(cronjob|pod)(_.+)?'
          sourceLabels: [__name__]
  targetLabels:
    # Explicitly empty so the metric labels are respected.
    metadata: []
130 changes: 130 additions & 0 deletions deployment/clouddeploy/gke-workers/base/ksm_cluster_role.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Cluster-wide read permissions for kube-state-metrics. Almost every rule is
# list/watch only; the exceptions are `get` on pods and statefulsets and
# `create` on tokenreviews / subjectaccessreviews.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: gmp-public:kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    # Quoted so the version is always treated as a string.
    app.kubernetes.io/version: "2.14.0"
rules:
  # Core API group.
  # NOTE(review): list/watch on `secrets` is broad. The scrape config in this
  # change keeps only cronjob and pod metrics, so this could probably be
  # narrowed once the set of collected resources is narrowed - confirm first.
  - apiGroups:
      - ""
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs:
      - list
      - watch
  # Presumably needed for the --pod / --pod-namespace self-lookup used by the
  # StatefulSet args - verify against the kube-state-metrics documentation.
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
  # NOTE(review): the legacy `extensions` group likely no longer serves these
  # resources on current Kubernetes versions; confirm before removing.
  - apiGroups:
      - extensions
    resources:
      - daemonsets
      - deployments
      - replicasets
      - ingresses
    verbs:
      - list
      - watch
  - apiGroups:
      - apps
    resources:
      - statefulsets
      - daemonsets
      - deployments
      - replicasets
    verbs:
      - list
      - watch
  # Presumably paired with the pod `get` rule above for the self-lookup of
  # the owning StatefulSet - verify against the kube-state-metrics docs.
  - apiGroups:
      - apps
    resources:
      - statefulsets
    verbs:
      - get
  - apiGroups:
      - batch
    resources:
      - cronjobs
      - jobs
    verbs:
      - list
      - watch
  - apiGroups:
      - autoscaling
    resources:
      - horizontalpodautoscalers
    verbs:
      - list
      - watch
  - apiGroups:
      - authentication.k8s.io
    resources:
      - tokenreviews
    verbs:
      - create
  - apiGroups:
      - authorization.k8s.io
    resources:
      - subjectaccessreviews
    verbs:
      - create
  - apiGroups:
      - policy
    resources:
      - poddisruptionbudgets
    verbs:
      - list
      - watch
  - apiGroups:
      - certificates.k8s.io
    resources:
      - certificatesigningrequests
    verbs:
      - list
      - watch
  - apiGroups:
      - storage.k8s.io
    resources:
      - storageclasses
      - volumeattachments
    verbs:
      - list
      - watch
  - apiGroups:
      - admissionregistration.k8s.io
    resources:
      - mutatingwebhookconfigurations
      - validatingwebhookconfigurations
    verbs:
      - list
      - watch
  - apiGroups:
      - networking.k8s.io
    resources:
      - networkpolicies
      - ingresses
    verbs:
      - list
      - watch
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - list
      - watch
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Binds the gmp-public:kube-state-metrics ClusterRole to the
# kube-state-metrics ServiceAccount in the gmp-public namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: gmp-public:kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    # Quoted so the version is always treated as a string.
    app.kubernetes.io/version: "2.14.0"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: gmp-public:kube-state-metrics
subjects:
  - kind: ServiceAccount
    namespace: gmp-public
    name: kube-state-metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Scales the kube-state-metrics StatefulSet between 1 and 10 replicas, driven
# by average memory utilization (target 60%). Scale-down is deliberately slow:
# at most one pod per policy period.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: kube-state-metrics
  namespace: gmp-public
spec:
  maxReplicas: 10
  minReplicas: 1
  scaleTargetRef:
    apiVersion: apps/v1
    kind: StatefulSet
    name: kube-state-metrics
  metrics:
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 60
  behavior:
    scaleDown:
      policies:
        - type: Pods
          value: 1
          # Under-utilization needs to persist for `periodSeconds` before any
          # action can be taken.
          # Current supported max from
          # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/horizontal-pod-autoscaler-v2/.
          periodSeconds: 1800
      # Current supported max from
      # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/horizontal-pod-autoscaler-v2/.
      stabilizationWindowSeconds: 3600
15 changes: 15 additions & 0 deletions deployment/clouddeploy/gke-workers/base/ksm_pod_monitoring.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Managed-Prometheus scrape config for the `metrics-self` port of the
# kube-state-metrics pods in gmp-public, scraped every 30s.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  namespace: gmp-public
  name: kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/part-of: google-cloud-managed-prometheus
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  endpoints:
    - port: metrics-self
      interval: 30s
19 changes: 19 additions & 0 deletions deployment/clouddeploy/gke-workers/base/ksm_service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Headless Service (clusterIP: None) in front of the kube-state-metrics pods,
# exposing the `metrics` (8080) and `metrics-self` (8081) named ports.
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    # Quoted so the version is always treated as a string.
    app.kubernetes.io/version: "2.14.0"
  namespace: gmp-public
  name: kube-state-metrics
spec:
  clusterIP: None
  ports:
    - name: metrics
      port: 8080
      targetPort: metrics
    - name: metrics-self
      port: 8081
      targetPort: metrics-self
  selector:
    app.kubernetes.io/name: kube-state-metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# ServiceAccount for kube-state-metrics in the gmp-public namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: gmp-public
  name: kube-state-metrics
  labels:
    app.kubernetes.io/name: kube-state-metrics
    # Quoted so the version is always treated as a string.
    app.kubernetes.io/version: "2.14.0"
83 changes: 83 additions & 0 deletions deployment/clouddeploy/gke-workers/base/ksm_stateful_set.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# kube-state-metrics v2.14.0 workload, run as a StatefulSet in gmp-public
# under the kube-state-metrics ServiceAccount. Serves metrics on 8080 and
# its own telemetry on 8081.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    # Quoted so the version is always treated as a string.
    app.kubernetes.io/version: "2.14.0"
  namespace: gmp-public
  name: kube-state-metrics
spec:
  # NOTE(review): an HPA in this change targets this StatefulSet; re-applying
  # a fixed `replicas` value may reset the HPA-chosen count - consider
  # omitting it once the HPA is in place.
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  serviceName: kube-state-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/version: "2.14.0"
    spec:
      affinity:
        nodeAffinity:
          # Only schedule on Linux nodes with arm64 or amd64 architecture.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - arm64
                      - amd64
                  - key: kubernetes.io/os
                    operator: In
                    values:
                      - linux
      containers:
        # NOTE(review): container name is singular ("metric"); left unchanged
        # because renaming alters the container label on existing series -
        # confirm whether this spelling is intentional.
        - name: kube-state-metric
          image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0
          env:
            # Downward-API values passed to the --pod / --pod-namespace flags.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          args:
            - --pod=$(POD_NAME)
            - --pod-namespace=$(POD_NAMESPACE)
            - --port=8080
            - --telemetry-port=8081
          ports:
            - name: metrics
              containerPort: 8080
            - name: metrics-self
              containerPort: 8081
          resources:
            requests:
              cpu: 100m
              memory: 190Mi
            limits:
              memory: 250Mi
          securityContext:
            allowPrivilegeEscalation: false
            privileged: false
            capabilities:
              drop:
                # Canonical upper-case spelling, as expected by the
                # Kubernetes Pod Security Standards.
                - ALL
            runAsUser: 1000
            runAsGroup: 1000
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /
              port: 8081
            initialDelaySeconds: 5
            timeoutSeconds: 5
      serviceAccountName: kube-state-metrics
8 changes: 8 additions & 0 deletions deployment/clouddeploy/gke-workers/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,11 @@ resources:
- nvd-mirror.yaml
- backup.yaml
- generate-sitemap.yaml
- ksm_cluster_pod_monitoring.yaml
- ksm_cluster_role_binding.yaml
- ksm_cluster_role.yaml
- ksm_horizontal_pod_autoscaler.yaml
- ksm_pod_monitoring.yaml
- ksm_service_account.yaml
- ksm_service.yaml
- ksm_stateful_set.yaml

0 comments on commit 60b4ec8

Please sign in to comment.