diff --git a/charts/karpenter_nodes/Chart.yaml b/charts/karpenter_nodes/Chart.yaml index 1d0304f..a43b4f7 100644 --- a/charts/karpenter_nodes/Chart.yaml +++ b/charts/karpenter_nodes/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: karpenter_nodes -version: 1.0.3 +version: 1.1.0 description: A Helm chart for generating NodeClasses and NodePools for Karpenter maintainers: - name: nadavbuc diff --git a/charts/karpenter_nodes/examples/argocd_example.yaml b/charts/karpenter_nodes/examples/argocd_example.yaml index 84b2a26..9bad517 100644 --- a/charts/karpenter_nodes/examples/argocd_example.yaml +++ b/charts/karpenter_nodes/examples/argocd_example.yaml @@ -9,7 +9,7 @@ spec: sources: - repoURL: 'https://opensource.fiverr.com/public_charts/' chart: karpenter_nodes - targetRevision: 1.0.3 + targetRevision: 1.1.0 helm: valueFiles: - $values/karpenter_nodes/eks-dev/common.yaml diff --git a/charts/karpenter_nodes/examples/output/output.yaml b/charts/karpenter_nodes/examples/output/output.yaml index b4d2e88..7ab739a 100644 --- a/charts/karpenter_nodes/examples/output/output.yaml +++ b/charts/karpenter_nodes/examples/output/output.yaml @@ -9,7 +9,7 @@ globalDefault: false description: "Used for dummy pods to generate headroom in karpenter" --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-canary-amd64" @@ -52,6 +52,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + 
imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-canary @@ -115,7 +142,7 @@ spec: EOF --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-cilium-managed-amd64" @@ -158,6 +185,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-cilium-managed @@ -221,7 +275,7 @@ spec: EOF --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-default-amd64" @@ -264,6 +318,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + 
nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-default @@ -327,7 +408,7 @@ spec: EOF --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-gpu-amd64" @@ -370,6 +451,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-gpu @@ -433,7 +541,7 @@ spec: EOF --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-ingress-multiarch" @@ -476,6 +584,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s 
userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-ingress @@ -532,7 +667,7 @@ spec: echo "$(jq '.registryBurst=200' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-jobs-amd64" @@ -575,6 +710,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-jobs @@ -638,7 +800,7 @@ spec: EOF --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-monitoring-multiarch" @@ -682,6 +844,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + 
nodefs.inodesFree: 10m0s + pid.available: 2m0s userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-monitoring @@ -745,7 +934,7 @@ spec: EOF --- # Source: karpenter_nodes/templates/nodeclass.yaml -apiVersion: karpenter.k8s.aws/v1beta1 +apiVersion: karpenter.k8s.aws/v1 kind: EC2NodeClass metadata: name: "nodes-workers-amd64" @@ -788,6 +977,33 @@ spec: httpProtocolIPv6: disabled httpPutResponseHopLimit: 2 httpTokens: required + kubelet: + systemReserved: + cpu: 250m + memory: 200Mi + ephemeral-storage: 2Gi + kubeReserved: + cpu: 250m + memory: 1Gi + ephemeral-storage: 4Gi + evictionHard: + memory.available: 768Mi + nodefs.available: 8% + nodefs.inodesFree: 8% + evictionSoft: + memory.available: 1280Mi + nodefs.available: 10% + nodefs.inodesFree: 15% + imagefs.available: 10% + imagefs.inodesFree: 10% + pid.available: 10% + evictionSoftGracePeriod: + imagefs.available: 10m0s + imagefs.inodesFree: 10m0s + memory.available: 5m0s + nodefs.available: 10m0s + nodefs.inodesFree: 10m0s + pid.available: 2m0s userData: | CLUSTER_NAME=eks-dev INSTANCEGROUP=nodes-workers @@ -851,7 +1067,7 @@ spec: EOF --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-canary-amd64" @@ -934,40 +1150,13 @@ spec: - "3" - "4" - "5" - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: 720h - consolidationPolicy: WhenUnderutilized + consolidationPolicy: 
WhenEmptyOrUnderutilized weight: 1 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-cilium-managed-amd64" @@ -1046,40 +1235,13 @@ spec: operator: NotIn values: - metal - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: 720h - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized weight: 1 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-default-amd64" @@ -1157,40 +1319,13 @@ spec: - "3" - "4" - "5" - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: 720h - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized weight: 2 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: 
karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-default-od-amd64" @@ -1264,40 +1399,13 @@ spec: operator: In values: - "6" - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: 720h - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized weight: 1 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-gpu-amd64" @@ -1362,42 +1470,15 @@ spec: - g5.xlarge - g5.2xlarge - g5.4xlarge - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: 720h - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized limits: cpu: 128 weight: 1 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: 
"nodes-ingress-multiarch" @@ -1467,40 +1548,13 @@ spec: operator: NotIn values: - metal - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: Never - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized weight: 1 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-jobs-amd64" @@ -1570,33 +1624,6 @@ spec: operator: NotIn values: - metal - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: Never consolidationPolicy: WhenEmpty @@ -1604,7 +1631,7 @@ spec: weight: 1 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-monitoring-multiarch" @@ -1669,40 +1696,13 @@ spec: operator: NotIn values: - metal - kubelet: - systemReserved: - cpu: 250m - 
memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: Never - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized weight: 1 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-workers-amd64" @@ -1784,40 +1784,13 @@ spec: - "3" - "4" - "5" - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: 720h - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized weight: 2 --- # Source: karpenter_nodes/templates/nodepool.yaml -apiVersion: karpenter.sh/v1beta1 +apiVersion: karpenter.sh/v1 kind: NodePool metadata: name: "nodes-workers-c-amd64" @@ -1898,34 +1871,7 @@ spec: - "3" - "4" - "5" - kubelet: - systemReserved: - cpu: 250m - memory: 200Mi - ephemeral-storage: 2Gi - kubeReserved: - cpu: 250m - memory: 1Gi - ephemeral-storage: 4Gi - evictionHard: - 
memory.available: 768Mi - nodefs.available: 8% - nodefs.inodesFree: 8% - evictionSoft: - memory.available: 1280Mi - nodefs.available: 10% - nodefs.inodesFree: 15% - imagefs.available: 10% - imagefs.inodesFree: 10% - pid.available: 10% - evictionSoftGracePeriod: - imagefs.available: 10m0s - imagefs.inodesFree: 10m0s - memory.available: 5m0s - nodefs.available: 10m0s - nodefs.inodesFree: 10m0s - pid.available: 2m0s disruption: expireAfter: 720h - consolidationPolicy: WhenUnderutilized + consolidationPolicy: WhenEmptyOrUnderutilized weight: 1 diff --git a/charts/karpenter_nodes/grafana/Karpenter-OverView.json b/charts/karpenter_nodes/grafana/Karpenter-OverView.json deleted file mode 100644 index 0e372b4..0000000 --- a/charts/karpenter_nodes/grafana/Karpenter-OverView.json +++ /dev/null @@ -1,1935 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": [], - "__requires": [ - { - "type": "panel", - "id": "bargauge", - "name": "Bar gauge", - "version": "" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "8.4.4" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - "version": "" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "id": null, - "iteration": 1712640887031, - "links": [], - "liveNow": false, - 
"panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "super-light-purple", - "mode": "fixed" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 40, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "vertical", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { - "titleSize": 40, - "valueSize": 40 - }, - "textMode": "value_and_name" - }, - "pluginVersion": "8.4.4", - "repeat": "CLUSTER", - "repeatDirection": "v", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": false, - "expr": "sum(karpenter_nodeclaims_terminated{reason=\"interruption\",cluster=~\"$CLUSTER\"}) by (nodepool) - (sum(karpenter_nodeclaims_terminated{reason=\"interruption\",cluster=~\"$CLUSTER\"}) by (nodepool) offset $timediff)", - "instant": true, - "interval": "", - "legendFormat": "{{nodepool}}", - "refId": "A" - } - ], - "title": "$CLUSTER Total Interruptions - during $timediff", - "transparent": true, - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - 
"thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 6 - }, - "id": 38, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{pod=~\"karpenter.*\"})) by (cluster, pod)", - "interval": "", - "legendFormat": "[{{cluster}}]{{pod}}", - "refId": "A" - } - ], - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 - }, - "id": 39, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "sortBy": "Max", - 
"sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(container_memory_usage_bytes{pod=~\"karpenter.*\"}) by (cluster, pod)", - "interval": "", - "legendFormat": "[{{cluster}}]{{pod}}", - "refId": "A" - } - ], - "title": "Memory Usage", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 22, - "panels": [], - "title": "Cluster Capacity", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 35, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "repeat": "CLUSTER", - "repeatDirection": "v", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": 
"count(kube_node_labels{cluster=~\"$CLUSTER\",label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_nodepool, label_karpenter_sh_capacity_type)", - "interval": "", - "legendFormat": "[{{label_karpenter_sh_capacity_type}}]{{label_karpenter_sh_nodepool}} ", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "count(kube_node_labels{cluster=~\"$CLUSTER\",label_karpenter_sh_capacity_type!=\"\"}) by ( label_karpenter_sh_capacity_type)", - "hide": false, - "interval": "", - "legendFormat": "TOTAL [{{label_karpenter_sh_capacity_type}}]", - "refId": "B" - } - ], - "title": "Spot/OD by Provisioners - $CLUSTER", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 30, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": 
"code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage) by (cluster, nodepool, resource_type) / sum(karpenter_nodepool_limit) by (cluster, nodepool, resource_type)", - "interval": "", - "legendFormat": "[{{cluster}}] {{nodepool}} {{resource_type}}", - "range": true, - "refId": "A" - } - ], - "title": "NodeGroup Usage Out Of limit", - "type": "timeseries" - }, - { - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 13, - "title": "Provisioning", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "opm" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max", - "sum" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(increase(karpenter_nodeclaims_created[1m])) by (cluster, nodepool, reason)", - "interval": "", - "legendFormat": "[A][{{cluster}} 
{{nodepool}}]{{reason}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "- sum(increase(karpenter_nodeclaims_terminated[1m])) by (cluster, reason, nodepool)", - "hide": false, - "interval": "", - "legendFormat": "[R][{{cluster}} {{nodepool}}]{{reason}}", - "range": true, - "refId": "B" - } - ], - "title": "Created/Removed Nodes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "left", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 20, - "maxDataPoints": 9999999999, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Total", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(idelta(karpenter_nodeclaims_terminated{reason=\"interruption\"}[1m])) by (nodepool, cluster)", - 
"hide": false, - "interval": "", - "legendFormat": "[{{cluster}}] {{nodepool}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "op": "gt", - "value": 10, - "visible": true - } - ], - "title": "Karpenter Spot Interruptions", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "opm" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 - }, - "id": 11, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean", - "sum" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(irate(karpenter_cloudprovider_duration_seconds_count{method!=\"GetInstanceTypes\"}[1m])) by (cluster, method) * 60", - "interval": "", - "legendFormat": "{{cluster}} {{method}}", - "range": true, - "refId": "A" - } - ], - "title": "Instance Provisioning Actions", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": 
{ - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "opm" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 - }, - "id": 9, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max", - "sum" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(karpenter_interruption_actions_performed[1m])) by (action_type,cluster)", - "interval": "", - "legendFormat": "[{{cluster}}]{{action_type}}", - "range": true, - "refId": "A" - } - ], - "title": "Interruption actions", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", 
- "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(karpenter_deprovisioning_actions_performed[5m])) by (action)", - "interval": "", - "legendFormat": "{{action}}", - "range": true, - "refId": "A" - } - ], - "title": "Deprovisioning", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 - }, - "id": 4, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max" - ], - 
"displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "histogram_quantile($perc, sum(rate(karpenter_cloudprovider_duration_seconds_bucket{method!=\"GetInstanceTypes\"}[5m])) by (le, method))", - "interval": "", - "legendFormat": "{{method}}", - "range": true, - "refId": "A" - } - ], - "title": "AWS Requests", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "opm" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 56 - }, - "id": 8, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "mean", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(irate(karpenter_interruption_received_messages[1m])) by (message_type, cluster) * 60", - "hide": false, - "interval": "", - 
"legendFormat": "{{cluster}}_{{message_type}}", - "range": true, - "refId": "B" - } - ], - "title": "Interruption messages", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 64 - }, - "id": 24, - "panels": [], - "title": "Workers", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 65 - }, - "id": 26, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(controller_runtime_active_workers{service=\"karpenter\"}) by (cluster, controller) / sum(controller_runtime_max_concurrent_reconciles{service=\"karpenter\"}) by (cluster, controller)", - "legendFormat": "[{{cluster}}] {{controller}}", - "range": true, - "refId": "A" - } - ], - "title": "Workers Utilization", - "type": "timeseries" - }, - { - 
"datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 65 - }, - "id": 28, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum(rate(controller_runtime_reconcile_errors_total{service=\"karpenter\"}[5m])) by (cluster, controller)", - "legendFormat": "[{{cluster}}] {{controller}}", - "range": true, - "refId": "A" - } - ], - "title": "Reconcile errors rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 14, - "w": 12, - "x": 0, - "y": 73 - }, - "id": 50, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "histogram_quantile($perc, rate(controller_runtime_reconcile_time_seconds_bucket{controller=~\"$controller\",cluster=~\"$CLUSTER\"}[10m]))", - "hide": false, - "interval": "", - "legendFormat": "[$perc {{controller}}] {{cluster}}", - "range": true, - "refId": "Minimum" - } - ], - "title": "Controller Reconciliation Latency [$controller]", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 14, - "w": 12, - "x": 12, - "y": 73 - }, - "id": 54, - "options": { - "displayMode": "gradient", - "minVizHeight": 10, - "minVizWidth": 0, - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - 
"uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(controller_runtime_reconcile_total{cluster=~\"$CLUSTER\",controller=~\"$controller\"}[10m])) by (controller,cluster)", - "interval": "", - "legendFormat": "[{{cluster}}]{{controller}}", - "range": true, - "refId": "A", - "target": "" - } - ], - "title": "Controller Reconciliation Rate", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 87 - }, - "id": 46, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "karpenter_nodes_termination_time_seconds{quantile=\"$quantile\"}", - "hide": false, - "interval": "", - "legendFormat": "[$quantile][{{cluster}}]{{nodepool}}", - "range": true, - "refId": "C" - } - ], - "title": "Node Termination Latency", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 
35, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": true, - "text": "0.9", - "value": "0.9" - }, - "hide": 0, - "includeAll": false, - "label": "Percentile", - "multi": false, - "name": "perc", - "options": [ - { - "selected": false, - "text": "0.5", - "value": "0.5" - }, - { - "selected": false, - "text": "0.8", - "value": "0.8" - }, - { - "selected": true, - "text": "0.9", - "value": "0.9" - }, - { - "selected": false, - "text": "1", - "value": "1" - } - ], - "query": "0.5, 0.8, 0.9,1", - "queryValue": "", - "skipUrlSync": false, - "type": "custom" - }, - { - "current": { - "selected": false, - "text": "1d", - "value": "1d" - }, - "hide": 0, - "includeAll": false, - "multi": false, - "name": "timediff", - "options": [ - { - "selected": true, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "2d", - "value": "2d" - }, - { - "selected": false, - "text": "5d", - "value": "5d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "14d", - "value": "14d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - }, - { - "selected": false, - "text": "60d", - "value": "60d" - }, - { - "selected": false, - "text": "90d", - "value": "90d" - } - ], - "query": "1d,2d,5d,7d,14d,30d,60d,90d", - "queryValue": "", - "skipUrlSync": false, - "type": "custom" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(cluster)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "CLUSTER", - "options": [], - "query": { - "query": "label_values(cluster)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(karpenter_nodes_termination_time_seconds,quantile)", - "hide": 0, - 
"includeAll": false, - "multi": false, - "name": "quantile", - "options": [], - "query": { - "query": "label_values(karpenter_nodes_termination_time_seconds,quantile)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(controller_runtime_reconcile_time_seconds_count, controller)", - "hide": 0, - "includeAll": true, - "multi": true, - "name": "controller", - "options": [], - "query": { - "query": "label_values(controller_runtime_reconcile_time_seconds_count, controller)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Karpenter-for-export", - "uid": "ctAxtWaIk", - "version": 12, - "weekStart": "" -} \ No newline at end of file diff --git a/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json b/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json index 014fd68..7a8b926 100644 --- a/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json +++ b/charts/karpenter_nodes/grafana/Karpenter-Per-NodeGroup.json @@ -1,2499 +1,2530 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": [], - "__requires": [ - { - "type": "panel", - "id": "bargauge", - "name": "Bar gauge", - "version": "" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "8.4.4" - }, - { - "type": "panel", - "id": "piechart", - "name": "Pie chart", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "stat", - "name": "Stat", - 
"version": "" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], - "annotations": { - "list": [ + "__inputs": [ { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 1, - "id": null, - "iteration": 1713189648192, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "super-light-purple", - "mode": "fixed" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 32, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { - "titleSize": 40, - "valueSize": 40 - }, - "textMode": "value_and_name" + ], + "__elements": [], + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": false, - "expr": "sum(karpenter_nodeclaims_terminated{reason=\"interruption\",cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", - "instant": true, - "interval": "", - 
"legendFormat": "{{nodepool}}", - "refId": "A" - } - ], - "title": "Total Interruptions", - "transparent": true, - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.4.4" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [] - }, - "overrides": [] + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" }, - "gridPos": { - "h": 7, - "w": 4, - "x": 4, - "y": 0 + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" }, - "id": 36, - "options": { - "displayLabels": [ - "percent", - "name" - ], - "legend": { - "displayMode": "hidden", - "placement": "bottom" - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" }, - "pluginVersion": "8.4.4", - "targets": [ + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ { + "builtIn": 1, "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" }, - "exemplar": false, - "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_capacity_type)", - "instant": true, - "interval": "", - "legendFormat": "{{label_karpenter_sh_capacity_type}}", - "refId": "A" + "type": "dashboard" } - ], - "title": "LifeCycles", - 
"transparent": true, - "type": "piechart" + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "iteration": 1723708108672, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "super-light-purple", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "mappings": [] - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 4, - "x": 8, - "y": 0 - }, - "id": 40, - "options": { - "displayLabels": [ - "percent", - "name" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 32, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "titleSize": 40, + "valueSize": 40 + }, + "textMode": "value_and_name" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "sum(karpenter_nodeclaims_terminated_total{reason=\"interruption\",cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "instant": true, + "interval": "", + "legendFormat": "{{nodepool}}", + "refId": "A" + } ], - "legend": { - "displayMode": "hidden", - "placement": "bottom" - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, 
- "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": false, - "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_topology_kubernetes_io_zone!=\"\"}) by (label_topology_kubernetes_io_zone)", - "instant": true, - "interval": "", - "legendFormat": "{{label_topology_kubernetes_io_zone}}", - "refId": "A" - } - ], - "title": "AZ ", - "transparent": true, - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "title": "Total Interruptions", + "transparent": true, + "type": "stat" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false } - ] + }, + "mappings": [] }, - "unit": "none" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 3, - "x": 12, - "y": 0 - }, - "id": 39, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 36, + "options": { + "displayLabels": [ + "percent", + "name" ], - "fields": "", - "values": false + "legend": { + "displayMode": "hidden", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "textMode": "auto" - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": 
"${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"memory\"} / 1024 /1024 / 1024) / \nsum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"}) ", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Memory / CPU Ratio", - "transparent": true, - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_capacity_type)", + "instant": true, + "interval": "", + "legendFormat": "{{label_karpenter_sh_capacity_type}}", + "refId": "A" + } + ], + "title": "LifeCycles", + "transparent": true, + "type": "piechart" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "semi-dark-blue", - "value": null - }, - { - "color": "red", - "value": 1536 + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false } - ] + }, + "mappings": [] }, - "unit": "none" + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 3, - "x": 15, - "y": 0 - }, - "id": 38, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 40, + "options": { + "displayLabels": 
[ + "percent", + "name" ], - "fields": "", - "values": false + "legend": { + "displayMode": "hidden", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "textMode": "auto" - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total cores $NODEGROUP", - "transparent": true, - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_topology_kubernetes_io_zone!=\"\"}) by (label_topology_kubernetes_io_zone)", + "instant": true, + "interval": "", + "legendFormat": "{{label_topology_kubernetes_io_zone}}", + "refId": "A" + } + ], + "title": "AZ ", + "transparent": true, + "type": "piechart" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-BlYlRd" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1536 - } - ] - }, - "unit": "none" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 3, - "x": 18, - "y": 0 + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + 
}, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 12, + "y": 0 + }, + "id": 39, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"memory\"} / 1024 /1024 / 1024) / \nsum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"}) ", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Memory / CPU Ratio", + "transparent": true, + "type": "stat" }, - "id": 37, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^Value$/", - "values": false + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "textMode": "auto" - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"memory\"} /1024 /1024 /1024)", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total Memory (Gb)", - "transparent": true, - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "semi-dark-blue", + "value": null + }, + { + "color": "red", + "value": 1536 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 38, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total cores $NODEGROUP", + "transparent": true, + "type": "stat" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 3, - "x": 21, - "y": 0 + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-BlYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1536 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 37, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + 
"textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\",nodepool=~\"$NODEPOOL\",resource_type=\"memory\"} /1024 /1024 /1024)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Memory (Gb)", + "transparent": true, + "type": "stat" }, - "id": 41, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "textMode": "auto" - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type, nodepool) / sum(karpenter_nodepool_limit{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type, nodepool)", - "instant": false, - "interval": "", - "legendFormat": "{{resource_type}} {{nodepool}}", - "range": true, - "refId": "A" - } - ], - "title": "NodeGroup Usage Out Of limit", - "transparent": true, - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 41, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepools_usage{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type, nodepool) / sum(karpenter_nodepools_limit{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type, nodepool)", + "instant": false, + "interval": "", + "legendFormat": "{{resource_type}} {{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "NodeGroup Usage Out Of limit", + "transparent": true, + "type": "stat" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] }, - "mappings": [] + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 0, - "y": 7 - }, - "id": 44, - "options": { - "displayLabels": [ - "percent", - "name" - ], - "legend": { - "displayMode": "table", - "placement": "right", - "values": [ - "value" - ] + "gridPos": { + "h": 8, + "w": 7, + "x": 0, + "y": 7 }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "id": 44, + "options": { + "displayLabels": [ + "percent", + "name" ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": false, - "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", 
label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type)", - "instant": true, - "interval": "", - "legendFormat": "{{label_node_kubernetes_io_instance_type}}", - "refId": "A" - } - ], - "title": "Instance Types", - "transparent": true, - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 500 - } + "legend": { + "displayMode": "table", + "placement": "right", + "values": [ + "value" ] }, - "unit": "none" + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 3, - "x": 7, - "y": 7 + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": false, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type)", + "instant": true, + "interval": "", + "legendFormat": "{{label_node_kubernetes_io_instance_type}}", + "refId": "A" + } + ], + "title": "Instance Types", + "transparent": true, + "type": "piechart" }, - "id": 42, - "options": { - "displayMode": "gradient", - "orientation": "vertical", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "showUnfilled": true - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(karpenter_nodes_created{cluster=\"$CLUSTER\", 
nodepool=~\"$NODEPOOL\"}) by (nodepool)", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "Created by {{provisioner}}", - "range": true, - "refId": "A" - } - ], - "title": "Total Created Nodes", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 500 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 7, + "y": 7 + }, + "id": 42, + "options": { + "displayMode": "gradient", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(karpenter_nodes_created_total{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Created by {{provisioner}}", + "range": true, + "refId": "A" + } + ], + "title": "Total Created Nodes", + "type": "bargauge" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] }, - "mappings": [] + "overrides": [] }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 5, - "x": 10, - "y": 7 - }, - "id": 47, - "options": { - "displayLabels": [ - "name" - 
], - "legend": { - "displayMode": "hidden", - "placement": "bottom" - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "gridPos": { + "h": 8, + "w": 5, + "x": 10, + "y": 7 + }, + "id": 47, + "options": { + "displayLabels": [ + "name" ], - "fields": "", - "values": false + "legend": { + "displayMode": "hidden", + "placement": "bottom" + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodeclaims_disrupted_total{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\", reason=~\"drift.*\"}) by (type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{type}}", + "range": true, + "refId": "B" + } + ], + "title": "Drift types", + "type": "piechart" }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodeclaims_drifted{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{type}}", - "range": true, - "refId": "B" - } - ], - "title": "Drift types", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-GrYlRd" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 9, - "x": 15, - "y": 7 + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 15, + "y": 7 + }, + "id": 43, + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodeclaims_disrupted_total{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool, reason)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Removed by {{provisioner}} {{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Total Removed Nodes", + "type": "bargauge" }, - "id": 43, - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "showUnfilled": true - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 52, + "interval": "1h", 
+ "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[1d])", + "instant": false, + "interval": "", + "legendFormat": "Day", + "refId": "A" }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodeclaims_terminated{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool, reason)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Removed by {{provisioner}} {{reason}}", - "range": true, - "refId": "B" - } - ], - "title": "Total Removed Nodes", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) 
max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[7d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Week", + "refId": "B" }, - "unit": "currencyUSD" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 15 - }, - "id": 52, - "interval": "1h", - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[30d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Month", + "refId": "C" + } + ], + "title": "Spot Costs on $NODEGROUP", + "type": "bargauge" }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[1d])", - "instant": false, - "interval": "", - "legendFormat": "Day", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - { - 
"datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 53, + "interval": "1h", + "options": { + "displayMode": "gradient", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true + }, + "pluginVersion": "8.4.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[1d])", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "Day", + "refId": "A" }, - "exemplar": true, - "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[7d])", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Week", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[7d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Week", + "refId": "B" }, - "exemplar": true, - "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))[30d])", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Month", - "refId": "C" - } - ], - "title": "Spot Costs on $NODEGROUP", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[30d])", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Month", + "refId": "C" + } + ], + "title": "On-Demand Costs on $NODEGROUP", + "type": 
"bargauge" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "color": "red", - "value": 80 + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "currencyUSD" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 15 - }, - "id": 53, - "interval": "1h", - "options": { - "displayMode": "gradient", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true - }, - "pluginVersion": "8.4.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[1d])", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "Day", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - 
"exemplar": true, - "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[7d])", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Week", - "refId": "B" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 49, + "interval": "1h", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type)", + "interval": "", + "legendFormat": "{{instance_type}} / Hour", + "refId": "A" }, - "exemplar": true, - "expr": "sum_over_time(sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", 
\"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))[30d])", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "Month", - "refId": "C" - } - ], - "title": "On-Demand Costs on $NODEGROUP", - "type": "bargauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))", + "hide": false, + "interval": "", + "legendFormat": "Total Hourly Price", + "refId": "B" + } + ], + "title": "Spot Hourly Pricing for $NODEGROUP", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "color": "red", - "value": 80 + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "currencyUSD" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 50, + "interval": "1h", + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 23 + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type)", + "interval": "", + "legendFormat": "{{instance_type}} Hourly Price", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": 
"sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_offering_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))", + "hide": false, + "interval": "", + "legendFormat": "Total Hourly Price", + "refId": "B" + } + ], + "title": "On-Demand Hourly Pricing for $NODEGROUP", + "type": "timeseries" }, - "id": 49, - "interval": "1h", - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "sum" - ], - "displayMode": "table", - "placement": "right", - "sortBy": "Total", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 22, + "panels": [], + "title": "NodeGroup Capacity", + "type": "row" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type)", - "interval": "", - "legendFormat": "{{instance_type}} / Hour", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"spot\"}, \"instance_type\", \"$1\", 
\"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"spot\"}) by (instance_type))", - "hide": false, - "interval": "", - "legendFormat": "Total Hourly Price", - "refId": "B" - } - ], - "title": "Spot Hourly Pricing for $NODEGROUP", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "currencyUSD" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, 
+ "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "sum" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 23 + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(increase(karpenter_nodeclaims_created_total{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\" }[1m])) by (reason)", + "interval": "", + "legendFormat": "[ADD] {{reason}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(increase(karpenter_nodeclaims_disrupted_total{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}[1m])) by (reason)", + "hide": false, + "interval": "", + "legendFormat": "[REM] {{reason}}", + "range": true, + "refId": "B" + } + ], + "title": "Created/Removed Nodes $NODEGROUP", + "type": "timeseries" }, - "id": 50, - "interval": "1h", - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "sum" - ], - "displayMode": "table", - "placement": "right" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, - "exemplar": true, - "expr": "count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type)", - "interval": "", - "legendFormat": "{{instance_type}} Hourly Price", - "refId": "A" + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "Total Counter" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 20, + "maxDataPoints": 9999999999, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Total", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(karpenter_nodeclaims_disrupted_total{reason=\"spot_interrupted\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}[1m])) by (nodepool)", + "hide": false, + "interval": "", + "legendFormat": "{{nodepool}}", + "range": true, + "refId": "B" }, -
"exemplar": true, - "expr": "sum(count(label_replace(kube_node_labels_mixin{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\",label_karpenter_sh_capacity_type=\"on-demand\"}, \"instance_type\", \"$1\", \"label_node_kubernetes_io_instance_type\", \"(.+)\")) by (instance_type) * on (instance_type) max(karpenter_cloudprovider_instance_type_price_estimate{cluster=\"$CLUSTER\",capacity_type=\"on-demand\"}) by (instance_type))", - "hide": false, - "interval": "", - "legendFormat": "Total Hourly Price", - "refId": "B" - } - ], - "title": "On-Demand Hourly Pricing for $NODEGROUP", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 22, - "panels": [], - "title": "NodeGroup Capacity", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(karpenter_nodeclaims_disrupted_total{reason=\"spot_interrupted\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"})", + "hide": false, + "interval": "", + "legendFormat": "Total Counter", + "refId": "C" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 10, + "visible": true + } + ], + "title": "Karpenter Spot Interruptions", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { 
- "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "color": "red", - "value": 80 + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max", - "sum" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 55, + "maxDataPoints": 9999999999, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "sum(increase(karpenter_nodeclaims_drifted{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}[1m])) by (type) > 0", + "interval": "", + "legendFormat": "{{type}}", + "refId": "A" + } + ], + "title": "Drift Tracker", + "type": "timeseries" }, - "targets": [ - { - "datasource": { - "type": "prometheus", -
"uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(increase(karpenter_nodeclaims_created{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\" }[1m])) by (reason)", - "interval": "", - "legendFormat": "[ADD] {{reason}}", - "range": true, - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "- sum(increase(karpenter_nodeclaims_terminated{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}[1m])) by (reason)", - "hide": false, - "interval": "", - "legendFormat": "[REM] {{reason}}", - "range": true, - "refId": "B" - } - ], - "title": "Created/Removed Nodes $NODEGROUP", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "left", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": 
{ + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "color": "red", - "value": 80 + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "unit": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "overrides": [ + "targets": [ { - "matcher": { - "id": "byRegexp", - "options": "Total Counter" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepools_usage{resource_type=\"cpu\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "interval": "", + "legendFormat": "{{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "Current Provisioned Cores", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "id": "unit", - "value": "none" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": 
{ + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 - }, - "id": 20, - "maxDataPoints": 9999999999, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Total", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(increase(karpenter_nodeclaims_terminated{reason=\"interruption\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}[1m])) by (nodepool)", - "hide": false, - "interval": "", - "legendFormat": "{{provisioner}}", - "range": true, - "refId": "B" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(karpenter_nodeclaims_terminated{reason=\"interruption\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"})", - "hide": false, - "interval": "", - "legendFormat": "Total Counter", - "refId": "C" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "op": "gt", - "value": 10, - "visible": true - } - ], - "title": "Karpenter Spot Interruptions", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": 
true, + "expr": "sum(karpenter_nodepools_usage{resource_type=\"memory\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "interval": "", + "legendFormat": "{{cluster}} {{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "Current Provisioned Memory", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "color": "red", - "value": 80 + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 55 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + 
"lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 40 + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_capacity_type, label_karpenter_sh_nodepool)", + "interval": "", + "legendFormat": "[{{label_karpenter_sh_capacity_type}}]{{label_karpenter_sh_nodepool }}", + "refId": "A" + } + ], + "title": "LifeCycle", + "type": "timeseries" }, - "id": 55, - "maxDataPoints": 9999999999, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max" - ], - "displayMode": "list", - "placement": "bottom" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(increase(karpenter_nodeclaims_drifted{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"})) by (type) > 0", - "interval": "", - "legendFormat": "{{type}}", - "refId": "A" - } - ], - "title": "Drift Tracker", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "color": "red", - "value": 80 + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 55 + }, + "id": 56, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 47 - }, - "id": 15, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage{resource_type=\"cpu\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", - "interval": "", - "legendFormat": "{{nodepool}}", - "range": true, 
- "refId": "A" - } - ], - "title": "Current Provisioned Cores", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepools_usage{resource_type=\"nvidia.com/gpu\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", + "interval": "", + "legendFormat": "{{cluster}} {{nodepool}}", + "range": true, + "refId": "A" + } + ], + "title": "Current Provisioned GPU", + "type": "timeseries" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "color": "red", - "value": 80 + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], 
+ "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "unit": "bytes" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 45, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 47 + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type)", + "interval": "", + "legendFormat": "{{label_node_kubernetes_io_instance_type}}", + "refId": "A" + } + ], + "title": "Instance Types", + "type": "timeseries" }, - "id": 16, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "showLegend": true + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage{resource_type=\"memory\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool)", - "interval": "", - "legendFormat": "{{cluster}} {{nodepool}}", - "range": true, - "refId": "A" - } - ], - "title": "Current Provisioned Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", 
- "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "color": "red", - "value": 80 + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 55 - }, - "id": 57, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepools_usage{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type) / sum(karpenter_nodepools_limit{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type)", + "interval": "", + "legendFormat": "{{resource_type}}", + "range": true, + "refId": "A" + } + ], + "title": "NodeGroup Usage Out Of limit", + "type": "timeseries" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(karpenter_nodes_total_pod_requests{nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"}) by (nodepool)", - "instant": false, - "interval": "", - "legendFormat": "[{{nodepool}}] Pods", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(karpenter_nodes_total_daemon_requests{nodepool=~\"$NODEPOOL\",resource_type=\"cpu\"}) by (nodepool)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "[{{nodepool}}] DaemonSets", - "refId": "C" - } - ], - "title": "CPU Requests for Pods and DaemonSets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": 
"absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "color": "red", - "value": 80 + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 55 - }, - "id": 59, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "right" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 71 + }, + "id": 57, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(karpenter_nodepools_usage{resource_type!~\"cpu|memory|ephemeral_storage|pods|vpc.amazonaws.com/pod_eni|hugepages.*\", cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (nodepool, resource_type)", + "interval": "", + "legendFormat": "{{nodepool}} - {{resource_type}}", + "range": true, + "refId": "A" + } + ], + "title": "Current Provisioned 
resources", + "type": "timeseries" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(karpenter_nodes_total_pod_requests{nodepool=~\"$NODEPOOL\",resource_type=\"memory\"}) by (nodepool)", - "instant": false, - "interval": "", - "legendFormat": "[{{nodepool}}] Pods", - "refId": "A" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": "sum(karpenter_nodes_total_daemon_requests{nodepool=~\"$NODEPOOL\",resource_type=\"memory\"}) by (nodepool)", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "[{{nodepool}}] DaemonSets", - "refId": "C" - } - ], - "title": "Memory Requests for Pods and DaemonSets", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "color": "red", - "value": 80 + "thresholdsStyle": { + "mode": "off" } - ] + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 71 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 63 - }, - "id": 35, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "exemplar": true, + "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type,label_karpenter_sh_capacity_type, label_topology_kubernetes_io_zone)", + "interval": "", + "legendFormat": "[{{label_topology_kubernetes_io_zone}}][{{label_karpenter_sh_capacity_type}}]{{label_node_kubernetes_io_instance_type}}", + "refId": "A" + } + ], + "title": "Instance Types, lifecycle and zone", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [ { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "exemplar": true, - "expr": 
"count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_karpenter_sh_capacity_type, label_karpenter_sh_nodepool)", - "interval": "", - "legendFormat": "[{{label_karpenter_sh_capacity_type}}]{{label_karpenter_sh_nodepool }}", - "refId": "A" - } - ], - "title": "LifeCycle", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "current": { + "selected": false, + "text": "0.9", + "value": "0.9" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "perc", + "options": [ + { + "selected": false, + "text": "0.5", + "value": "0.5" + }, + { + "selected": false, + "text": "0.8", + "value": "0.8" + }, + { + "selected": true, + "text": "0.9", + "value": "0.9" + }, + { + "selected": false, + "text": "0.95", + "value": "0.95" + }, + { + "selected": false, + "text": "0.99", + "value": "0.99" + }, + { + "selected": false, + "text": "1.0", + "value": "1.0" } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 63 - }, - "id": 30, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" ], - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true + "query": "0.5, 0.8, 0.9, 0.95, 0.99, 1.0", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { + "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(karpenter_nodepool_usage{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type) / sum(karpenter_nodepool_limit{cluster=\"$CLUSTER\", nodepool=~\"$NODEPOOL\"}) by (resource_type)", - "interval": "", - "legendFormat": "{{resource_type}}", - "range": true, - "refId": "A" - } - ], - "title": "NodeGroup Usage Out Of limit", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "definition": "label_values(kube_node_labels,cluster)", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "CLUSTER", + "options": [], + "query": { + "query": "label_values(kube_node_labels,cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 71 - 
}, - "id": 45, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { + "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "exemplar": true, - "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type)", - "interval": "", - "legendFormat": "{{label_node_kubernetes_io_instance_type}}", - "refId": "A" - } - ], - "title": "Instance Types", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "definition": "label_values(kube_node_labels{cluster=\"$CLUSTER\"},label_nodegroup)", + "hide": 0, + "includeAll": false, + "label": "Node Group", + "multi": false, + "name": "NODEGROUP", + "options": [], + "query": { + "query": "label_values(kube_node_labels{cluster=\"$CLUSTER\"},label_nodegroup)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" }, - "overrides": [] - }, - "gridPos": { - 
"h": 8, - "w": 12, - "x": 12, - "y": 71 - }, - "id": 46, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "right", - "sortBy": "Max", - "sortDesc": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { + "current": {}, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "exemplar": true, - "expr": "count(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\", label_karpenter_sh_capacity_type!=\"\"}) by (label_node_kubernetes_io_instance_type,label_karpenter_sh_capacity_type, label_topology_kubernetes_io_zone)", - "interval": "", - "legendFormat": "[{{label_topology_kubernetes_io_zone}}][{{label_karpenter_sh_capacity_type}}]{{label_node_kubernetes_io_instance_type}}", - "refId": "A" + "definition": "label_values(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\"},label_karpenter_sh_nodepool)", + "hide": 0, + "includeAll": true, + "label": "NodePool", + "multi": true, + "name": "NODEPOOL", + "options": [], + "query": { + "query": "label_values(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\"},label_karpenter_sh_nodepool)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } - ], - "title": "Instance Types, lifecycle and zone", - "type": "timeseries" - } - ], - "refresh": false, - "schemaVersion": 35, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(kube_node_labels,cluster)", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "CLUSTER", - "options": [], - "query": { - "query": "label_values(kube_node_labels,cluster)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, 
- { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(kube_node_labels{cluster=\"$CLUSTER\"},label_nodegroup)", - "hide": 0, - "includeAll": false, - "label": "Node Group", - "multi": false, - "name": "NODEGROUP", - "options": [], - "query": { - "query": "label_values(kube_node_labels{cluster=\"$CLUSTER\"},label_nodegroup)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": {}, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "definition": "label_values(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\"},label_karpenter_sh_nodepool)", - "hide": 0, - "includeAll": true, - "label": "NodePool", - "multi": true, - "name": "NODEPOOL", - "options": [], - "query": { - "query": "label_values(kube_node_labels{cluster=\"$CLUSTER\",label_nodegroup=\"$NODEGROUP\"},label_karpenter_sh_nodepool)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Karpenter Per Node - Export", - "uid": "5DCs2Z-Sk", - "version": 2, - "weekStart": "" -} + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Karpenter Per Node Group", + "uid": "OJL3Qeq4k", + "version": 22, + "weekStart": "" + } diff --git a/charts/karpenter_nodes/templates/nodeclass.yaml b/charts/karpenter_nodes/templates/nodeclass.yaml index fd02ddd..407bc84 100644 --- a/charts/karpenter_nodes/templates/nodeclass.yaml +++ b/charts/karpenter_nodes/templates/nodeclass.yaml @@ -64,6 +64,63 @@ spec: httpProtocolIPv6: {{ $v.metaDataHttpProtocolIPv6 | default $.Values.metaDataHttpProtocolIPv6 }} httpPutResponseHopLimit: {{ $v.metaDataHttpPutResponseHopLimit | default 
$.Values.metaDataHttpPutResponseHopLimit }} httpTokens: {{ $v.metaDataHttpTokens | default $.Values.metaDataHttpTokens }} + kubelet: + {{- if or (hasKey $v "kubeletClusterDNS") (hasKey $.Values "kubeletClusterDNS") }} + clusterDNS: + {{- if hasKey $v "kubeletClusterDNS" }} + {{- range $v.kubeletClusterDNS }} + - {{ . }} + {{- end }} + {{- else }} + {{- range $.Values.kubeletClusterDNS }} + - {{ . }} + {{- end }} + {{- end }} + {{- end }} + systemReserved: + cpu: {{ $v.kubeletSystemReservedCpu | default $.Values.kubeletSystemReservedCpu }} + memory: {{ $v.kubeletSystemReservedMemory | default $.Values.kubeletSystemReservedMemory }} + ephemeral-storage: {{ $v.kubeletSystemReservedEphemeralStorage | default $.Values.kubeletSystemReservedEphemeralStorage }} + kubeReserved: + cpu: {{ $v.kubeletKubeReservedCpu | default $.Values.kubeletKubeReservedCpu }} + memory: {{ $v.kubeletKubeReservedMemory | default $.Values.kubeletKubeReservedMemory }} + ephemeral-storage: {{ $v.kubeletKubeReservedEphemeralStorage | default $.Values.kubeletKubeReservedEphemeralStorage }} + evictionHard: + memory.available: {{ $v.kubeletEvictionHardMemoryAvailable | default $.Values.kubeletEvictionHardMemoryAvailable }} + nodefs.available: {{ $v.kubeletEvictionHardNodefsAvailable | default $.Values.kubeletEvictionHardNodefsAvailable }} + nodefs.inodesFree: {{ $v.kubeletEvictionHardNodefsInodesFree | default $.Values.kubeletEvictionHardNodefsInodesFree }} + evictionSoft: + memory.available: {{ $v.kubeletEvictionSoftMemoryAvailable | default $.Values.kubeletEvictionSoftMemoryAvailable }} + nodefs.available: {{ $v.kubeletEvictionSoftNodefsAvailable | default $.Values.kubeletEvictionSoftNodefsAvailable }} + nodefs.inodesFree: {{ $v.kubeletEvictionSoftNodefsInodesFree | default $.Values.kubeletEvictionSoftNodefsInodesFree }} + imagefs.available: {{ $v.kubeletEvictionSoftImagefsAvailable | default $.Values.kubeletEvictionSoftImagefsAvailable }} + imagefs.inodesFree: {{ 
$v.kubeletEvictionSoftImagefsInodesFree | default $.Values.kubeletEvictionSoftImagefsInodesFree }} + pid.available: {{ $v.kubeletEvictionSoftPidAvailable | default $.Values.kubeletEvictionSoftPidAvailable }} + evictionSoftGracePeriod: + imagefs.available: {{ $v.kubeletEvictionSoftGracePeriodImagefsAvailable | default $.Values.kubeletEvictionSoftGracePeriodImagefsAvailable }} + imagefs.inodesFree: {{ $v.kubeletEvictionSoftGracePeriodImagefsInodesFree | default $.Values.kubeletEvictionSoftGracePeriodImagefsInodesFree }} + memory.available: {{ $v.kubeletEvictionSoftGracePeriodMemoryAvailable | default $.Values.kubeletEvictionSoftGracePeriodMemoryAvailable }} + nodefs.available: {{ $v.kubeletEvictionSoftGracePeriodNodefsAvailable | default $.Values.kubeletEvictionSoftGracePeriodNodefsAvailable }} + nodefs.inodesFree: {{ $v.kubeletEvictionSoftGracePeriodNodefsInodesFree | default $.Values.kubeletEvictionSoftGracePeriodNodefsInodesFree }} + pid.available: {{ $v.kubeletEvictionSoftGracePeriodPidAvailable | default $.Values.kubeletEvictionSoftGracePeriodPidAvailable }} + {{- if or (hasKey $v "kubeletImageGCHighThresholdPercent") (hasKey $.Values "kubeletImageGCHighThresholdPercent") }} + imageGCHighThresholdPercent: {{ $v.kubeletImageGCHighThresholdPercent | default $.Values.kubeletImageGCHighThresholdPercent }} + {{- end }} + {{- if or (hasKey $v "kubeletImageGCLowThresholdPercent") (hasKey $.Values "kubeletImageGCLowThresholdPercent") }} + imageGCLowThresholdPercent: {{ $v.kubeletImageGCLowThresholdPercent | default $.Values.kubeletImageGCLowThresholdPercent }} + {{- end }} + {{- if or (hasKey $v "kubeletImageMinimumGCAge") (hasKey $.Values "kubeletImageMinimumGCAge") }} + imageMinimumGCAge: {{ $v.kubeletImageMinimumGCAge | default $.Values.kubeletImageMinimumGCAge }} + {{- end }} + {{- if or (hasKey $v "kubeletCpuCFSQuota") (hasKey $.Values "kubeletCpuCFSQuota") }} + cpuCFSQuota: {{ $v.kubeletCpuCFSQuota | default $.Values.kubeletCpuCFSQuota }} + {{- end }} + {{- if or 
(hasKey $v "kubeletPodsPerCore") (hasKey $.Values "kubeletPodsPerCore") }} + podsPerCore: {{ $v.kubeletPodsPerCore | default $.Values.kubeletPodsPerCore }} + {{- end }} + {{- if or (hasKey $v "kubeletMaxPods") (hasKey $.Values "kubeletMaxPods") }} + maxPods: {{ $v.kubeletMaxPods | default $.Values.kubeletMaxPods }} + {{- end }} {{- if or (hasKey $v "userData") (hasKey $.Values "userData") }} userData: | {{- if hasKey $v "userData" }} diff --git a/charts/karpenter_nodes/templates/nodepool.yaml b/charts/karpenter_nodes/templates/nodepool.yaml index 018a70e..b23da5f 100644 --- a/charts/karpenter_nodes/templates/nodepool.yaml +++ b/charts/karpenter_nodes/templates/nodepool.yaml @@ -151,63 +151,6 @@ spec: {{- if or (hasKey $.Values "additionalRequirements") (hasKey $v "additionalRequirements") }} {{- toYaml ($v.additionalRequirements | default $.Values.additionalRequirements) | nindent 8 }} {{- end }} - kubelet: - {{- if or (hasKey $v "kubeletClusterDNS") (hasKey $.Values "kubeletClusterDNS") }} - clusterDNS: - {{- if hasKey $v "kubeletClusterDNS" }} - {{- range $v.kubeletClusterDNS }} - - {{ . }} - {{- end }} - {{- else }} - {{- range $.Values.kubeletClusterDNS }} - - {{ . 
}} - {{- end }} - {{- end }} - {{- end }} - systemReserved: - cpu: {{ $v.kubeletSystemReservedCpu | default $.Values.kubeletSystemReservedCpu }} - memory: {{ $v.kubeletSystemReservedMemory | default $.Values.kubeletSystemReservedMemory }} - ephemeral-storage: {{ $v.kubeletSystemReservedEphemeralStorage | default $.Values.kubeletSystemReservedEphemeralStorage }} - kubeReserved: - cpu: {{ $v.kubeletKubeReservedCpu | default $.Values.kubeletKubeReservedCpu }} - memory: {{ $v.kubeletKubeReservedMemory | default $.Values.kubeletKubeReservedMemory }} - ephemeral-storage: {{ $v.kubeletKubeReservedEphemeralStorage | default $.Values.kubeletKubeReservedEphemeralStorage }} - evictionHard: - memory.available: {{ $v.kubeletEvictionHardMemoryAvailable | default $.Values.kubeletEvictionHardMemoryAvailable }} - nodefs.available: {{ $v.kubeletEvictionHardNodefsAvailable | default $.Values.kubeletEvictionHardNodefsAvailable }} - nodefs.inodesFree: {{ $v.kubeletEvictionHardNodefsInodesFree | default $.Values.kubeletEvictionHardNodefsInodesFree }} - evictionSoft: - memory.available: {{ $v.kubeletEvictionSoftMemoryAvailable | default $.Values.kubeletEvictionSoftMemoryAvailable }} - nodefs.available: {{ $v.kubeletEvictionSoftNodefsAvailable | default $.Values.kubeletEvictionSoftNodefsAvailable }} - nodefs.inodesFree: {{ $v.kubeletEvictionSoftNodefsInodesFree | default $.Values.kubeletEvictionSoftNodefsInodesFree }} - imagefs.available: {{ $v.kubeletEvictionSoftImagefsAvailable | default $.Values.kubeletEvictionSoftImagefsAvailable }} - imagefs.inodesFree: {{ $v.kubeletEvictionSoftImagefsInodesFree | default $.Values.kubeletEvictionSoftImagefsInodesFree }} - pid.available: {{ $v.kubeletEvictionSoftPidAvailable | default $.Values.kubeletEvictionSoftPidAvailable }} - evictionSoftGracePeriod: - imagefs.available: {{ $v.kubeletEvictionSoftGracePeriodImagefsAvailable | default $.Values.kubeletEvictionSoftGracePeriodImagefsAvailable }} - imagefs.inodesFree: {{ 
$v.kubeletEvictionSoftGracePeriodImagefsInodesFree | default $.Values.kubeletEvictionSoftGracePeriodImagefsInodesFree }} - memory.available: {{ $v.kubeletEvictionSoftGracePeriodMemoryAvailable | default $.Values.kubeletEvictionSoftGracePeriodMemoryAvailable }} - nodefs.available: {{ $v.kubeletEvictionSoftGracePeriodNodefsAvailable | default $.Values.kubeletEvictionSoftGracePeriodNodefsAvailable }} - nodefs.inodesFree: {{ $v.kubeletEvictionSoftGracePeriodNodefsInodesFree | default $.Values.kubeletEvictionSoftGracePeriodNodefsInodesFree }} - pid.available: {{ $v.kubeletEvictionSoftGracePeriodPidAvailable | default $.Values.kubeletEvictionSoftGracePeriodPidAvailable }} - {{- if or (hasKey $v "kubeletImageGCHighThresholdPercent") (hasKey $.Values "kubeletImageGCHighThresholdPercent") }} - imageGCHighThresholdPercent: {{ $v.kubeletImageGCHighThresholdPercent | default $.Values.kubeletImageGCHighThresholdPercent }} - {{- end }} - {{- if or (hasKey $v "kubeletImageGCLowThresholdPercent") (hasKey $.Values "kubeletImageGCLowThresholdPercent") }} - imageGCLowThresholdPercent: {{ $v.kubeletImageGCLowThresholdPercent | default $.Values.kubeletImageGCLowThresholdPercent }} - {{- end }} - {{- if or (hasKey $v "kubeletImageMinimumGCAge") (hasKey $.Values "kubeletImageMinimumGCAge") }} - imageMinimumGCAge: {{ $v.kubeletImageMinimumGCAge | default $.Values.kubeletImageMinimumGCAge }} - {{- end }} - {{- if or (hasKey $v "kubeletCpuCFSQuota") (hasKey $.Values "kubeletCpuCFSQuota") }} - cpuCFSQuota: {{ $v.kubeletCpuCFSQuota | default $.Values.kubeletCpuCFSQuota }} - {{- end }} - {{- if or (hasKey $v "kubeletPodsPerCore") (hasKey $.Values "kubeletPodsPerCore") }} - podsPerCore: {{ $v.kubeletPodsPerCore | default $.Values.kubeletPodsPerCore }} - {{- end }} - {{- if or (hasKey $v "kubeletMaxPods") (hasKey $.Values "kubeletMaxPods") }} - maxPods: {{ $v.kubeletMaxPods | default $.Values.kubeletMaxPods }} - {{- end }} disruption: expireAfter: {{ $v.expireAfter | default $.Values.expireAfter 
}} consolidationPolicy: {{$v.consolidationPolicy | default $.Values.consolidationPolicy}} diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_default_class_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_default_class_test.yaml new file mode 100644 index 0000000..0832a2d --- /dev/null +++ b/charts/karpenter_nodes/tests/nodepool_nodes_default_class_test.yaml @@ -0,0 +1,16 @@ +suite: test nodeclass - nodes-default +templates: + - nodeclass.yaml +values: + - values.yaml + +tests: + - it: Verify nodes-default kubelet + documentIndex: 0 + asserts: + - equal: + path: spec.kubelet.systemReserved.cpu + value: 250m + - equal: + path: spec.kubelet.kubeReserved.ephemeral-storage + value: 4Gi diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml index 473ef5f..0e5fc16 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_default_test.yaml @@ -127,16 +127,6 @@ tests: path: spec.template.spec.requirements[8].values[0] value: metal - - it: Verify nodes-default kubelet - documentIndex: 0 - asserts: - - equal: - path: spec.template.spec.kubelet.systemReserved.cpu - value: 250m - - equal: - path: spec.template.spec.kubelet.kubeReserved.ephemeral-storage - value: 4Gi - - it: Verify nodes-default Options documentIndex: 0 asserts: @@ -145,7 +135,7 @@ tests: value: 720h - equal: path: spec.disruption.consolidationPolicy - value: WhenUnderutilized + value: WhenEmptyOrUnderutilized - isNull: path: spec.disruption.consolidateAfter - isNull: @@ -155,4 +145,3 @@ tests: - equal: path: spec.weight value: 1 - diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_explicittypes_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_explicittypes_test.yaml index 565e6db..bf18c2a 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_explicittypes_test.yaml +++ 
b/charts/karpenter_nodes/tests/nodepool_nodes_explicittypes_test.yaml @@ -50,8 +50,6 @@ tests: - equal: path: spec.template.spec.requirements[5].values[1] value: t3a.xlarge - - isNull: - path: spec.template.spec.requirements[6] - equal: path: spec.template.spec.startupTaints[0].key value: "node.cilium.io/agent-not-ready" diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_workers_class_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_workers_class_test.yaml new file mode 100644 index 0000000..53762bf --- /dev/null +++ b/charts/karpenter_nodes/tests/nodepool_nodes_workers_class_test.yaml @@ -0,0 +1,22 @@ +suite: test nodepool - nodes-workers +templates: + - nodeclass.yaml +values: + - values.yaml + +tests: + - it: Verify nodes-workers kubelet + documentIndex: 1 + asserts: + - equal: + path: spec.kubelet.systemReserved.cpu + value: 750m + - equal: + path: spec.kubelet.kubeReserved.ephemeral-storage + value: 4Gi + - equal: + path: spec.kubelet.clusterDNS[0] + value: "1.1.1.1" + - equal: + path: spec.kubelet.clusterDNS[1] + value: "2.2.2.2" diff --git a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml index 33f897d..ee1bbde 100644 --- a/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml +++ b/charts/karpenter_nodes/tests/nodepool_nodes_workers_test.yaml @@ -159,23 +159,6 @@ tests: path: spec.template.spec.requirements[11].values[1] value: "value2" - - - it: Verify nodes-workers kubelet - documentIndex: 3 - asserts: - - equal: - path: spec.template.spec.kubelet.systemReserved.cpu - value: 750m - - equal: - path: spec.template.spec.kubelet.kubeReserved.ephemeral-storage - value: 4Gi - - equal: - path: spec.template.spec.kubelet.clusterDNS[0] - value: "1.1.1.1" - - equal: - path: spec.template.spec.kubelet.clusterDNS[1] - value: "2.2.2.2" - - it: Verify nodes-workers Options documentIndex: 3 asserts: diff --git a/charts/karpenter_nodes/values.yaml 
b/charts/karpenter_nodes/values.yaml index d548ae9..61a079e 100644 --- a/charts/karpenter_nodes/values.yaml +++ b/charts/karpenter_nodes/values.yaml @@ -1,7 +1,7 @@ ## Global Configuration # Karpenter API Version in CRD -ApiVersion: v1beta1 +ApiVersion: v1 # Nodes Configuration clusterName: "eks-cluster" # My Cluster Name @@ -60,7 +60,7 @@ detailedMonitoring: false associatePublicIPAddress: false # Consolidation Options expireAfter: "720h" -consolidationPolicy: "WhenUnderutilized" +consolidationPolicy: "WhenEmptyOrUnderutilized" consolidateAfter: "5m" # Default Instance Sizing