From e996e00fa3a0c17a7a9f5d01f6c1a3544731bd33 Mon Sep 17 00:00:00 2001 From: Prashant Balachandran Date: Wed, 9 Jun 2021 15:28:52 +0530 Subject: [PATCH] changing rate function to irate for recording rule --- dashboards/controller-manager.libsonnet | 2 +- dashboards/resources/cluster.libsonnet | 10 +++++----- dashboards/resources/multi-cluster.libsonnet | 8 ++++---- dashboards/resources/namespace.libsonnet | 14 +++++++------- dashboards/resources/node.libsonnet | 8 ++++---- dashboards/resources/pod.libsonnet | 10 +++++----- dashboards/resources/workload-namespace.libsonnet | 4 ++-- dashboards/resources/workload.libsonnet | 2 +- rules/apps.libsonnet | 4 ++-- tests.yaml | 4 ++-- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/dashboards/controller-manager.libsonnet b/dashboards/controller-manager.libsonnet index b3af034c8..3debf5a45 100644 --- a/dashboards/controller-manager.libsonnet +++ b/dashboards/controller-manager.libsonnet @@ -193,4 +193,4 @@ local singlestat = grafana.singlestat; .addPanel(goroutines) ) + { refresh: $._config.grafanaK8s.refresh }, }, -} \ No newline at end of file +} diff --git a/dashboards/resources/cluster.libsonnet b/dashboards/resources/cluster.libsonnet index 554b96202..7acbfed00 100644 --- a/dashboards/resources/cluster.libsonnet +++ b/dashboards/resources/cluster.libsonnet @@ -168,7 +168,7 @@ local template = grafana.template; g.row('CPU') .addPanel( g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') + + g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') + g.stack ) ) @@ -177,11 +177,11 @@ local template = grafana.template; .addPanel( g.panel('CPU Quota') + g.tablePanel(podWorkloadColumns + [ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config, 'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config, ], tableStyles { 'Value #C': { alias: 'CPU Usage' }, 'Value #D': { alias: 'CPU Requests' }, @@ -320,7 +320,7 @@ local template = grafana.template; sort: { col: 4, desc: true, - } + }, }, ) ) + { diff --git a/dashboards/resources/multi-cluster.libsonnet b/dashboards/resources/multi-cluster.libsonnet index 152e21dfd..ef998e08c 100644 --- a/dashboards/resources/multi-cluster.libsonnet +++ b/dashboards/resources/multi-cluster.libsonnet @@ -51,7 +51,7 @@ local template = grafana.template; g.row('CPU') .addPanel( g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) + g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config) + { fill: 0, linewidth: 2 }, ) ) @@ -60,11 +60,11 @@ local template = grafana.template; .addPanel( g.panel('CPU Quota') + g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config, 'sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config, 'sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index 60daa0b7b..a38690b3c 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -76,7 +76,7 @@ local template = grafana.template; }, }; - local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config; + local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config; local memoryUsageQuery = 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""}) by (pod)' % $._config; @@ -141,11 +141,11 @@ local template = grafana.template; }) .addPanel( g.panel('CPU Utilisation (from requests)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) ) .addPanel( g.panel('CPU Utilisation (from limits)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) ) .addPanel( g.panel('Memory Utilisation (from requests)') + @@ -198,11 +198,11 @@ local template = grafana.template; .addPanel( g.panel('CPU Quota') + g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, 'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config, 'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -361,7 +361,7 @@ local template = grafana.template; sort: { col: 4, desc: true, - } + }, }, ) ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh }, diff --git a/dashboards/resources/node.libsonnet b/dashboards/resources/node.libsonnet index cde0de61a..8cfdc90e1 100644 --- a/dashboards/resources/node.libsonnet +++ b/dashboards/resources/node.libsonnet @@ -44,7 +44,7 @@ local template = grafana.template; g.row('CPU Usage') .addPanel( g.panel('CPU Usage') + - g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, '{{pod}}') + + g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, '{{pod}}') + g.stack, ) ) @@ -53,11 +53,11 @@ local template = grafana.template; .addPanel( g.panel('CPU Quota') + g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, 'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config, 'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, diff --git a/dashboards/resources/pod.libsonnet b/dashboards/resources/pod.libsonnet index 4a9bc8f1d..37e13d433 100644 --- a/dashboards/resources/pod.libsonnet +++ b/dashboards/resources/pod.libsonnet @@ -109,7 +109,7 @@ local template = grafana.template; g.panel('CPU Usage') + g.queryPanel( [ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster"}) by (container)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster"}) by (container)' % $._config, cpuRequestsQuery, cpuLimitsQuery, ], [ @@ -172,11 +172,11 @@ local template = grafana.template; .addPanel( g.panel('CPU Quota') + g.tablePanel([ - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, 'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config, 'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -339,7 +339,7 @@ local template = grafana.template; sort: { col: 4, desc: true, - } + }, }, ) ) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh }, diff --git a/dashboards/resources/workload-namespace.libsonnet b/dashboards/resources/workload-namespace.libsonnet index a58acc4f1..e0664cd76 100644 --- a/dashboards/resources/workload-namespace.libsonnet +++ b/dashboards/resources/workload-namespace.libsonnet @@ -16,7 +16,7 @@ local template = grafana.template; includeAll=false, sort=1 ), - + local typeTemplate = template.new( name='type', @@ -128,7 +128,7 @@ local template = grafana.template; local cpuUsageQuery = ||| sum( - node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"} + node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) diff --git a/dashboards/resources/workload.libsonnet b/dashboards/resources/workload.libsonnet index bd30f4b29..654e1633c 100644 --- a/dashboards/resources/workload.libsonnet +++ b/dashboards/resources/workload.libsonnet @@ -126,7 +126,7 @@ local template = grafana.template; local cpuUsageQuery = ||| sum( - node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"} + node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"} ) by (pod) diff --git a/rules/apps.libsonnet b/rules/apps.libsonnet index fdc621f9c..9d6b83b41 100644 --- a/rules/apps.libsonnet +++ b/rules/apps.libsonnet @@ -13,10 +13,10 @@ // Reduces cardinality of this timeseries by #cores, which makes it // more useable in dashboards. Also, allows us to do things like // quantile_over_time(...) which would otherwise not be possible. - record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate', + record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate', expr: ||| sum by (%(clusterLabel)s, namespace, pod, container) ( - rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!=""}[5m]) + irate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!=""}[5m]) ) * on (%(clusterLabel)s, namespace, pod) group_left(node) topk by (%(clusterLabel)s, namespace, pod) ( 1, max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info{node!=""}) ) diff --git a/tests.yaml b/tests.yaml index 231890e4b..a7071ce53 100644 --- a/tests.yaml +++ b/tests.yaml @@ -335,10 +335,10 @@ tests: values: '1+0x5' promql_expr_test: - eval_time: 5m - expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate + expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate exp_samples: - value: 5.0e-2 - labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}' + labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}' - interval: 1m input_series: