Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dashboards/controller-manager.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,4 @@ local singlestat = grafana.singlestat;
.addPanel(goroutines)
) + { refresh: $._config.grafanaK8s.refresh },
},
}
}
10 changes: 5 additions & 5 deletions dashboards/resources/cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ local template = grafana.template;
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
g.stack
)
)
Expand All @@ -177,11 +177,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel(podWorkloadColumns + [
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
], tableStyles {
'Value #C': { alias: 'CPU Usage' },
'Value #D': { alias: 'CPU Requests' },
Expand Down Expand Up @@ -320,7 +320,7 @@ local template = grafana.template;
sort: {
col: 4,
desc: true,
}
},
},
)
) + {
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/multi-cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ local template = grafana.template;
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
+ { fill: 0, linewidth: 2 },
)
)
Expand All @@ -60,11 +60,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down
14 changes: 7 additions & 7 deletions dashboards/resources/namespace.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ local template = grafana.template;
},
};

local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config;
local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config;

local memoryUsageQuery = 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""}) by (pod)' % $._config;

Expand Down Expand Up @@ -141,11 +141,11 @@ local template = grafana.template;
})
.addPanel(
g.panel('CPU Utilisation (from requests)') +
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
)
.addPanel(
g.panel('CPU Utilisation (from limits)') +
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
)
.addPanel(
g.panel('Memory Utilisation (from requests)') +
Expand Down Expand Up @@ -198,11 +198,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down Expand Up @@ -361,7 +361,7 @@ local template = grafana.template;
sort: {
col: 4,
desc: true,
}
},
},
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/node.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ local template = grafana.template;
g.row('CPU Usage')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, '{{pod}}') +
g.stack,
)
)
Expand All @@ -53,11 +53,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down
10 changes: 5 additions & 5 deletions dashboards/resources/pod.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ local template = grafana.template;
g.panel('CPU Usage') +
g.queryPanel(
[
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster"}) by (container)' % $._config,
cpuRequestsQuery,
cpuLimitsQuery,
], [
Expand Down Expand Up @@ -172,11 +172,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down Expand Up @@ -339,7 +339,7 @@ local template = grafana.template;
sort: {
col: 4,
desc: true,
}
},
},
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh },
Expand Down
4 changes: 2 additions & 2 deletions dashboards/resources/workload-namespace.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ local template = grafana.template;
includeAll=false,
sort=1
),

local typeTemplate =
template.new(
name='type',
Expand Down Expand Up @@ -128,7 +128,7 @@ local template = grafana.template;

local cpuUsageQuery = |||
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
Expand Down
2 changes: 1 addition & 1 deletion dashboards/resources/workload.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ local template = grafana.template;

local cpuUsageQuery = |||
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
) by (pod)
Expand Down
4 changes: 2 additions & 2 deletions rules/apps.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
// Reduces cardinality of this timeseries by #cores, which makes it
// more useable in dashboards. Also, allows us to do things like
// quantile_over_time(...) which would otherwise not be possible.
record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate',
record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate',
expr: |||
sum by (%(clusterLabel)s, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!=""}[5m])
irate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!=""}[5m])
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing rate function to irate.

) * on (%(clusterLabel)s, namespace, pod) group_left(node) topk by (%(clusterLabel)s, namespace, pod) (
1, max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info{node!=""})
)
Expand Down
4 changes: 2 additions & 2 deletions tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -335,10 +335,10 @@ tests:
values: '1+0x5'
promql_expr_test:
- eval_time: 5m
expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
exp_samples:
- value: 5.0e-2
labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}'
labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}'

- interval: 1m
input_series:
Expand Down