Skip to content

Commit f1fb713

Browse files
authored
Merge pull request grafana/cortex-jsonnet#284 from grafana/optimize-recording-rules
Pre-compute aggregations to optimize scaling recording rules
2 parents aaaefee + 7ba8424 commit f1fb713

File tree

1 file changed

+51
-27
lines changed

1 file changed

+51
-27
lines changed

jsonnet/mimir-mixin/recording_rules.libsonnet

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,30 @@ local utils = import 'mixin-utils/utils.libsonnet';
197197
)
198198
|||,
199199
},
200+
{
201+
// Convenience rule to get the CPU usage (summed per-container usage rate) for both a deployment and a statefulset.
202+
record: 'cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate',
203+
expr: |||
204+
sum by (cluster, namespace, deployment) (
205+
label_replace(
206+
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate,
207+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
208+
)
209+
)
210+
|||,
211+
},
212+
{
213+
// Convenience rule to get the CPU request for both a deployment and a statefulset.
214+
record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum',
215+
expr: |||
216+
sum by (cluster, namespace, deployment) (
217+
label_replace(
218+
kube_pod_container_resource_requests_cpu_cores,
219+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
220+
)
221+
)
222+
|||,
223+
},
200224
{
201225
// Jobs should be sized to their CPU usage.
202226
// We do this by comparing 99th percentile usage over the last 24hrs to
@@ -209,20 +233,32 @@ local utils = import 'mixin-utils/utils.libsonnet';
209233
ceil(
210234
cluster_namespace_deployment:actual_replicas:count
211235
*
212-
quantile_over_time(0.99,
213-
sum by (cluster, namespace, deployment) (
214-
label_replace(
215-
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate,
216-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
217-
)
218-
)[24h:5m]
219-
)
236+
quantile_over_time(0.99, cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate[24h])
220237
/
221-
sum by (cluster, namespace, deployment) (
222-
label_replace(
223-
kube_pod_container_resource_requests_cpu_cores,
224-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
225-
)
238+
cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum
239+
)
240+
|||,
241+
},
242+
{
243+
// Convenience rule to get the memory usage (summed container_memory_usage_bytes) for both a deployment and a statefulset.
244+
record: 'cluster_namespace_deployment:container_memory_usage_bytes:sum',
245+
expr: |||
246+
sum by (cluster, namespace, deployment) (
247+
label_replace(
248+
container_memory_usage_bytes,
249+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
250+
)
251+
)
252+
|||,
253+
},
254+
{
255+
// Convenience rule to get the Memory request for both a deployment and a statefulset.
256+
record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum',
257+
expr: |||
258+
sum by (cluster, namespace, deployment) (
259+
label_replace(
260+
kube_pod_container_resource_requests_memory_bytes,
261+
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
226262
)
227263
)
228264
|||,
@@ -239,21 +275,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
239275
ceil(
240276
cluster_namespace_deployment:actual_replicas:count
241277
*
242-
quantile_over_time(0.99,
243-
sum by (cluster, namespace, deployment) (
244-
label_replace(
245-
container_memory_usage_bytes,
246-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
247-
)
248-
)[24h:5m]
249-
)
278+
quantile_over_time(0.99, cluster_namespace_deployment:container_memory_usage_bytes:sum[24h])
250279
/
251-
sum by (cluster, namespace, deployment) (
252-
label_replace(
253-
kube_pod_container_resource_requests_memory_bytes,
254-
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
255-
)
256-
)
280+
cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum
257281
)
258282
|||,
259283
},

0 commit comments

Comments
 (0)