Skip to content

Commit b997476

Browse files
authored
configure rules to merge rabbitmq_identity_info (#1856)
1 parent 46b65b0 commit b997476

File tree

6 files changed

+16
-16
lines changed

6 files changed

+16
-16
lines changed

observability/prometheus/rules/rabbitmq/cluster-alarms.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ spec:
1414
expr: |
1515
max by(rabbitmq_cluster) (
1616
max_over_time(rabbitmq_alarms_memory_used_watermark[5m])
17-
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info
17+
# by() must keep `instance` (the on() join key) and `rabbitmq_node` (copied by group_left); max() drops every label not listed.
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)
1818
) > 0
1919
keep_firing_for: 5m
2020
annotations:
@@ -30,7 +30,7 @@ spec:
3030
expr: |
3131
max by(rabbitmq_cluster) (
3232
max_over_time(rabbitmq_alarms_free_disk_space_watermark[5m])
33-
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info
33+
# by() must keep `instance` (the on() join key) and `rabbitmq_node` (copied by group_left); max() drops every label not listed.
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)
3434
) > 0
3535
keep_firing_for: 5m
3636
annotations:
@@ -46,7 +46,7 @@ spec:
4646
expr: |
4747
max by(rabbitmq_cluster) (
4848
max_over_time(rabbitmq_alarms_file_descriptor_limit[5m])
49-
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info
49+
# by() must keep `instance` (the on() join key) and `rabbitmq_node` (copied by group_left); max() drops every label not listed.
* on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)
5050
) > 0
5151
keep_firing_for: 5m
5252
annotations:

observability/prometheus/rules/rabbitmq/container-restarts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ spec:
1212
rules:
1313
- alert: ContainerRestarts
1414
expr: |
15-
increase(kube_pod_container_status_restarts_total[10m]) * on(namespace, pod, container) group_left(rabbitmq_cluster) rabbitmq_identity_info
15+
increase(kube_pod_container_status_restarts_total[10m]) * on(namespace, pod, container) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (namespace, pod, container, rabbitmq_cluster)
1616
>=
1717
1
1818
for: 5m

observability/prometheus/rules/rabbitmq/file-descriptors-near-limit.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ spec:
1212
rules:
1313
- alert: FileDescriptorsNearLimit
1414
expr: |
15-
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (max_over_time(rabbitmq_process_open_fds[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info)
15+
# The identity aggregation keeps `instance` (the on() join key) and `rabbitmq_node` (needed by group_left and the outer sum by); max() drops unlisted labels.
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (max_over_time(rabbitmq_process_open_fds[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node))
1616
/
17-
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (rabbitmq_process_max_fds * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) rabbitmq_identity_info)
17+
# The identity aggregation keeps `instance` (the on() join key) and `rabbitmq_node` (needed by group_left and the outer sum by); max() drops unlisted labels.
sum by(namespace, rabbitmq_cluster, pod, rabbitmq_node) (rabbitmq_process_max_fds * on(instance) group_left(rabbitmq_cluster, rabbitmq_node, pod) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node))
1818
> 0.8
1919
for: 10m
2020
annotations:

observability/prometheus/rules/rabbitmq/high-connection-churn.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ spec:
1313
- alert: HighConnectionChurn
1414
expr: |
1515
(
16-
sum(rate(rabbitmq_connections_closed_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info) by(namespace, rabbitmq_cluster)
16+
# The identity aggregation keeps `instance` (the on() join key) and `rabbitmq_node` (copied by group_left); max() drops unlisted labels.
sum(rate(rabbitmq_connections_closed_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)) by(namespace, rabbitmq_cluster)
1717
+
18-
sum(rate(rabbitmq_connections_opened_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info) by(namespace, rabbitmq_cluster)
18+
# The identity aggregation keeps `instance` (the on() join key) and `rabbitmq_node` (copied by group_left); max() drops unlisted labels.
sum(rate(rabbitmq_connections_opened_total[5m]) * on(instance) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)) by(namespace, rabbitmq_cluster)
1919
)
2020
/
21-
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info) by (namespace, rabbitmq_cluster)
21+
# The identity aggregation keeps `instance` because it is the on() join key; max() drops every label not listed in by().
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster)) by (namespace, rabbitmq_cluster)
2222
> 0.1
2323
unless
24-
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info) by (namespace, rabbitmq_cluster)
24+
# The identity aggregation keeps `instance` because it is the on() join key; max() drops every label not listed in by().
sum (rabbitmq_connections * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster)) by (namespace, rabbitmq_cluster)
2525
< 100
2626
for: 10m
2727
annotations:

observability/prometheus/rules/rabbitmq/insufficient-established-erlang-distribution-links.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ spec:
1313
- alert: InsufficientEstablishedErlangDistributionLinks
1414
# erlang_vm_dist_node_state: 1=pending, 2=up_pending, 3=up
1515
expr: |
16-
count by (namespace, rabbitmq_cluster) (erlang_vm_dist_node_state * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info == 3)
16+
# The identity aggregation keeps `instance` because it is the on() join key; max() drops every label not listed in by().
count by (namespace, rabbitmq_cluster) (erlang_vm_dist_node_state * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster) == 3)
1717
<
18-
count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info)
18+
# The identity aggregation keeps `instance` because it is the on() join key; max() drops every label not listed in by().
count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster))
1919
*
20-
(count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) rabbitmq_identity_info) -1 )
20+
# The identity aggregation keeps `instance` because it is the on() join key; max() drops every label not listed in by().
(count by (namespace, rabbitmq_cluster) (rabbitmq_build_info * on(instance) group_left(rabbitmq_cluster) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster)) -1 )
2121
for: 10m
2222
annotations:
2323
description: |

observability/prometheus/rules/rabbitmq/low-disk-watermark-predicted.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ spec:
1414
# The 2nd condition requires that data points exist from 22 to 24 hours ago, so that no false-positive alerts are triggered for newly created RabbitMQ clusters.
1515
expr: |
1616
(
17-
predict_linear(rabbitmq_disk_space_available_bytes[24h], 60*60*24) * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info
17+
# by() must keep `instance` (on() joins on instance and pod) and `rabbitmq_node` (copied by group_left); max() drops unlisted labels.
predict_linear(rabbitmq_disk_space_available_bytes[24h], 60*60*24) * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)
1818
<
19-
rabbitmq_disk_space_available_limit_bytes * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info
19+
# by() must keep `instance` (on() joins on instance and pod) and `rabbitmq_node` (copied by group_left); max() drops unlisted labels.
rabbitmq_disk_space_available_limit_bytes * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)
2020
)
2121
and
2222
(
23-
count_over_time(rabbitmq_disk_space_available_limit_bytes[2h] offset 22h) * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) rabbitmq_identity_info
23+
# by() must keep `instance` (on() joins on instance and pod) and `rabbitmq_node` (copied by group_left); max() drops unlisted labels.
count_over_time(rabbitmq_disk_space_available_limit_bytes[2h] offset 22h) * on (instance, pod) group_left(rabbitmq_cluster, rabbitmq_node) max(rabbitmq_identity_info) by (instance, namespace, pod, container, rabbitmq_cluster, rabbitmq_node)
2424
>
2525
0
2626
)

0 commit comments

Comments
 (0)