From 0d5415e593e421f5f92849350545d1b51bca5823 Mon Sep 17 00:00:00 2001 From: Chris Martin Date: Thu, 5 Sep 2024 14:07:37 +0100 Subject: [PATCH] Fix rules (#3908) --- deployment/scheduler/templates/scheduler-prometheusrule.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/scheduler/templates/scheduler-prometheusrule.yaml b/deployment/scheduler/templates/scheduler-prometheusrule.yaml index fe2d090f9ab..892ecc398a3 100644 --- a/deployment/scheduler/templates/scheduler-prometheusrule.yaml +++ b/deployment/scheduler/templates/scheduler-prometheusrule.yaml @@ -22,7 +22,7 @@ spec: expr: sum by (cluster, category, subCategory) (armada_scheduler_error_classification_by_node) # Per-queue failures. - record: queue_category_subCategory:armada_scheduler_failed_jobs - expr: sum by (queue, category, subCategory) (job_error_classification_by_queue) + expr: sum by (queue, category, subCategory) (armada_scheduler_job_error_classification_by_queue) # Per-node successes. - record: node:armada_scheduler_succeeded_jobs expr: sum by (node) (armada_scheduler_job_state_counter_by_node{state="succeeded"}) @@ -31,7 +31,7 @@ spec: expr: sum by (cluster, category, subCategory) (armada_scheduler_job_state_counter_by_node{state="succeeded"}) # Per-queue successes. - record: queue_category_subCategory:armada_scheduler_succeeded_jobs - expr: sum by (queue) (job_state_counter_by_queue{state="succeeded"}) + expr: sum by (queue) (armada_scheduler_job_state_counter_by_queue{state="succeeded"}) # Per-node failures increase. # increase(sum... is safe here, since all metrics that make up the sum reset at the same time. - record: node:armada_scheduler_failed_jobs:increase1m