Skip to content

Commit 83dd411

Browse files
committed
expose the key label
1 parent 35dda1c commit 83dd411

File tree

2 files changed

+64
-8
lines changed

2 files changed

+64
-8
lines changed

pkg/alertmanager/alertmanager_metrics.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -168,19 +168,19 @@ func newAlertmanagerMetrics() *alertmanagerMetrics {
168168
partialMerges: prometheus.NewDesc(
169169
"cortex_alertmanager_partial_state_merges_total",
170170
"Number of times we have received a partial state to merge for a key.",
171-
[]string{"user"}, nil),
171+
[]string{"user", "key"}, nil),
172172
partialMergesFailed: prometheus.NewDesc(
173173
"cortex_alertmanager_partial_state_merges_failed_total",
174174
"Number of times we have failed to merge a partial state received for a key.",
175-
[]string{"user"}, nil),
175+
[]string{"user", "key"}, nil),
176176
replicationTotal: prometheus.NewDesc(
177177
"cortex_alertmanager_state_replication_total",
178178
"Number of times we have tried to replicate a state to other alertmanagers",
179-
[]string{"user"}, nil),
179+
[]string{"user", "key"}, nil),
180180
replicationFailed: prometheus.NewDesc(
181181
"cortex_alertmanager_state_replication_failed_total",
182182
"Number of times we have failed to replicate a state to other alertmanagers",
183-
[]string{"user"}, nil),
183+
[]string{"user", "key"}, nil),
184184
fetchReplicaStateTotal: prometheus.NewDesc(
185185
"cortex_alertmanager_state_fetch_replica_state_total",
186186
"Number of times we have tried to read and merge the full state from another replica.",
@@ -317,10 +317,10 @@ func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) {
317317

318318
data.SendMaxOfGaugesPerUser(out, m.configHashValue, "alertmanager_config_hash")
319319

320-
data.SendSumOfCountersPerUser(out, m.partialMerges, "alertmanager_partial_state_merges_total")
321-
data.SendSumOfCountersPerUser(out, m.partialMergesFailed, "alertmanager_partial_state_merges_failed_total")
322-
data.SendSumOfCountersPerUser(out, m.replicationTotal, "alertmanager_state_replication_total")
323-
data.SendSumOfCountersPerUser(out, m.replicationFailed, "alertmanager_state_replication_failed_total")
320+
data.SendSumOfCountersPerUserWithLabels(out, m.partialMerges, "alertmanager_partial_state_merges_total", "key")
321+
data.SendSumOfCountersPerUserWithLabels(out, m.partialMergesFailed, "alertmanager_partial_state_merges_failed_total", "key")
322+
data.SendSumOfCountersPerUserWithLabels(out, m.replicationTotal, "alertmanager_state_replication_total", "key")
323+
data.SendSumOfCountersPerUserWithLabels(out, m.replicationFailed, "alertmanager_state_replication_failed_total", "key")
324324
data.SendSumOfCounters(out, m.fetchReplicaStateTotal, "alertmanager_state_fetch_replica_state_total")
325325
data.SendSumOfCounters(out, m.fetchReplicaStateFailed, "alertmanager_state_fetch_replica_state_failed_total")
326326
data.SendSumOfCounters(out, m.initialSyncTotal, "alertmanager_state_initial_sync_total")

pkg/alertmanager/alertmanager_metrics_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,17 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
211211
cortex_alertmanager_notifications_total{integration="wechat",user="user2"} 20
212212
cortex_alertmanager_notifications_total{integration="wechat",user="user3"} 200
213213
214+
# HELP cortex_alertmanager_partial_state_merges_failed_total Number of times we have failed to merge a partial state received for a key.
215+
# TYPE cortex_alertmanager_partial_state_merges_failed_total counter
216+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user1"} 2
217+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user2"} 20
218+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user3"} 200
219+
# HELP cortex_alertmanager_partial_state_merges_total Number of times we have received a partial state to merge for a key.
220+
# TYPE cortex_alertmanager_partial_state_merges_total counter
221+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user1"} 3
222+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user2"} 30
223+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user3"} 300
224+
214225
# HELP cortex_alertmanager_silences How many silences by state.
215226
# TYPE cortex_alertmanager_silences gauge
216227
cortex_alertmanager_silences{state="active",user="user1"} 1
@@ -506,6 +517,17 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
506517
cortex_alertmanager_notifications_total{integration="wechat",user="user2"} 20
507518
cortex_alertmanager_notifications_total{integration="wechat",user="user3"} 200
508519
520+
# HELP cortex_alertmanager_partial_state_merges_failed_total Number of times we have failed to merge a partial state received for a key.
521+
# TYPE cortex_alertmanager_partial_state_merges_failed_total counter
522+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user1"} 2
523+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user2"} 20
524+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user3"} 200
525+
# HELP cortex_alertmanager_partial_state_merges_total Number of times we have received a partial state to merge for a key.
526+
# TYPE cortex_alertmanager_partial_state_merges_total counter
527+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user1"} 3
528+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user2"} 30
529+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user3"} 300
530+
509531
# HELP cortex_alertmanager_silences How many silences by state.
510532
# TYPE cortex_alertmanager_silences gauge
511533
cortex_alertmanager_silences{state="active",user="user1"} 1
@@ -758,6 +780,15 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
758780
cortex_alertmanager_notifications_total{integration="wechat",user="user1"} 2
759781
cortex_alertmanager_notifications_total{integration="wechat",user="user2"} 20
760782
783+
# HELP cortex_alertmanager_partial_state_merges_failed_total Number of times we have failed to merge a partial state received for a key.
784+
# TYPE cortex_alertmanager_partial_state_merges_failed_total counter
785+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user1"} 2
786+
cortex_alertmanager_partial_state_merges_failed_total{key="nil",user="user2"} 20
787+
# HELP cortex_alertmanager_partial_state_merges_total Number of times we have received a partial state to merge for a key.
788+
# TYPE cortex_alertmanager_partial_state_merges_total counter
789+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user1"} 3
790+
cortex_alertmanager_partial_state_merges_total{key="nil",user="user2"} 30
791+
761792
# HELP cortex_alertmanager_silences How many silences by state.
762793
# TYPE cortex_alertmanager_silences gauge
763794
cortex_alertmanager_silences{state="active",user="user1"} 1
@@ -898,6 +929,10 @@ func populateAlertmanager(base float64) *prometheus.Registry {
898929
lm.size.Set(100 * base)
899930
lm.insertFailures.Add(7 * base)
900931

932+
sr := newStateReplicationMetrics(reg)
933+
sr.partialStateMergesFailed.WithLabelValues("nil").Add(base * 2)
934+
sr.partialStateMergesTotal.WithLabelValues("nil").Add(base * 3)
935+
901936
return reg
902937
}
903938

@@ -1130,3 +1165,24 @@ func newLimiterMetrics(r prometheus.Registerer) *limiterMetrics {
11301165
insertFailures: insertAlertFailures,
11311166
}
11321167
}
1168+
1169+
// stateReplicationMetrics bundles the partial-state-merge counter vectors
// that the test helper populateAlertmanager fills in for each registry.
type stateReplicationMetrics struct {
1170+
// partialStateMergesTotal counts partial states received for merging.
partialStateMergesTotal *prometheus.CounterVec
1171+
// partialStateMergesFailed counts partial state merges that failed.
partialStateMergesFailed *prometheus.CounterVec
1172+
}
1173+
1174+
// newStateReplicationMetrics creates the two partial-state-merge counter
// vectors through promauto.With(r) and returns them wrapped in a
// stateReplicationMetrics. Both vectors carry a single "key" label.
//
// The metric names deliberately omit the "cortex_" prefix: they match the
// names the alertmanagerMetrics collector reads when it emits the
// corresponding cortex_alertmanager_* series.
func newStateReplicationMetrics(r prometheus.Registerer) *stateReplicationMetrics {
1175+
partialStateMergesTotal := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
1176+
Name: "alertmanager_partial_state_merges_total",
1177+
Help: "Number of times we have received a partial state to merge for a key.",
1178+
}, []string{"key"})
1179+
partialStateMergesFailed := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
1180+
Name: "alertmanager_partial_state_merges_failed_total",
1181+
Help: "Number of times we have failed to merge a partial state received for a key.",
1182+
}, []string{"key"})
1183+
1184+
return &stateReplicationMetrics{
1185+
partialStateMergesTotal: partialStateMergesTotal,
1186+
partialStateMergesFailed: partialStateMergesFailed,
1187+
}
1188+
}

0 commit comments

Comments
 (0)