Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 909450d

Browse files
committed
Support Alertmanager HA
With this change, we can increase the number of replicas for a Cortex Alertmanager (AM), thus enabling HA. Please note that alerts themselves are not gossiped between Alertmanagers. Each Ruler needs to send every alert to every available Alertmanager, which is why a headless service is created when the number of replicas is greater than 1.
1 parent 796e073 commit 909450d

File tree

3 files changed

+36
-7
lines changed

3 files changed

+36
-7
lines changed

cortex/alertmanager.libsonnet

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,23 @@
44
local container = $.core.v1.container,
55
local statefulSet = $.apps.v1.statefulSet,
66
local service = $.core.v1.service,
7+
local isGossiping = $._config.alertmanager.replicas > 1,
8+
local peers = if isGossiping then
9+
[
10+
'alertmanager-%d.alertmanager.%s.svc.%s.local:%s' % [i, $._config.namespace, $._config.cluster, $._config.alertmanager_gossip_port]
11+
for i in std.range(0, $._config.alertmanager.replicas - 1)
12+
]
13+
else [],
714

815
alertmanager_args::
916
{
1017
target: 'alertmanager',
1118
'log.level': 'debug',
1219

20+
'experimental.alertmanager.enable-api': 'true',
1321
'alertmanager.storage.type': 'gcs',
1422
'alertmanager.storage.path': '/data',
15-
'alertmanager.gcs.bucketname': '%(cluster)s-cortex-configdb-%(namespace)s' % $._config,
23+
'alertmanager.storage.gcs.bucketname': '%(cluster)s-cortex-%(namespace)s' % $._config,
1624
'alertmanager.web.external-url': '%s/alertmanager' % $._config.external_url,
1725
},
1826

@@ -27,8 +35,22 @@
2735
alertmanager_container::
2836
if $._config.alertmanager_enabled then
2937
container.new('alertmanager', $._images.alertmanager) +
30-
container.withPorts($.util.defaultPorts) +
31-
container.withArgsMixin($.util.mapToFlags($.alertmanager_args)) +
38+
container.withPorts(
39+
$.util.defaultPorts +
40+
if isGossiping then [
41+
$.core.v1.containerPort.newUDP('gossip-udp', $._config.alertmanager_gossip_port),
42+
$.core.v1.containerPort.new('gossip-tcp', $._config.alertmanager_gossip_port),
43+
]
44+
else [],
45+
) +
46+
container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) +
47+
container.withArgsMixin(
48+
$.util.mapToFlags($.alertmanager_args) +
49+
if isGossiping then
50+
['--cluster.listen-address=[$(POD_IP)]:%s' % $._config.alertmanager_gossip_port] +
51+
['--cluster.peer=%s' % peer for peer in peers]
52+
else [],
53+
) +
3254
container.withVolumeMountsMixin([volumeMount.new('alertmanager-data', '/data')]) +
3355
$.util.resourcesRequests('100m', '1Gi') +
3456
$.util.readinessProbe +
@@ -37,7 +59,7 @@
3759

3860
alertmanager_statefulset:
3961
if $._config.alertmanager_enabled then
40-
statefulSet.new('alertmanager', 1, [$.alertmanager_container], $.alertmanager_pvc) +
62+
statefulSet.new('alertmanager', $._config.alertmanager.replicas, [$.alertmanager_container], $.alertmanager_pvc) +
4163
statefulSet.mixin.spec.withServiceName('alertmanager') +
4264
statefulSet.mixin.metadata.withNamespace($._config.namespace) +
4365
statefulSet.mixin.metadata.withLabels({ name: 'alertmanager' }) +
@@ -50,6 +72,10 @@
5072

5173
alertmanager_service:
5274
if $._config.alertmanager_enabled then
53-
$.util.serviceFor($.alertmanager_statefulset)
75+
if $._config.alertmanager.replicas > 1 then
76+
$.util.serviceFor($.alertmanager_statefulset) +
77+
service.mixin.spec.withClusterIp('None')
78+
else
79+
$.util.serviceFor($.alertmanager_statefulset)
5480
else {},
5581
}

cortex/config.libsonnet

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,10 @@
227227
},
228228
}[$._config.ruler_client_type],
229229

230+
alertmanager: {
231+
replicas: 1,
232+
},
233+
230234
overrides: {
231235
// === Per-tenant usage limits. ===
232236
//

cortex/images.libsonnet

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
store_gateway: self.cortex,
1919

2020
query_tee: 'quay.io/cortexproject/query-tee:master-5d7b05c3',
21-
// TODO(gouthamve/jtlisi): Upstream the ruler and AM configs.
22-
alertmanager: 'jtlisi/cortex:20190819_alertmanager_update-faa66aa43',
21+
alertmanager: 'quay.io/cortexproject/cortex:master-2b41aa38d\t',
2322
testExporter: 'cortexproject/test-exporter:master-be013707',
2423
},
2524
}

0 commit comments

Comments
 (0)