Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 5ec0459

Browse files
authored
Merge pull request #147 from grafana/support-am-ha
Support Alertmanager HA
2 parents 5009433 + eeb2c80 commit 5ec0459

File tree

4 files changed

+40
-9
lines changed

4 files changed

+40
-9
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22

33
## master / unreleased
44

5+
* [BUGFIX] Add support for the `local` ruler client type
56
* [CHANGE] The project is now licensed with Apache-2.0 license. #169
67
* [CHANGE] Add overrides config to tsdb store-gateway. #167
78
* [CHANGE] Ingesters now default to running as `StatefulSet` with WAL enabled. It is controlled by the config `$._config.ingester_deployment_without_wal` which is `false` by default. Setting the config to `true` will yield the old behaviour (stateless `Deployment` without WAL enabled). #72
89
* [CHANGE] We now allow queries that are 32 days long. For example, rate(metric[32d]). Before it was 31d. #173
9-
* [BUGFIX] Add support the `local` ruler client type
10+
* [ENHANCEMENT] Enable support for HA in the Cortex Alertmanager #147
1011

1112
## 1.3.0 / 2020-08-21
1213

cortex/alertmanager.libsonnet

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,23 @@
44
local container = $.core.v1.container,
55
local statefulSet = $.apps.v1.statefulSet,
66
local service = $.core.v1.service,
7+
local isHA = $._config.alertmanager.replicas > 1,
8+
local peers = if isHA then
9+
[
10+
'alertmanager-%d.alertmanager.%s.svc.%s.local:%s' % [i, $._config.namespace, $._config.cluster, $._config.alertmanager.gossip_port]
11+
for i in std.range(0, $._config.alertmanager.replicas - 1)
12+
]
13+
else [],
714

815
alertmanager_args::
916
{
1017
target: 'alertmanager',
1118
'log.level': 'debug',
1219

20+
'experimental.alertmanager.enable-api': 'true',
1321
'alertmanager.storage.type': 'gcs',
1422
'alertmanager.storage.path': '/data',
15-
'alertmanager.gcs.bucketname': '%(cluster)s-cortex-configdb-%(namespace)s' % $._config,
23+
'alertmanager.storage.gcs.bucketname': '%(cluster)s-cortex-%(namespace)s' % $._config,
1624
'alertmanager.web.external-url': '%s/alertmanager' % $._config.external_url,
1725
},
1826

@@ -27,8 +35,22 @@
2735
alertmanager_container::
2836
if $._config.alertmanager_enabled then
2937
container.new('alertmanager', $._images.alertmanager) +
30-
container.withPorts($.util.defaultPorts) +
31-
container.withArgsMixin($.util.mapToFlags($.alertmanager_args)) +
38+
container.withPorts(
39+
$.util.defaultPorts +
40+
if isHA then [
41+
$.core.v1.containerPort.newUDP('gossip-udp', $._config.alertmanager.gossip_port),
42+
$.core.v1.containerPort.new('gossip-tcp', $._config.alertmanager.gossip_port),
43+
]
44+
else [],
45+
) +
46+
container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) +
47+
container.withArgsMixin(
48+
$.util.mapToFlags($.alertmanager_args) +
49+
if isHA then
50+
// The gossip port lives under the nested `alertmanager` config object (same key the peer list and container ports use).
['--cluster.listen-address=[$(POD_IP)]:%s' % $._config.alertmanager.gossip_port] +
51+
['--cluster.peer=%s' % peer for peer in peers]
52+
else [],
53+
) +
3254
container.withVolumeMountsMixin([volumeMount.new('alertmanager-data', '/data')]) +
3355
$.util.resourcesRequests('100m', '1Gi') +
3456
$.util.readinessProbe +
@@ -37,7 +59,7 @@
3759

3860
alertmanager_statefulset:
3961
if $._config.alertmanager_enabled then
40-
statefulSet.new('alertmanager', 1, [$.alertmanager_container], $.alertmanager_pvc) +
62+
statefulSet.new('alertmanager', $._config.alertmanager.replicas, [$.alertmanager_container], $.alertmanager_pvc) +
4163
statefulSet.mixin.spec.withServiceName('alertmanager') +
4264
statefulSet.mixin.metadata.withNamespace($._config.namespace) +
4365
statefulSet.mixin.metadata.withLabels({ name: 'alertmanager' }) +
@@ -50,6 +72,10 @@
5072

5173
alertmanager_service:
5274
if $._config.alertmanager_enabled then
53-
$.util.serviceFor($.alertmanager_statefulset)
75+
if isHA then
76+
$.util.serviceFor($.alertmanager_statefulset) +
77+
service.mixin.spec.withClusterIp('None')
78+
else
79+
$.util.serviceFor($.alertmanager_statefulset)
5480
else {},
5581
}

cortex/config.libsonnet

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,11 @@
243243
},
244244
}[$._config.ruler_client_type],
245245

246+
alertmanager: {
247+
replicas: 3,
248+
gossip_port: 9094,
249+
},
250+
246251
overrides: {
247252
// === Per-tenant usage limits. ===
248253
//

cortex/images.libsonnet

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
memcachedExporter: 'prom/memcached-exporter:v0.6.0',
66

77
// Our services.
8-
cortex: 'cortexproject/cortex:v1.2.0',
8+
cortex: 'cortexproject/cortex:v1.3.0',
99

10+
alertmanager: self.cortex,
1011
distributor: self.cortex,
1112
ingester: self.cortex,
1213
querier: self.cortex,
@@ -18,8 +19,6 @@
1819
store_gateway: self.cortex,
1920

2021
query_tee: 'quay.io/cortexproject/query-tee:master-5d7b05c3',
21-
// TODO(gouthamve/jtlisi): Upstream the ruler and AM configs.
22-
alertmanager: 'jtlisi/cortex:20190819_alertmanager_update-faa66aa43',
2322
testExporter: 'cortexproject/test-exporter:master-be013707',
2423
},
2524
}

0 commit comments

Comments
 (0)