diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ecdc3fc..1e47ebeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,12 @@ ## master / unreleased +* [BUGFIX] Add support for the `local` ruler client type * [CHANGE] The project is now licensed with Apache-2.0 license. #169 * [CHANGE] Add overrides config to tsdb store-gateway. #167 * [CHANGE] Ingesters now default to running as `StatefulSet` with WAL enabled. It is controlled by the config `$._config.ingester_deployment_without_wal` which is `false` by default. Setting the config to `true` will yeild the old behaviour (stateless `Deployment` without WAL enabled). #72 * [CHANGE] We now allow queries that are 32 days long. For example, rate(metric[32d]). Before it was 31d. #173 -* [BUGFIX] Add support the `local` ruler client type +* [ENHANCEMENT] Enable support for HA in the Cortex Alertmanager #147 ## 1.3.0 / 2020-08-21 diff --git a/cortex/alertmanager.libsonnet b/cortex/alertmanager.libsonnet index e5bb15d8..61d04f5b 100644 --- a/cortex/alertmanager.libsonnet +++ b/cortex/alertmanager.libsonnet @@ -4,15 +4,23 @@ local container = $.core.v1.container, local statefulSet = $.apps.v1.statefulSet, local service = $.core.v1.service, + local isHA = $._config.alertmanager.replicas > 1, /* HA mode: more than one Alertmanager replica is configured. */ + local peers = if isHA then + [ + 'alertmanager-%d.alertmanager.%s.svc.%s.local:%s' % [i, $._config.namespace, $._config.cluster, $._config.alertmanager.gossip_port] + for i in std.range(0, $._config.alertmanager.replicas - 1) + ] + else [], /* peers: one address per replica index 0..replicas-1; empty when not in HA mode. */ alertmanager_args:: { target: 'alertmanager', 'log.level': 'debug', + 'experimental.alertmanager.enable-api': 'true', 'alertmanager.storage.type': 'gcs', 'alertmanager.storage.path': '/data', - 'alertmanager.gcs.bucketname': '%(cluster)s-cortex-configdb-%(namespace)s' % $._config, + 'alertmanager.storage.gcs.bucketname': '%(cluster)s-cortex-%(namespace)s' % $._config, 'alertmanager.web.external-url': '%s/alertmanager' % $._config.external_url, }, @@ -27,8 +35,22 @@ alertmanager_container:: if
$._config.alertmanager_enabled then container.new('alertmanager', $._images.alertmanager) + - container.withPorts($.util.defaultPorts) + - container.withArgsMixin($.util.mapToFlags($.alertmanager_args)) + + container.withPorts( + $.util.defaultPorts + + if isHA then [ + $.core.v1.containerPort.newUDP('gossip-udp', $._config.alertmanager.gossip_port), + $.core.v1.containerPort.new('gossip-tcp', $._config.alertmanager.gossip_port), + ] + else [], + ) + + container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) + + container.withArgsMixin( + $.util.mapToFlags($.alertmanager_args) + + if isHA then + ['--cluster.listen-address=[$(POD_IP)]:%s' % $._config.alertmanager.gossip_port] /* same nested gossip_port field used by the peers list and the container ports */ + + ['--cluster.peer=%s' % peer for peer in peers] + else [], + ) + container.withVolumeMountsMixin([volumeMount.new('alertmanager-data', '/data')]) + $.util.resourcesRequests('100m', '1Gi') + $.util.readinessProbe + @@ -37,7 +59,7 @@ alertmanager_statefulset: if $._config.alertmanager_enabled then - statefulSet.new('alertmanager', 1, [$.alertmanager_container], $.alertmanager_pvc) + + statefulSet.new('alertmanager', $._config.alertmanager.replicas, [$.alertmanager_container], $.alertmanager_pvc) + statefulSet.mixin.spec.withServiceName('alertmanager') + statefulSet.mixin.metadata.withNamespace($._config.namespace) + statefulSet.mixin.metadata.withLabels({ name: 'alertmanager' }) + @@ -50,6 +72,10 @@ alertmanager_service: if $._config.alertmanager_enabled then - $.util.serviceFor($.alertmanager_statefulset) + if isHA then + $.util.serviceFor($.alertmanager_statefulset) + + service.mixin.spec.withClusterIp('None') + else + $.util.serviceFor($.alertmanager_statefulset) else {}, } diff --git a/cortex/config.libsonnet b/cortex/config.libsonnet index d359f22f..35cc73d4 100644 --- a/cortex/config.libsonnet +++ b/cortex/config.libsonnet @@ -243,6 +243,11 @@ }, }[$._config.ruler_client_type], + alertmanager: { + replicas: 3, + gossip_port: 9094, + }, + overrides: { // ===
Per-tenant usage limits. === // diff --git a/cortex/images.libsonnet b/cortex/images.libsonnet index a4be104a..14bc4781 100644 --- a/cortex/images.libsonnet +++ b/cortex/images.libsonnet @@ -5,8 +5,9 @@ memcachedExporter: 'prom/memcached-exporter:v0.6.0', // Our services. - cortex: 'cortexproject/cortex:v1.2.0', + cortex: 'cortexproject/cortex:v1.3.0', + alertmanager: self.cortex, distributor: self.cortex, ingester: self.cortex, querier: self.cortex, @@ -18,8 +19,6 @@ store_gateway: self.cortex, query_tee: 'quay.io/cortexproject/query-tee:master-5d7b05c3', - // TODO(gouthamve/jtlisi): Upstream the ruler and AM configs. - alertmanager: 'jtlisi/cortex:20190819_alertmanager_update-faa66aa43', testExporter: 'cortexproject/test-exporter:master-be013707', }, }