diff --git a/CHANGELOG.md b/CHANGELOG.md index dfc25038..7fc3cc32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ * [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329 * [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. #335 * [BUGFIX] Fixed scaling dashboard to correctly work when a Cortex service deployment spans across multiple zones (a zone is expected to have the `zone-[a-z]` suffix). #365 +* [BUGFIX] Fixed rollout progress dashboard to correctly work when a Cortex service deployment spans across multiple zones (a zone is expected to have the `zone-[a-z]` suffix). #366 ## 1.9.0 / 2021-05-18 diff --git a/cortex-mixin/dashboards/rollout-progress.libsonnet b/cortex-mixin/dashboards/rollout-progress.libsonnet index 83a5abb7..e481ce6a 100644 --- a/cortex-mixin/dashboards/rollout-progress.libsonnet +++ b/cortex-mixin/dashboards/rollout-progress.libsonnet @@ -6,7 +6,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; gateway_job_matcher: $.jobMatcher($._config.job_names.gateway), gateway_write_routes_regex: 'api_(v1|prom)_push', gateway_read_routes_regex: '(prometheus|api_prom)_api_v1_.+', - all_services_regex: std.join('|', ['cortex-gw', 'distributor', 'ingester', 'query-frontend', 'querier', 'compactor', 'store-gateway', 'ruler', 'alertmanager']), + all_services_regex: std.join('|', ['cortex-gw', 'distributor', 'ingester.*', 'query-frontend', 'querier', 'compactor', 'store-gateway', 'ruler', 'alertmanager']), }, 'cortex-rollout-progress.json': @@ -22,29 +22,61 @@ local utils = import 'mixin-utils/utils.libsonnet'; // $.panel('Rollout progress') + $.barGauge([ + // Multi-zone deployments are grouped together removing the "zone-X" suffix. + // After the grouping, the resulting label is called "cortex_service". ||| ( - kube_statefulset_status_replicas_updated{%(namespace_matcher)s,statefulset=~"%(all_services_regex)s"} + sum by(cortex_service) ( + label_replace( + kube_statefulset_status_replicas_updated{%(namespace_matcher)s,statefulset=~"%(all_services_regex)s"}, + "cortex_service", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?" + ) + ) / - kube_statefulset_replicas{%(namespace_matcher)s} + sum by(cortex_service) ( + label_replace( + kube_statefulset_replicas{%(namespace_matcher)s}, + "cortex_service", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?" + ) + ) ) and ( - kube_statefulset_replicas{%(namespace_matcher)s} + sum by(cortex_service) ( + label_replace( + kube_statefulset_replicas{%(namespace_matcher)s}, + "cortex_service", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?" + ) + ) > 0 ) ||| % config, ||| ( - kube_deployment_status_replicas_updated{%(namespace_matcher)s,deployment=~"%(all_services_regex)s"} + sum by(cortex_service) ( + label_replace( + kube_deployment_status_replicas_updated{%(namespace_matcher)s,deployment=~"%(all_services_regex)s"}, + "cortex_service", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) / - kube_deployment_spec_replicas{%(namespace_matcher)s} + sum by(cortex_service) ( + label_replace( + kube_deployment_spec_replicas{%(namespace_matcher)s}, + "cortex_service", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) ) and ( - kube_deployment_spec_replicas{%(namespace_matcher)s} + sum by(cortex_service) ( + label_replace( + kube_deployment_spec_replicas{%(namespace_matcher)s}, + "cortex_service", "$1", "deployment", "(.*?)(?:-zone-[a-z])?" + ) + ) > 0 ) ||| % config, ], legends=[ - '{{statefulset}}', - '{{deployment}}', + '{{cortex_service}}', + '{{cortex_service}}', ], thresholds=[ { color: 'yellow', value: null }, { color: 'yellow', value: 0.999 },