diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8994df78..28a1aa1c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,8 +3,9 @@
 ## master / unreleased
 
 * [CHANGE] Add the default preset 'extra_small_user' and reference it in the CLI flags. This will raise the limits of the 'small_user' preset to the defaults for `ingester.max-samples-per-query` and `ingester.max-series-per-query`. #200
+* [ENHANCEMENT] Add the Ruler to the read resources dashboard #205
 * [ENHANCEMENT] Read dashboards now use `cortex_querier_request_duration_seconds` metrics to allow for accurate dashboards when deploying Cortex as a single-binary. #199
-* [ENHANCEMENT] Improved Ruler dashboard. Includes information about notifications and per user per rule group evaluation. #197
+* [ENHANCEMENT] Improved Ruler dashboard. Includes information about notifications, reads/writes, and per user per rule group evaluation. #197, #205
 * [FEATURE] Latency recording rules for the metric`cortex_querier_request_duration_seconds` are now part of a `cortex_querier_api` rule group. #199
 * [FEATURE] Add overrides-exporter as optional deployment to expose configured runtime overrides and presets. #198
 
diff --git a/cortex-mixin/config.libsonnet b/cortex-mixin/config.libsonnet
index 08336b3c..1bfa3a69 100644
--- a/cortex-mixin/config.libsonnet
+++ b/cortex-mixin/config.libsonnet
@@ -29,6 +29,7 @@
       ingester: '(ingester|cortex$)',
       distributor: '(distributor|cortex$)',
       querier: '(querier|cortex$)',
+      ruler: '(ruler|cortex$)',
       query_frontend: '(query-frontend|cortex$)',
       table_manager: '(table-manager|cortex$)',
       store_gateway: '(store-gateway|cortex$)',
diff --git a/cortex-mixin/dashboards/reads-resources.libsonnet b/cortex-mixin/dashboards/reads-resources.libsonnet
index 6dd8d0ce..9291d7bc 100644
--- a/cortex-mixin/dashboards/reads-resources.libsonnet
+++ b/cortex-mixin/dashboards/reads-resources.libsonnet
@@ -52,6 +52,25 @@ local utils = import 'mixin-utils/utils.libsonnet';
         $.goHeapInUsePanel('Memory (go heap inuse)', 'ingester'),
       )
     )
+    .addRow(
+      $.row('Ruler')
+      .addPanel(
+        $.panel('Rules') +
+        $.queryPanel('sum by(instance) (cortex_prometheus_rule_group_rules{%s})' % $.jobMatcher($._config.job_names.ruler), '{{instance}}'),
+      )
+      .addPanel(
+        $.containerCPUUsagePanel('CPU', 'ruler'),
+      )
+    )
+    .addRow(
+      $.row('')
+      .addPanel(
+        $.containerMemoryWorkingSetPanel('Memory (workingset)', 'ruler'),
+      )
+      .addPanel(
+        $.goHeapInUsePanel('Memory (go heap inuse)', 'ruler'),
+      )
+    )
     .addRowIf(
       std.member($._config.storage_engine, 'blocks'),
       $.row('Store-gateway')
diff --git a/cortex-mixin/dashboards/ruler.libsonnet b/cortex-mixin/dashboards/ruler.libsonnet
index 007457a9..b16e753a 100644
--- a/cortex-mixin/dashboards/ruler.libsonnet
+++ b/cortex-mixin/dashboards/ruler.libsonnet
@@ -61,6 +61,28 @@ local utils = import 'mixin-utils/utils.libsonnet';
   'ruler.json':
     $.dashboard('Cortex / Ruler')
     .addClusterSelectorTemplates()
+    .addRow(
+      ($.row('Headlines') + {
+         height: '100px',
+         showTitle: false,
+       })
+      .addPanel(
+        $.panel('Active Configurations') +
+        $.statPanel('sum(cortex_ruler_managers_total{%s})' % $.jobMatcher($._config.job_names.ruler), format='short')
+      )
+      .addPanel(
+        $.panel('Total Rules') +
+        $.statPanel('sum(cortex_prometheus_rule_group_rules{%s})' % $.jobMatcher($._config.job_names.ruler), format='short')
+      )
+      .addPanel(
+        $.panel('Read from Ingesters - QPS') +
+        $.statPanel('sum(rate(cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/QueryStream"}[5m]))' % $.jobMatcher($._config.job_names.ruler), format='reqps')
+      )
+      .addPanel(
+        $.panel('Write to Ingesters - QPS') +
+        $.statPanel('sum(rate(cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/Push"}[5m]))' % $.jobMatcher($._config.job_names.ruler), format='reqps')
+      )
+    )
     .addRow(
       $.row('Rule Evaluations Global')
       .addPanel(
@@ -82,7 +104,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
       )
     )
     .addRow(
-      $.row('Gateway Latency')
+      $.row('Configuration API (gateway)')
       .addPanel(
         $.panel('QPS') +
         $.qpsPanel('cortex_request_duration_seconds_count{%s, route=~"api_prom_rules.*|api_prom_api_v1_(rules|alerts)"}' % $.jobMatcher($._config.job_names.gateway))
@@ -92,6 +114,28 @@ local utils = import 'mixin-utils/utils.libsonnet';
         utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', 'api_prom_rules.*|api_prom_api_v1_(rules|alerts)')])
       )
     )
+    .addRow(
+      $.row('Writes (Ingesters)')
+      .addPanel(
+        $.panel('QPS') +
+        $.qpsPanel('cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/Push"}' % $.jobMatcher($._config.job_names.ruler))
+      )
+      .addPanel(
+        $.panel('Latency') +
+        $.latencyPanel('cortex_ingester_client_request_duration_seconds', '{%s, operation="/cortex.Ingester/Push"}' % $.jobMatcher($._config.job_names.ruler))
+      )
+    )
+    .addRow(
+      $.row('Reads (Ingesters)')
+      .addPanel(
+        $.panel('QPS') +
+        $.qpsPanel('cortex_ingester_client_request_duration_seconds_count{%s, operation="/cortex.Ingester/QueryStream"}' % $.jobMatcher($._config.job_names.ruler))
+      )
+      .addPanel(
+        $.panel('Latency') +
+        $.latencyPanel('cortex_ingester_client_request_duration_seconds', '{%s, operation="/cortex.Ingester/QueryStream"}' % $.jobMatcher($._config.job_names.ruler))
+      )
+    )
     .addRow(
       $.row('Group Evaluations')
       .addPanel(
diff --git a/cortex-mixin/dashboards/writes-resources.libsonnet b/cortex-mixin/dashboards/writes-resources.libsonnet
index ccdc966f..f9a31175 100644
--- a/cortex-mixin/dashboards/writes-resources.libsonnet
+++ b/cortex-mixin/dashboards/writes-resources.libsonnet
@@ -46,7 +46,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
       .addPanel(
         $.goHeapInUsePanel('Memory (go heap inuse)', 'ingester'),
       )
-    ) + {
+    ) +
+    {
       templating+: {
         list: [
           // Do not allow to include all clusters/namespaces otherwise this dashboard