diff --git a/CHANGELOG.md b/CHANGELOG.md index a65a43c6..f3abb174 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * [ENHANCEMENT] Improved blocks storage observability: #237 - Cortex / Queries: added bucket index load operations and latency (available only when bucket index is enabled) - Alerts: added "CortexBucketIndexNotUpdated" (bucket index only) and "CortexTenantHasPartialBlocks" +* [BUGFIX] Honor configured `per_instance_label` in all panels. #239 ## 1.6.0 / 2021-01-05 diff --git a/cortex-mixin/dashboards/alertmanager-resources.libsonnet b/cortex-mixin/dashboards/alertmanager-resources.libsonnet index 9bff5fca..5fdd92a2 100644 --- a/cortex-mixin/dashboards/alertmanager-resources.libsonnet +++ b/cortex-mixin/dashboards/alertmanager-resources.libsonnet @@ -2,9 +2,6 @@ local utils = import 'mixin-utils/utils.libsonnet'; (import 'dashboard-utils.libsonnet') { 'alertmanager-resources.json': - local filterNodeDiskByAlertmanager = ||| - ignoring(pod) group_right() (label_replace(count by(pod, instance, device) (container_fs_writes_bytes_total{%s,container="alertmanager",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0) - ||| % $.namespaceMatcher(); ($.dashboard('Cortex / Alertmanager Resources') + { uid: '68b66aed90ccab448009089544a8d6c6' }) .addClusterSelectorTemplates() .addRow( @@ -62,13 +59,19 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Disk') .addPanel( $.panel('Writes') + - $.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % filterNodeDiskByAlertmanager, '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, ) .addPanel( $.panel('Reads') + - $.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % filterNodeDiskByAlertmanager, '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, ) diff --git a/cortex-mixin/dashboards/chunks.libsonnet b/cortex-mixin/dashboards/chunks.libsonnet index 0481569a..b82c6880 100644 --- a/cortex-mixin/dashboards/chunks.libsonnet +++ b/cortex-mixin/dashboards/chunks.libsonnet @@ -43,7 +43,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Flush Stats') .addPanel( $.panel('Queue Length') + - $.queryPanel('cortex_ingester_flush_queue_length{%s}' % $.jobMatcher($._config.job_names.ingester), '{{instance}}'), + $.queryPanel('cortex_ingester_flush_queue_length{%s}' % $.jobMatcher($._config.job_names.ingester), '{{%s}}' % $._config.per_instance_label), ) .addPanel( $.panel('Flush Rate') + @@ -88,13 +88,13 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Checkpoint') .addPanel( $.panel('Checkpoint creation/deletion / sec') + - $.queryPanel('rate(cortex_ingester_checkpoint_creations_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-creation') + - $.queryPanel('rate(cortex_ingester_checkpoint_deletions_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-deletion'), + $.queryPanel('rate(cortex_ingester_checkpoint_creations_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-creation' % $._config.per_instance_label) + + $.queryPanel('rate(cortex_ingester_checkpoint_deletions_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-deletion' % $._config.per_instance_label), ) .addPanel( $.panel('Checkpoint creation/deletion failed / sec') + - $.queryPanel('rate(cortex_ingester_checkpoint_creations_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-creation') + - $.queryPanel('rate(cortex_ingester_checkpoint_deletions_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-deletion'), + $.queryPanel('rate(cortex_ingester_checkpoint_creations_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-creation' % $._config.per_instance_label) + + $.queryPanel('rate(cortex_ingester_checkpoint_deletions_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-deletion' % $._config.per_instance_label), ) ), } diff --git a/cortex-mixin/dashboards/compactor-resources.libsonnet b/cortex-mixin/dashboards/compactor-resources.libsonnet index 6ce5f35c..c24f600b 100644 --- a/cortex-mixin/dashboards/compactor-resources.libsonnet +++ b/cortex-mixin/dashboards/compactor-resources.libsonnet @@ -35,13 +35,19 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Disk') .addPanel( $.panel('Disk Writes') + - $.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('compactor'), '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('compactor')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, ) .addPanel( $.panel('Disk Reads') + - $.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('compactor'), '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('compactor')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, ) diff --git a/cortex-mixin/dashboards/compactor.libsonnet b/cortex-mixin/dashboards/compactor.libsonnet index 36bff1d3..d767e28d 100644 --- a/cortex-mixin/dashboards/compactor.libsonnet +++ b/cortex-mixin/dashboards/compactor.libsonnet @@ -30,7 +30,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; cortex_compactor_tenants_processing_failed{%s} + cortex_compactor_tenants_skipped{%s} ) / cortex_compactor_tenants_discovered{%s} - ||| % [$.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor')], '{{instance}}') + + ||| % [$.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor')], '{{%s}}' % $._config.per_instance_label) + { yaxes: $.yaxes({ format: 'percentunit', max: 1 }) }, ) ) diff --git a/cortex-mixin/dashboards/dashboard-utils.libsonnet b/cortex-mixin/dashboards/dashboard-utils.libsonnet index 5644af34..f4f1ec6a 100644 --- a/cortex-mixin/dashboards/dashboard-utils.libsonnet +++ b/cortex-mixin/dashboards/dashboard-utils.libsonnet @@ -154,7 +154,10 @@ local utils = import 'mixin-utils/utils.libsonnet'; goHeapInUsePanel(title, jobName):: $.panel(title) + - $.queryPanel('sum by(instance) (go_memstats_heap_inuse_bytes{%s})' % $.jobMatcher(jobName), '{{instance}}') + + $.queryPanel( + 'sum by(%s) (go_memstats_heap_inuse_bytes{%s})' % [$._config.per_instance_label, $.jobMatcher(jobName)], + '{{%s}}' % $._config.per_instance_label + ) + { yaxes: $.yaxes('bytes') }, // Switches a panel from lines (default) to bars. @@ -244,6 +247,6 @@ local utils = import 'mixin-utils/utils.libsonnet'; filterNodeDiskContainer(containerName):: ||| - ignoring(pod) group_right() (label_replace(count by(pod, instance, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0) - ||| % [$.namespaceMatcher(), containerName], + ignoring(%s) group_right() (label_replace(count by(%s, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0) + ||| % [$._config.per_instance_label, $._config.per_instance_label, $.namespaceMatcher(), containerName], } diff --git a/cortex-mixin/dashboards/queries.libsonnet b/cortex-mixin/dashboards/queries.libsonnet index 5f51425b..fedbc949 100644 --- a/cortex-mixin/dashboards/queries.libsonnet +++ b/cortex-mixin/dashboards/queries.libsonnet @@ -18,7 +18,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; ) .addPanel( $.panel('Queue Length') + - $.queryPanel('cortex_query_frontend_queue_length{%s}' % $.jobMatcher($._config.job_names.query_frontend), '{{cluster}} / {{namespace}} / {{instance}}'), + $.queryPanel('cortex_query_frontend_queue_length{%s}' % $.jobMatcher($._config.job_names.query_frontend), '{{cluster}} / {{namespace}} / {{%s}}' % $._config.per_instance_label), ) ) .addRow( @@ -29,7 +29,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; ) .addPanel( $.panel('Queue Length') + - $.queryPanel('cortex_query_scheduler_queue_length{%s}' % $.jobMatcher($._config.job_names.query_scheduler), '{{cluster}} / {{namespace}} / {{instance}}'), + $.queryPanel('cortex_query_scheduler_queue_length{%s}' % $.jobMatcher($._config.job_names.query_scheduler), '{{cluster}} / {{namespace}} / {{%s}}' % $._config.per_instance_label), ) ) .addRow( @@ -220,7 +220,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('') .addPanel( $.panel('Blocks currently loaded') + - $.queryPanel('cortex_bucket_store_blocks_loaded{component="store-gateway",%s}' % $.jobMatcher($._config.job_names.store_gateway), '{{instance}}') + $.queryPanel('cortex_bucket_store_blocks_loaded{component="store-gateway",%s}' % $.jobMatcher($._config.job_names.store_gateway), '{{%s}}' % $._config.per_instance_label) ) .addPanel( $.successFailurePanel( @@ -242,7 +242,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('') .addPanel( $.panel('Lazy loaded index-headers') + - $.queryPanel('cortex_bucket_store_indexheader_lazy_load_total{%s} - cortex_bucket_store_indexheader_lazy_unload_total{%s}' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], '{{instance}}') + $.queryPanel('cortex_bucket_store_indexheader_lazy_load_total{%s} - cortex_bucket_store_indexheader_lazy_unload_total{%s}' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], '{{%s}}' % $._config.per_instance_label) ) .addPanel( $.panel('Index-header lazy load duration') + diff --git a/cortex-mixin/dashboards/reads-resources.libsonnet b/cortex-mixin/dashboards/reads-resources.libsonnet index 7c3cc307..715673b6 100644 --- a/cortex-mixin/dashboards/reads-resources.libsonnet +++ b/cortex-mixin/dashboards/reads-resources.libsonnet @@ -68,7 +68,10 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Ruler') .addPanel( $.panel('Rules') + - $.queryPanel('sum by(instance) (cortex_prometheus_rule_group_rules{%s})' % $.jobMatcher($._config.job_names.ruler), '{{instance}}'), + $.queryPanel( + 'sum by(%s) (cortex_prometheus_rule_group_rules{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.ruler)], + '{{%s}}' % $._config.per_instance_label + ), ) .addPanel( $.containerCPUUsagePanel('CPU', 'ruler'), @@ -101,13 +104,19 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('') .addPanel( $.panel('Disk Writes') + - $.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('store-gateway'), '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, ) .addPanel( $.panel('Disk Reads') + - $.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('store-gateway'), '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, ) diff --git a/cortex-mixin/dashboards/writes-resources.libsonnet b/cortex-mixin/dashboards/writes-resources.libsonnet index 5e7391ba..9d7e3e80 100644 --- a/cortex-mixin/dashboards/writes-resources.libsonnet +++ b/cortex-mixin/dashboards/writes-resources.libsonnet @@ -32,7 +32,10 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Ingester') .addPanel( $.panel('In-memory series') + - $.queryPanel('sum by(instance) (cortex_ingester_memory_series{%s})' % $.jobMatcher($._config.job_names.ingester), '{{instance}}'), + $.queryPanel( + 'sum by(%s) (cortex_ingester_memory_series{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.ingester)], + '{{%s}}' % $._config.per_instance_label + ), ) .addPanel( $.containerCPUUsagePanel('CPU', 'ingester'), @@ -51,13 +54,19 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('') .addPanel( $.panel('Disk Writes') + - $.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('ingester'), '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('ingester')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, ) .addPanel( $.panel('Disk Reads') + - $.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('ingester'), '{{pod}} - {{device}}') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('ingester')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + $.stack + { yaxes: $.yaxes('Bps') }, )