Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* [ENHANCEMENT] Improved blocks storage observability: #237
- Cortex / Queries: added bucket index load operations and latency (available only when bucket index is enabled)
- Alerts: added "CortexBucketIndexNotUpdated" (bucket index only) and "CortexTenantHasPartialBlocks"
* [BUGFIX] Honor configured `per_instance_label` in all panels. #239

## 1.6.0 / 2021-01-05

Expand Down
13 changes: 8 additions & 5 deletions cortex-mixin/dashboards/alertmanager-resources.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ local utils = import 'mixin-utils/utils.libsonnet';

(import 'dashboard-utils.libsonnet') {
'alertmanager-resources.json':
local filterNodeDiskByAlertmanager = |||
ignoring(pod) group_right() (label_replace(count by(pod, instance, device) (container_fs_writes_bytes_total{%s,container="alertmanager",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0)
||| % $.namespaceMatcher();
($.dashboard('Cortex / Alertmanager Resources') + { uid: '68b66aed90ccab448009089544a8d6c6' })
.addClusterSelectorTemplates()
.addRow(
Expand Down Expand Up @@ -62,13 +59,19 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Disk')
.addPanel(
$.panel('Writes') +
$.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % filterNodeDiskByAlertmanager, '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
.addPanel(
$.panel('Reads') +
$.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % filterNodeDiskByAlertmanager, '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
Expand Down
10 changes: 5 additions & 5 deletions cortex-mixin/dashboards/chunks.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Flush Stats')
.addPanel(
$.panel('Queue Length') +
$.queryPanel('cortex_ingester_flush_queue_length{%s}' % $.jobMatcher($._config.job_names.ingester), '{{instance}}'),
$.queryPanel('cortex_ingester_flush_queue_length{%s}' % $.jobMatcher($._config.job_names.ingester), '{{%s}}' % $._config.per_instance_label),
)
.addPanel(
$.panel('Flush Rate') +
Expand Down Expand Up @@ -88,13 +88,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Checkpoint')
.addPanel(
$.panel('Checkpoint creation/deletion / sec') +
$.queryPanel('rate(cortex_ingester_checkpoint_creations_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-creation') +
$.queryPanel('rate(cortex_ingester_checkpoint_deletions_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-deletion'),
$.queryPanel('rate(cortex_ingester_checkpoint_creations_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-creation' % $._config.per_instance_label) +
$.queryPanel('rate(cortex_ingester_checkpoint_deletions_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-deletion' % $._config.per_instance_label),
)
.addPanel(
$.panel('Checkpoint creation/deletion failed / sec') +
$.queryPanel('rate(cortex_ingester_checkpoint_creations_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-creation') +
$.queryPanel('rate(cortex_ingester_checkpoint_deletions_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{instance}}-deletion'),
$.queryPanel('rate(cortex_ingester_checkpoint_creations_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-creation' % $._config.per_instance_label) +
$.queryPanel('rate(cortex_ingester_checkpoint_deletions_failed_total{%s}[$__rate_interval])' % $.jobMatcher($._config.job_names.ingester), '{{%s}}-deletion' % $._config.per_instance_label),
)
),
}
10 changes: 8 additions & 2 deletions cortex-mixin/dashboards/compactor-resources.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,19 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Disk')
.addPanel(
$.panel('Disk Writes') +
$.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('compactor'), '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('compactor')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
.addPanel(
$.panel('Disk Reads') +
$.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('compactor'), '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('compactor')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
Expand Down
2 changes: 1 addition & 1 deletion cortex-mixin/dashboards/compactor.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
cortex_compactor_tenants_processing_failed{%s} +
cortex_compactor_tenants_skipped{%s}
) / cortex_compactor_tenants_discovered{%s}
||| % [$.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor')], '{{instance}}') +
||| % [$.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor'), $.jobMatcher('compactor')], '{{%s}}' % $._config.per_instance_label) +
{ yaxes: $.yaxes({ format: 'percentunit', max: 1 }) },
)
)
Expand Down
9 changes: 6 additions & 3 deletions cortex-mixin/dashboards/dashboard-utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,10 @@ local utils = import 'mixin-utils/utils.libsonnet';

goHeapInUsePanel(title, jobName)::
$.panel(title) +
$.queryPanel('sum by(instance) (go_memstats_heap_inuse_bytes{%s})' % $.jobMatcher(jobName), '{{instance}}') +
$.queryPanel(
'sum by(%s) (go_memstats_heap_inuse_bytes{%s})' % [$._config.per_instance_label, $.jobMatcher(jobName)],
'{{%s}}' % $._config.per_instance_label
) +
{ yaxes: $.yaxes('bytes') },

// Switches a panel from lines (default) to bars.
Expand Down Expand Up @@ -244,6 +247,6 @@ local utils = import 'mixin-utils/utils.libsonnet';

filterNodeDiskContainer(containerName)::
|||
ignoring(pod) group_right() (label_replace(count by(pod, instance, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0)
||| % [$.namespaceMatcher(), containerName],
ignoring(%s) group_right() (label_replace(count by(%s, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0)
||| % [$._config.per_instance_label, $._config.per_instance_label, $.namespaceMatcher(), containerName],
}
8 changes: 4 additions & 4 deletions cortex-mixin/dashboards/queries.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
.addPanel(
$.panel('Queue Length') +
$.queryPanel('cortex_query_frontend_queue_length{%s}' % $.jobMatcher($._config.job_names.query_frontend), '{{cluster}} / {{namespace}} / {{instance}}'),
$.queryPanel('cortex_query_frontend_queue_length{%s}' % $.jobMatcher($._config.job_names.query_frontend), '{{cluster}} / {{namespace}} / {{%s}}' % $._config.per_instance_label),
)
)
.addRow(
Expand All @@ -29,7 +29,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
.addPanel(
$.panel('Queue Length') +
$.queryPanel('cortex_query_scheduler_queue_length{%s}' % $.jobMatcher($._config.job_names.query_scheduler), '{{cluster}} / {{namespace}} / {{instance}}'),
$.queryPanel('cortex_query_scheduler_queue_length{%s}' % $.jobMatcher($._config.job_names.query_scheduler), '{{cluster}} / {{namespace}} / {{%s}}' % $._config.per_instance_label),
)
)
.addRow(
Expand Down Expand Up @@ -220,7 +220,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('Blocks currently loaded') +
$.queryPanel('cortex_bucket_store_blocks_loaded{component="store-gateway",%s}' % $.jobMatcher($._config.job_names.store_gateway), '{{instance}}')
$.queryPanel('cortex_bucket_store_blocks_loaded{component="store-gateway",%s}' % $.jobMatcher($._config.job_names.store_gateway), '{{%s}}' % $._config.per_instance_label)
)
.addPanel(
$.successFailurePanel(
Expand All @@ -242,7 +242,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('Lazy loaded index-headers') +
$.queryPanel('cortex_bucket_store_indexheader_lazy_load_total{%s} - cortex_bucket_store_indexheader_lazy_unload_total{%s}' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], '{{instance}}')
$.queryPanel('cortex_bucket_store_indexheader_lazy_load_total{%s} - cortex_bucket_store_indexheader_lazy_unload_total{%s}' % [$.jobMatcher($._config.job_names.store_gateway), $.jobMatcher($._config.job_names.store_gateway)], '{{%s}}' % $._config.per_instance_label)
)
.addPanel(
$.panel('Index-header lazy load duration') +
Expand Down
15 changes: 12 additions & 3 deletions cortex-mixin/dashboards/reads-resources.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Ruler')
.addPanel(
$.panel('Rules') +
$.queryPanel('sum by(instance) (cortex_prometheus_rule_group_rules{%s})' % $.jobMatcher($._config.job_names.ruler), '{{instance}}'),
$.queryPanel(
'sum by(%s) (cortex_prometheus_rule_group_rules{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.ruler)],
'{{%s}}' % $._config.per_instance_label
),
)
.addPanel(
$.containerCPUUsagePanel('CPU', 'ruler'),
Expand Down Expand Up @@ -101,13 +104,19 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('Disk Writes') +
$.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('store-gateway'), '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
.addPanel(
$.panel('Disk Reads') +
$.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('store-gateway'), '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
Expand Down
15 changes: 12 additions & 3 deletions cortex-mixin/dashboards/writes-resources.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Ingester')
.addPanel(
$.panel('In-memory series') +
$.queryPanel('sum by(instance) (cortex_ingester_memory_series{%s})' % $.jobMatcher($._config.job_names.ingester), '{{instance}}'),
$.queryPanel(
'sum by(%s) (cortex_ingester_memory_series{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.ingester)],
'{{%s}}' % $._config.per_instance_label
),
)
.addPanel(
$.containerCPUUsagePanel('CPU', 'ingester'),
Expand All @@ -51,13 +54,19 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('Disk Writes') +
$.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('ingester'), '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('ingester')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
.addPanel(
$.panel('Disk Reads') +
$.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % $.filterNodeDiskContainer('ingester'), '{{pod}} - {{device}}') +
$.queryPanel(
'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_instance_label, $.filterNodeDiskContainer('ingester')],
'{{%s}} - {{device}}' % $._config.per_instance_label
) +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
Expand Down