Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cortex-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
(import 'dashboards/writes.libsonnet') +

(if std.setMember('tsdb', $._config.storage_engine)
then import 'dashboards/compactor.libsonnet'
then
(import 'dashboards/compactor.libsonnet') +
(import 'dashboards/object-store.libsonnet')
else {}) +

(if std.setMember('chunks', $._config.storage_engine)
Expand Down
4 changes: 2 additions & 2 deletions cortex-mixin/dashboards/compactor.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,6 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.latencyPanel('cortex_compactor_meta_sync_duration_seconds', '{%s}' % $.jobMatcher('compactor')),
)
)
.addRow($.objectStorePanels1('Object Store', 'cortex_compactor'))
.addRow($.objectStorePanels2('', 'cortex_compactor')),
.addRow($.objectStorePanels1('Object Store', 'compactor'))
.addRow($.objectStorePanels2('', 'compactor')),
}
38 changes: 16 additions & 22 deletions cortex-mixin/dashboards/dashboard-utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -123,51 +123,45 @@ local utils = import 'mixin-utils/utils.libsonnet';
type: 'text',
} + options,

objectStorePanels1(title, metricPrefix)::
local opsTotal = '%s_thanos_objstore_bucket_operations_total' % [metricPrefix];
local opsTotalFailures = '%s_thanos_objstore_bucket_operation_failures_total' % [metricPrefix];
local operationDuration = '%s_thanos_objstore_bucket_operation_duration_seconds' % [metricPrefix];
objectStorePanels1(title, component)::
super.row(title)
.addPanel(
// We use 'up' to add 0 if there are no failed operations.
self.successFailurePanel(
'Operations/sec',
'sum(rate(%s{%s}[$__interval])) - sum(rate(%s{%s}[$__interval]) or (up{%s}*0))' % [opsTotal, $.namespaceMatcher(), opsTotalFailures, $.namespaceMatcher(), $.namespaceMatcher()],
'sum(rate(%s{%s}[$__interval]) or (up{%s}*0))' % [opsTotalFailures, $.namespaceMatcher(), $.namespaceMatcher()]
)
$.panel('Operations / sec') +
$.queryPanel('sum by(operation) (rate(thanos_objstore_bucket_operations_total{%s,component="%s"}[$__interval]))' % [$.namespaceMatcher(), component], '{{operation}}') +
$.stack +
{ yaxes: $.yaxes('rps') },
)
.addPanel(
$.panel('Op: ObjectSize') +
$.latencyPanel(operationDuration, '{%s, operation="objectsize"}' % $.namespaceMatcher()),
$.panel('Error rate') +
$.queryPanel('sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{%s,component="%s"}[$__interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{%s,component="%s"}[$__interval]))' % [$.namespaceMatcher(), component, $.namespaceMatcher(), component], '{{operation}}') +
{ yaxes: $.yaxes('percentunit') },
)
.addPanel(
// Cortex (Thanos) doesn't track timing for 'iter', so we use ops/sec instead.
$.panel('Op: Iter') +
$.queryPanel('sum(rate(%s{%s, operation="iter"}[$__interval]))' % [opsTotal, $.namespaceMatcher()], 'ops/sec')
$.panel('Op: ObjectSize') +
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="objectsize"}' % [$.namespaceMatcher(), component]),
)
.addPanel(
$.panel('Op: Exists') +
$.latencyPanel(operationDuration, '{%s, operation="exists"}' % $.namespaceMatcher()),
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="exists"}' % [$.namespaceMatcher(), component]),
),

// Second row of Object Store stats
objectStorePanels2(title, metricPrefix)::
local operationDuration = '%s_thanos_objstore_bucket_operation_duration_seconds' % [metricPrefix];
objectStorePanels2(title, component)::
super.row(title)
.addPanel(
$.panel('Op: Get') +
$.latencyPanel(operationDuration, '{%s, operation="get"}' % $.namespaceMatcher()),
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="get"}' % [$.namespaceMatcher(), component]),
)
.addPanel(
$.panel('Op: GetRange') +
$.latencyPanel(operationDuration, '{%s, operation="get_range"}' % $.namespaceMatcher()),
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="get_range"}' % [$.namespaceMatcher(), component]),
)
.addPanel(
$.panel('Op: Upload') +
$.latencyPanel(operationDuration, '{%s, operation="upload"}' % $.namespaceMatcher()),
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="upload"}' % [$.namespaceMatcher(), component]),
)
.addPanel(
$.panel('Op: Delete') +
$.latencyPanel(operationDuration, '{%s, operation="delete"}' % $.namespaceMatcher()),
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="%s",operation="delete"}' % [$.namespaceMatcher(), component]),
),
}
65 changes: 65 additions & 0 deletions cortex-mixin/dashboards/object-store.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
local utils = import 'mixin-utils/utils.libsonnet';

(import 'dashboard-utils.libsonnet') {
  // Names of the object-store bucket metrics queried by this dashboard.
  local opsMetric = 'thanos_objstore_bucket_operations_total',
  local failuresMetric = 'thanos_objstore_bucket_operation_failures_total',
  local durationMetric = 'thanos_objstore_bucket_operation_duration_seconds',

  // Stacked requests-per-second panel, with one series per value of `label`
  // (`label` is used both as the PromQL grouping label and as the legend).
  local rpsPanel(label) =
    $.panel('RPS / %s' % label) +
    $.queryPanel(
      'sum by(%s) (rate(%s{%s}[$__interval]))' % [label, opsMetric, $.namespaceMatcher()],
      '{{%s}}' % label
    ) +
    $.stack +
    { yaxes: $.yaxes('rps') },

  // Ratio of failed operations to all operations, grouped by `label`.
  local errorRatePanel(label) =
    local fmt = {
      by: label,
      failures: failuresMetric,
      ops: opsMetric,
      matcher: $.namespaceMatcher(),
    };
    $.panel('Error rate / %s' % label) +
    $.queryPanel(
      'sum by(%(by)s) (rate(%(failures)s{%(matcher)s}[$__interval])) / sum by(%(by)s) (rate(%(ops)s{%(matcher)s}[$__interval]))' % fmt,
      '{{%s}}' % label
    ) +
    { yaxes: $.yaxes('percentunit') },

  // Latency panel for a single bucket operation. `title` is the human-readable
  // operation name shown in the panel title; `operation` is the value matched
  // against the `operation` label of the duration metric.
  local opLatencyPanel(title, operation) =
    $.panel('Op: %s' % title) +
    $.latencyPanel(
      durationMetric,
      '{%s,operation="%s"}' % [$.namespaceMatcher(), operation]
    ),

  // Dashboard giving an overview of object-store usage: request rate and error
  // rate broken down by component and by operation, followed by two untitled
  // rows of per-operation latency panels.
  'cortex-object-store.json':
    $.dashboard('Cortex / Object Store')
    .addClusterSelectorTemplates()
    .addRow(
      $.row('Components')
      .addPanel(rpsPanel('component'))
      .addPanel(errorRatePanel('component'))
    )
    .addRow(
      $.row('Operations')
      .addPanel(rpsPanel('operation'))
      .addPanel(errorRatePanel('operation'))
    )
    .addRow(
      $.row('')
      .addPanel(opLatencyPanel('Get', 'get'))
      .addPanel(opLatencyPanel('GetRange', 'get_range'))
      .addPanel(opLatencyPanel('Exists', 'exists'))
    )
    .addRow(
      $.row('')
      .addPanel(opLatencyPanel('ObjectSize', 'objectsize'))
      .addPanel(opLatencyPanel('Upload', 'upload'))
      .addPanel(opLatencyPanel('Delete', 'delete'))
    ),
}
8 changes: 4 additions & 4 deletions cortex-mixin/dashboards/reads.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -169,19 +169,19 @@ local utils = import 'mixin-utils/utils.libsonnet';
// Object store metrics for the store-gateway.
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.objectStorePanels1('Store-gateway - Blocks Object Store', 'cortex_storegateway'),
$.objectStorePanels1('Store-gateway - Blocks Object Store', 'store-gateway'),
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.objectStorePanels2('', 'cortex_storegateway'),
$.objectStorePanels2('', 'store-gateway'),
)
// Object store metrics for the querier.
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.objectStorePanels1('Querier - Blocks Object Store', 'cortex_querier'),
$.objectStorePanels1('Querier - Blocks Object Store', 'querier'),
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.objectStorePanels2('', 'cortex_querier'),
$.objectStorePanels2('', 'querier'),
),
}
18 changes: 7 additions & 11 deletions cortex-mixin/dashboards/writes.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -151,21 +151,17 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.row('Blocks Shipper')
$.row('Ingester - Blocks storage - Shipper')
.addPanel(
$.successFailurePanel(
'Uploaded blocks / sec',
'sum(rate(cortex_ingester_shipper_uploads_total{%s}[$__interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{%s}[$__interval]))' % [$.namespaceMatcher(), $.namespaceMatcher()],
'sum(rate(cortex_ingester_shipper_upload_failures_total{%s}[$__interval]))' % [$.namespaceMatcher()],
'sum(rate(cortex_ingester_shipper_uploads_total{%s}[$__interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{%s}[$__interval]))' % [$.jobMatcher('ingester'), $.jobMatcher('ingester')],
'sum(rate(cortex_ingester_shipper_upload_failures_total{%s}[$__interval]))' % $.jobMatcher('ingester'),
),
)
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.objectStorePanels1('Blocks Object Store Stats (Ingester)', 'cortex_ingester'),
)
.addRowIf(
std.setMember('tsdb', $._config.storage_engine),
$.objectStorePanels2('', 'cortex_ingester'),
.addPanel(
$.panel('Upload latency') +
$.latencyPanel('thanos_objstore_bucket_operation_duration_seconds', '{%s,component="ingester",operation="upload"}' % $.jobMatcher('ingester')),
)
),
}