Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
* [CHANGE] Add overrides config to tsdb store-gateway. #167
* [CHANGE] Ingesters now default to running as a `StatefulSet` with WAL enabled. This is controlled by the config `$._config.ingester_deployment_without_wal`, which is `false` by default. Setting it to `true` restores the old behaviour (stateless `Deployment` without WAL enabled). #72
* [CHANGE] Queries may now span up to 32 days, for example `rate(metric[32d])`. The previous limit was 31 days. #173
* [CHANGE] Renamed `container_name` and `pod_name` label names to `container` and `pod` respectively. This is required in order to comply with cAdvisor metrics changes shipped with Kubernetes 1.16. #179
* [ENHANCEMENT] Enable support for HA in the Cortex Alertmanager #147
* [ENHANCEMENT] Support `alertmanager.fallback_config` option in the Alertmanager. #179
* [ENHANCEMENT] Add support for S3 block storage. #181
Expand Down
8 changes: 4 additions & 4 deletions cortex-mixin/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -372,9 +372,9 @@
alert: 'CortexAllocatingTooMuchMemory',
expr: |||
(
container_memory_working_set_bytes{container_name="ingester"}
container_memory_working_set_bytes{container="ingester"}
/
container_spec_memory_limit_bytes{container_name="ingester"}
container_spec_memory_limit_bytes{container="ingester"}
) > 0.5
|||,
'for': '15m',
Expand All @@ -391,9 +391,9 @@
alert: 'CortexAllocatingTooMuchMemory',
expr: |||
(
container_memory_working_set_bytes{container_name="ingester"}
container_memory_working_set_bytes{container="ingester"}
/
container_spec_memory_limit_bytes{container_name="ingester"}
container_spec_memory_limit_bytes{container="ingester"}
) > 0.8
|||,
'for': '15m',
Expand Down
10 changes: 5 additions & 5 deletions cortex-mixin/dashboards/compactor-resources.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
(import 'dashboard-utils.libsonnet') {
'cortex-compactor-resources.json':
local filterNodeDiskByCompactor = |||
ignoring(pod_name) group_right() (label_replace(count by(pod_name, instance, device) (container_fs_writes_bytes_total{%s,container="compactor",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0)
ignoring(pod) group_right() (label_replace(count by(pod, instance, device) (container_fs_writes_bytes_total{%s,container="compactor",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0)
||| % $.namespaceMatcher();

$.dashboard('Cortex / Compactor Resources')
Expand All @@ -24,13 +24,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Network')
.addPanel(
$.panel('Receive Bandwidth') +
$.queryPanel('sum by(pod_name) (rate(container_network_receive_bytes_total{%s,pod_name=~"compactor.*"}[$__interval]))' % $.namespaceMatcher(), '{{pod_name}}') +
$.queryPanel('sum by(pod) (rate(container_network_receive_bytes_total{%s,pod=~"compactor.*"}[$__interval]))' % $.namespaceMatcher(), '{{pod}}') +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
.addPanel(
$.panel('Transmit Bandwidth') +
$.queryPanel('sum by(pod_name) (rate(container_network_transmit_bytes_total{%s,pod_name=~"compactor.*"}[$__interval]))' % $.namespaceMatcher(), '{{pod_name}}') +
$.queryPanel('sum by(pod) (rate(container_network_transmit_bytes_total{%s,pod=~"compactor.*"}[$__interval]))' % $.namespaceMatcher(), '{{pod}}') +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
Expand All @@ -39,13 +39,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('Disk')
.addPanel(
$.panel('Writes') +
$.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__interval])) + %s' % filterNodeDiskByCompactor, '{{pod_name}} - {{device}}') +
$.queryPanel('sum by(instance, device) (rate(node_disk_written_bytes_total[$__interval])) + %s' % filterNodeDiskByCompactor, '{{pod}} - {{device}}') +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
.addPanel(
$.panel('Reads') +
$.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__interval])) + %s' % filterNodeDiskByCompactor, '{{pod_name}} - {{device}}') +
$.queryPanel('sum by(instance, device) (rate(node_disk_read_bytes_total[$__interval])) + %s' % filterNodeDiskByCompactor, '{{pod}} - {{device}}') +
$.stack +
{ yaxes: $.yaxes('Bps') },
)
Expand Down
24 changes: 12 additions & 12 deletions cortex-mixin/dashboards/comparison.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('CPU per sample') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$blocks_namespace",container_name="ingester"}[$__interval])) / sum(rate(cortex_ingester_ingested_samples_total{cluster=~"$cluster",job="$blocks_namespace/ingester"}[$__interval]))', 'blocks') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$chunks_namespace",container_name="ingester"}[$__interval])) / sum(rate(cortex_ingester_ingested_samples_total{cluster=~"$cluster",job="$chunks_namespace/ingester"}[$__interval]))', 'chunks')
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$blocks_namespace",container="ingester"}[$__interval])) / sum(rate(cortex_ingester_ingested_samples_total{cluster=~"$cluster",job="$blocks_namespace/ingester"}[$__interval]))', 'blocks') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$chunks_namespace",container="ingester"}[$__interval])) / sum(rate(cortex_ingester_ingested_samples_total{cluster=~"$cluster",job="$chunks_namespace/ingester"}[$__interval]))', 'chunks')
)
.addPanel(
$.panel('Memory per active series') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$blocks_namespace",container_name="ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster",job=~"$blocks_namespace/ingester"})', 'blocks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$chunks_namespace",container_name="ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster",job=~"$chunks_namespace/ingester"})', 'chunks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$blocks_namespace",container="ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster",job=~"$blocks_namespace/ingester"})', 'blocks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$chunks_namespace",container="ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster",job=~"$chunks_namespace/ingester"})', 'chunks - working set') +
$.queryPanel('sum(go_memstats_heap_inuse_bytes{cluster=~"$cluster",job=~"$blocks_namespace/ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster",job=~"$blocks_namespace/ingester"})', 'blocks - heap inuse') +
$.queryPanel('sum(go_memstats_heap_inuse_bytes{cluster=~"$cluster",job=~"$chunks_namespace/ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster",job=~"$chunks_namespace/ingester"})', 'chunks - heap inuse') +
{ yaxes: $.yaxes('bytes') }
Expand All @@ -46,13 +46,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('CPU') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$blocks_namespace",container_name="ingester"}[$__interval]))', 'blocks') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$chunks_namespace",container_name="ingester"}[$__interval]))', 'chunks')
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$blocks_namespace",container="ingester"}[$__interval]))', 'blocks') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$chunks_namespace",container="ingester"}[$__interval]))', 'chunks')
)
.addPanel(
$.panel('Memory') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$blocks_namespace",container_name="ingester"})', 'blocks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$chunks_namespace",container_name="ingester"})', 'chunks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$blocks_namespace",container="ingester"})', 'blocks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$chunks_namespace",container="ingester"})', 'chunks - working set') +
$.queryPanel('sum(go_memstats_heap_inuse_bytes{cluster=~"$cluster",job=~"$blocks_namespace/ingester"})', 'blocks - heap inuse') +
$.queryPanel('sum(go_memstats_heap_inuse_bytes{cluster=~"$cluster",job=~"$chunks_namespace/ingester"})', 'chunks - heap inuse') +
{ yaxes: $.yaxes('bytes') }
Expand Down Expand Up @@ -90,13 +90,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.row('')
.addPanel(
$.panel('CPU') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$blocks_namespace",container_name="querier"}[$__interval]))', 'blocks') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$chunks_namespace",container_name="querier"}[$__interval]))', 'chunks')
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$blocks_namespace",container="querier"}[$__interval]))', 'blocks') +
$.queryPanel('sum(rate(container_cpu_usage_seconds_total{cluster=~"$cluster",namespace="$chunks_namespace",container="querier"}[$__interval]))', 'chunks')
)
.addPanel(
$.panel('Memory') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$blocks_namespace",container_name="querier"})', 'blocks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$chunks_namespace",container_name="querier"})', 'chunks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$blocks_namespace",container="querier"})', 'blocks - working set') +
$.queryPanel('sum(container_memory_working_set_bytes{cluster=~"$cluster",namespace="$chunks_namespace",container="querier"})', 'chunks - working set') +
$.queryPanel('sum(go_memstats_heap_inuse_bytes{cluster=~"$cluster",job=~"$blocks_namespace/querier"})', 'blocks - heap inuse') +
$.queryPanel('sum(go_memstats_heap_inuse_bytes{cluster=~"$cluster",job=~"$chunks_namespace/querier"})', 'chunks - heap inuse') +
{ yaxes: $.yaxes('bytes') }
Expand Down
12 changes: 6 additions & 6 deletions cortex-mixin/dashboards/dashboard-utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
containerCPUUsagePanel(title, containerName)::
$.panel(title) +
$.queryPanel([
'sum by(pod_name) (rate(container_cpu_usage_seconds_total{%s,container_name="%s"}[$__interval]))' % [$.namespaceMatcher(), containerName],
'min(container_spec_cpu_quota{%s,container_name="%s"} / container_spec_cpu_period{%s,container_name="%s"})' % [$.namespaceMatcher(), containerName, $.namespaceMatcher(), containerName],
], ['{{pod_name}}', 'limit']) +
'sum by(pod) (rate(container_cpu_usage_seconds_total{%s,container="%s"}[$__interval]))' % [$.namespaceMatcher(), containerName],
'min(container_spec_cpu_quota{%s,container="%s"} / container_spec_cpu_period{%s,container="%s"})' % [$.namespaceMatcher(), containerName, $.namespaceMatcher(), containerName],
], ['{{pod}}', 'limit']) +
{
seriesOverrides: [
{
Expand All @@ -144,9 +144,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
containerMemoryWorkingSetPanel(title, containerName)::
$.panel(title) +
$.queryPanel([
'sum by(pod_name) (container_memory_working_set_bytes{%s,container_name="%s"})' % [$.namespaceMatcher(), containerName],
'min(container_spec_memory_limit_bytes{%s,container_name="%s"} > 0)' % [$.namespaceMatcher(), containerName],
], ['{{pod_name}}', 'limit']) +
'sum by(pod) (container_memory_working_set_bytes{%s,container="%s"})' % [$.namespaceMatcher(), containerName],
'min(container_spec_memory_limit_bytes{%s,container="%s"} > 0)' % [$.namespaceMatcher(), containerName],
], ['{{pod}}', 'limit']) +
{
seriesOverrides: [
{
Expand Down
4 changes: 2 additions & 2 deletions cortex-mixin/dashboards/scaling.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
)
*
quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(container_cpu_usage_seconds_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "$1", "pod_name", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:])
quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(container_cpu_usage_seconds_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:])
/
sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_cpu_cores{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))
|||,
Expand All @@ -94,7 +94,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
)
*
quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(container_memory_usage_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod_name", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:1m])
quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(container_memory_usage_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:1m])
/
sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_memory_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))
|||,
Expand Down