3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -15,6 +15,9 @@
- Cortex / Queries: added "Lazy loaded index-headers" and "Index-header lazy load duration"
- Cortex / Compactor: added "Tenants compaction progress", "Average blocks / tenant" and "Tenants with largest number of blocks"
- Alerts: added "CortexMemoryMapAreasTooHigh"
* [ENHANCEMENT] Fine-tuned gRPC keepalive pings to work nicely with Cortex default settings.
- `-server.grpc.keepalive.min-time-between-pings=10s`
- `-server.grpc.keepalive.ping-without-stream-allowed=true`
* [BUGFIX] Fixed workingset memory panel while rolling out a StatefulSet. #229
* [BUGFIX] Fixed `CortexRequestErrors` alert to not include `ready` route. #230

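The rest of this diff applies one pattern: the two keepalive flags are defined once as a shared `grpcConfig` object and object-merged into every component's argument map. A minimal, standalone Jsonnet sketch of that merge (simplified from the library code below; the component names here are only illustrative):

```jsonnet
// Standalone sketch of the pattern used in this PR: one shared flag object,
// merged into each component's args with `+`. Keys on the right-hand side
// win, so a component can still override an individual keepalive setting.
local grpcConfig = {
  'server.grpc.keepalive.min-time-between-pings': '10s',
  'server.grpc.keepalive.ping-without-stream-allowed': true,
};

{
  ingester_args: grpcConfig + {
    target: 'ingester',
  },
  // Per-component override still works because the right-hand side wins:
  querier_args: grpcConfig + {
    target: 'querier',
    'server.grpc.keepalive.min-time-between-pings': '20s',
  },
}
```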
1 change: 1 addition & 0 deletions cortex/alertmanager.libsonnet
@@ -17,6 +17,7 @@
else [],

alertmanager_args::
$._config.grpcConfig +
{
target: 'alertmanager',
'log.level': 'debug',
5 changes: 5 additions & 0 deletions cortex/config.libsonnet
@@ -139,6 +139,11 @@
}
else {},

grpcConfig:: {
'server.grpc.keepalive.min-time-between-pings': '10s',
'server.grpc.keepalive.ping-without-stream-allowed': true,
},

storageConfig:
$._config.client_configs.aws +
$._config.client_configs.cassandra +
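Because `grpcConfig` is declared as a hidden field under `_config`, downstream environments can tune these values without forking the library. A hypothetical override file is sketched below; the import path and the `15s` value are assumptions for illustration, not part of this PR:

```jsonnet
// Hypothetical downstream environment file; assumes the usual cortex-jsonnet
// layout where the library is imported and configured through _config.
local cortex = import 'cortex/cortex.libsonnet';

cortex {
  _config+:: {
    grpcConfig+:: {
      // Example only: relax the minimum interval between client pings.
      'server.grpc.keepalive.min-time-between-pings': '15s',
    },
  },
}
```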
1 change: 1 addition & 0 deletions cortex/distributor.libsonnet
@@ -3,6 +3,7 @@
local containerPort = $.core.v1.containerPort,

distributor_args::
$._config.grpcConfig +
$._config.ringConfig +
$._config.distributorConfig +
{
1 change: 1 addition & 0 deletions cortex/ingester.libsonnet
@@ -1,5 +1,6 @@
{
ingester_args::
$._config.grpcConfig +
$._config.ringConfig +
$._config.storeConfig +
$._config.storageConfig +
1 change: 1 addition & 0 deletions cortex/querier.libsonnet
@@ -2,6 +2,7 @@
local container = $.core.v1.container,

querier_args::
$._config.grpcConfig +
$._config.ringConfig +
$._config.storeConfig +
$._config.storageConfig +
91 changes: 48 additions & 43 deletions cortex/query-frontend.libsonnet
@@ -1,49 +1,54 @@
{
local container = $.core.v1.container,

query_frontend_args:: {
target: 'query-frontend',

// Need log.level=debug so all queries are logged, needed for analyse.py.
'log.level': 'debug',

// Increase HTTP server response write timeout, as we were seeing some
// queries that return a lot of data timeing out.
'server.http-write-timeout': '1m',

// Split long queries up into multiple day-long queries.
'querier.split-queries-by-interval': '24h',

// Cache query results.
'querier.align-querier-with-step': true,
'querier.cache-results': true,
'frontend.memcached.hostname': 'memcached-frontend.%s.svc.cluster.local' % $._config.namespace,
'frontend.memcached.service': 'memcached-client',
'frontend.memcached.timeout': '500ms',

// So that exporters like cloudwatch can still send in data and be un-cached.
'frontend.max-cache-freshness': '10m',

// Compress HTTP responses; improves latency for very big results and slow
// connections.
'querier.compress-http-responses': true,

// So it can recieve big responses from the querier.
'server.grpc-max-recv-msg-size-bytes': 100 << 20,

// Limit queries to 500 days, allow this to be override per-user.
'store.max-query-length': '12000h', // 500 Days
'limits.per-user-override-config': '/etc/cortex/overrides.yaml',
} + if $._config.queryFrontend.sharded_queries_enabled then {
'querier.parallelise-shardable-queries': 'true',

// in process tenant queues on frontends. We divide by the number of frontends; 2 in this case in order to apply the global limit in aggregate.
// basically base * shard_factor * query_split_factor / num_frontends where
'querier.max-outstanding-requests-per-tenant': std.floor(200 * $._config.queryFrontend.shard_factor * $._config.queryFrontend.query_split_factor / $._config.queryFrontend.replicas),

'querier.query-ingesters-within': $._config.queryConfig['querier.query-ingesters-within'],
} + $._config.storageConfig
else {},
query_frontend_args::
$._config.grpcConfig +
{
target: 'query-frontend',

// Need log.level=debug so all queries are logged, needed for analyse.py.
'log.level': 'debug',

// Increase HTTP server response write timeout, as we were seeing some
// queries that return a lot of data timing out.
'server.http-write-timeout': '1m',

// Split long queries up into multiple day-long queries.
'querier.split-queries-by-interval': '24h',

// Cache query results.
'querier.align-querier-with-step': true,
'querier.cache-results': true,
'frontend.memcached.hostname': 'memcached-frontend.%s.svc.cluster.local' % $._config.namespace,
'frontend.memcached.service': 'memcached-client',
'frontend.memcached.timeout': '500ms',

// So that exporters like cloudwatch can still send in data and be un-cached.
'frontend.max-cache-freshness': '10m',

// Compress HTTP responses; improves latency for very big results and slow
// connections.
'querier.compress-http-responses': true,

// So it can receive big responses from the querier.
'server.grpc-max-recv-msg-size-bytes': 100 << 20,

// Limit queries to 500 days, allow this to be overridden per-user.
'store.max-query-length': '12000h', // 500 Days
'limits.per-user-override-config': '/etc/cortex/overrides.yaml',
} + (
if $._config.queryFrontend.sharded_queries_enabled then
{
'querier.parallelise-shardable-queries': 'true',

// In-process tenant queues on frontends. We divide by the number of frontends (2 in this case) in order to apply the global limit in aggregate.
// Basically base * shard_factor * query_split_factor / num_frontends.
'querier.max-outstanding-requests-per-tenant': std.floor(200 * $._config.queryFrontend.shard_factor * $._config.queryFrontend.query_split_factor / $._config.queryFrontend.replicas),

'querier.query-ingesters-within': $._config.queryConfig['querier.query-ingesters-within'],
} + $._config.storageConfig
else {}
),

query_frontend_container::
container.new('query-frontend', $._images.query_frontend) +
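The `querier.max-outstanding-requests-per-tenant` expression in the new `query_frontend_args` above is easier to read with concrete numbers plugged in. The values below are illustrative placeholders, not the defaults shipped by this library:

```jsonnet
// Illustrative only: base * shard_factor * query_split_factor / replicas,
// floored, as computed for 'querier.max-outstanding-requests-per-tenant'.
// These numbers are placeholders, not the repository defaults.
local base = 200;
local shard_factor = 16;
local query_split_factor = 3;
local replicas = 2;

{
  // 200 * 16 * 3 / 2 = 4800
  'querier.max-outstanding-requests-per-tenant':
    std.floor(base * shard_factor * query_split_factor / replicas),
}
```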
1 change: 1 addition & 0 deletions cortex/ruler.libsonnet
@@ -2,6 +2,7 @@
local container = $.core.v1.container,

ruler_args::
$._config.grpcConfig +
$._config.ringConfig +
$._config.storeConfig +
$._config.storageConfig +
2 changes: 2 additions & 0 deletions cortex/tsdb.libsonnet
@@ -123,6 +123,7 @@
pvc.mixin.metadata.withName('compactor-data'),

compactor_args::
$._config.grpcConfig +
$._config.storageConfig +
$._config.blocksStorageConfig +
{
@@ -178,6 +179,7 @@
pvc.mixin.metadata.withName('store-gateway-data'),

store_gateway_args::
$._config.grpcConfig +
$._config.storageConfig +
$._config.blocksStorageConfig +
{