Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 22 additions & 14 deletions docs/blocks-storage/store-gateway.md
Original file line number Diff line number Diff line change
Expand Up @@ -349,20 +349,28 @@ store_gateway:
# CLI flag: -store-gateway.disabled-tenants
[disabled_tenants: <string> | default = ""]

instance_limits:
# EXPERIMENTAL: Max CPU utilization that this ingester can reach before
# rejecting new query request (across all tenants) in percentage, between 0
# and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -store-gateway.instance-limits.cpu-utilization
[cpu_utilization: <float> | default = 0]

# EXPERIMENTAL: Max heap utilization that this ingester can reach before
# rejecting new query request (across all tenants) in percentage, between 0
# and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -store-gateway.instance-limits.heap-utilization
[heap_utilization: <float> | default = 0]
query_protection:
rejection:
# EXPERIMENTAL: Enable query rejection feature, where the component returns
# 503 to all incoming query requests when the configured thresholds are
# breached.
# CLI flag: -store-gateway.query-protection.rejection.enabled
[enabled: <boolean> | default = false]

threshold:
# EXPERIMENTAL: Max CPU utilization that this store-gateway can reach
# before rejecting new query requests (across all tenants), as a fraction
# between 0 and 1. monitored_resources config must include the resource
# type. 0 to disable.
# CLI flag: -store-gateway.query-protection.rejection.threshold.cpu-utilization
[cpu_utilization: <float> | default = 0]

# EXPERIMENTAL: Max heap utilization that this store-gateway can reach
# before rejecting new query requests (across all tenants), as a fraction
# between 0 and 1. monitored_resources config must include the resource
# type. 0 to disable.
# CLI flag: -store-gateway.query-protection.rejection.threshold.heap-utilization
[heap_utilization: <float> | default = 0]

hedged_request:
# If true, hedged requests are applied to object store calls. It can help
Expand Down
73 changes: 45 additions & 28 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3204,20 +3204,6 @@ lifecycler:
[upload_compacted_blocks_enabled: <boolean> | default = true]

instance_limits:
# EXPERIMENTAL: Max CPU utilization that this ingester can reach before
# rejecting new query request (across all tenants) in percentage, between 0
# and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -ingester.instance-limits.cpu-utilization
[cpu_utilization: <float> | default = 0]

# EXPERIMENTAL: Max heap utilization that this ingester can reach before
# rejecting new query request (across all tenants) in percentage, between 0
# and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -ingester.instance-limits.heap-utilization
[heap_utilization: <float> | default = 0]

# Max ingestion rate (samples/sec) that ingester will accept. This limit is
# per-ingester, not per-tenant. Additional push requests will be rejected.
# Current ingestion rate is computed as exponentially weighted moving average,
Expand Down Expand Up @@ -3276,6 +3262,29 @@ instance_limits:
# If enabled, the metadata API returns all metadata regardless of the limits.
# CLI flag: -ingester.skip-metadata-limits
[skip_metadata_limits: <boolean> | default = true]

query_protection:
rejection:
# EXPERIMENTAL: Enable query rejection feature, where the component returns
# 503 to all incoming query requests when the configured thresholds are
# breached.
# CLI flag: -ingester.query-protection.rejection.enabled
[enabled: <boolean> | default = false]

threshold:
# EXPERIMENTAL: Max CPU utilization that this ingester can reach before
# rejecting new query requests (across all tenants), as a fraction between
# 0 and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -ingester.query-protection.rejection.threshold.cpu-utilization
[cpu_utilization: <float> | default = 0]

# EXPERIMENTAL: Max heap utilization that this ingester can reach before
# rejecting new query requests (across all tenants), as a fraction between
# 0 and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -ingester.query-protection.rejection.threshold.heap-utilization
[heap_utilization: <float> | default = 0]
```

### `ingester_client_config`
Expand Down Expand Up @@ -5897,20 +5906,28 @@ sharding_ring:
# CLI flag: -store-gateway.disabled-tenants
[disabled_tenants: <string> | default = ""]

instance_limits:
# EXPERIMENTAL: Max CPU utilization that this ingester can reach before
# rejecting new query request (across all tenants) in percentage, between 0
# and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -store-gateway.instance-limits.cpu-utilization
[cpu_utilization: <float> | default = 0]

# EXPERIMENTAL: Max heap utilization that this ingester can reach before
# rejecting new query request (across all tenants) in percentage, between 0
# and 1. monitored_resources config must include the resource type. 0 to
# disable.
# CLI flag: -store-gateway.instance-limits.heap-utilization
[heap_utilization: <float> | default = 0]
query_protection:
rejection:
# EXPERIMENTAL: Enable query rejection feature, where the component returns
# 503 to all incoming query requests when the configured thresholds are
# breached.
# CLI flag: -store-gateway.query-protection.rejection.enabled
[enabled: <boolean> | default = false]

threshold:
# EXPERIMENTAL: Max CPU utilization that this store-gateway can reach
# before rejecting new query requests (across all tenants), as a fraction
# between 0 and 1. monitored_resources config must include the resource
# type. 0 to disable.
# CLI flag: -store-gateway.query-protection.rejection.threshold.cpu-utilization
[cpu_utilization: <float> | default = 0]

# EXPERIMENTAL: Max heap utilization that this store-gateway can reach
# before rejecting new query requests (across all tenants), as a fraction
# between 0 and 1. monitored_resources config must include the resource
# type. 0 to disable.
# CLI flag: -store-gateway.query-protection.rejection.threshold.heap-utilization
[heap_utilization: <float> | default = 0]

hedged_request:
# If true, hedged requests are applied to object store calls. It can help with
Expand Down
10 changes: 4 additions & 6 deletions docs/configuration/v1-guarantees.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,8 @@ Currently experimental features are:
- Query-frontend: dynamic query splits
- `querier.max-shards-per-query` (int) CLI flag
- `querier.max-fetched-data-duration-per-query` (duration) CLI flag
- Ingester/Store-Gateway: Resource-based throttling
- `-ingester.instance-limits.cpu-utilization`
- `-ingester.instance-limits.heap-utilization`
- `-store-gateway.instance-limits.cpu-utilization`
- `-store-gateway.instance-limits.heap-utilization`
- Ingester/Store-Gateway: Query rejection
- `-ingester.query-protection.rejection`
- `-store-gateway.query-protection.rejection`
- Distributor/Ingester: Stream push connection
- Enable stream push connection between distributor and ingester by setting `-distributor.use-stream-push=true` on Distributor.
- Enable stream push connection between distributor and ingester by setting `-distributor.use-stream-push=true` on Distributor.
12 changes: 8 additions & 4 deletions docs/guides/protecting-cortex-from-heavy-queries.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,13 @@ For example, the following configuration will start throttling query requests if
```
target: ingester
monitored_resources: cpu,heap
instance_limits:
cpu_utilization: 0.8
heap_utilization: 0.8
ingester:
query_protection:
rejection:
enabled: true
threshold:
cpu_utilization: 0.8
heap_utilization: 0.8
```

See https://cortexmetrics.io/docs/configuration/configuration-file/:~:text=instance_limits for details.
See https://cortexmetrics.io/docs/configuration/configuration-file/:~:text=query_protection for details.
8 changes: 4 additions & 4 deletions integration/resource_based_limiter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ func Test_ResourceBasedLimiter_shouldStartWithoutError(t *testing.T) {

// Start Cortex components.
ingester := e2ecortex.NewIngester("ingester", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{
"-ingester.instance-limits.cpu-utilization": "0.8",
"-ingester.instance-limits.heap-utilization": "0.8",
"-ingester.query-protection.rejection.threshold.cpu-utilization": "0.8",
"-ingester.query-protection.rejection.threshold.heap-utilization": "0.8",
}), "")
storeGateway := e2ecortex.NewStoreGateway("store-gateway", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{
"-store-gateway.instance-limits.cpu-utilization": "0.8",
"-store-gateway.instance-limits.heap-utilization": "0.8",
"-store-gateway.query-protection.rejection.threshold.cpu-utilization": "0.8",
"-store-gateway.query-protection.rejection.threshold.heap-utilization": "0.8",
}), "")
require.NoError(t, s.StartAndWaitReady(ingester, storeGateway))
}
40 changes: 0 additions & 40 deletions pkg/configs/instance_limits.go

This file was deleted.

51 changes: 51 additions & 0 deletions pkg/configs/query_protection.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package configs

import (
"errors"
"flag"
"strings"

"github.com/cortexproject/cortex/pkg/util/flagext"
"github.com/cortexproject/cortex/pkg/util/resource"
)

// QueryProtection groups the settings that protect a component from heavy
// queries. At the moment it only carries the query rejection settings.
type QueryProtection struct {
	// Rejection configures query rejection. The explicit yaml tag keeps the
	// key consistent with the nested structs, which are all yaml-tagged; the
	// json tag is retained so existing JSON encodings keep the same key.
	Rejection rejection `yaml:"rejection" json:"rejection"`
}

// rejection configures the query rejection feature.
type rejection struct {
	// Enabled is bound to the "query-protection.rejection.enabled" flag.
	Enabled bool `yaml:"enabled"`

	// Threshold holds the utilization limits bound to the
	// "query-protection.rejection.threshold.*" flags.
	Threshold threshold `yaml:"threshold"`
}

// threshold holds the per-resource utilization limits. Validate accepts
// values in the range [0, 1]; the flag help text documents 0 as "disabled".
type threshold struct {
	// CPUUtilization is the CPU utilization limit.
	CPUUtilization float64 `yaml:"cpu_utilization"`

	// HeapUtilization is the heap utilization limit.
	HeapUtilization float64 `yaml:"heap_utilization"`
}

// RegisterFlagsWithPrefix registers the query rejection flags on f. Each flag
// name is prefix followed by "query-protection.rejection.<name>". The flags
// are the enable switch (default false) and the CPU and heap utilization
// thresholds (default 0).
func (cfg *QueryProtection) RegisterFlagsWithPrefix(f *flag.FlagSet, prefix string) {
	const (
		enabledHelp = "EXPERIMENTAL: Enable query rejection feature, where the component return 503 to all incoming query requests when the configured thresholds are breached."
		cpuHelp     = "EXPERIMENTAL: Max CPU utilization that this ingester can reach before rejecting new query request (across all tenants) in percentage, between 0 and 1. monitored_resources config must include the resource type. 0 to disable."
		heapHelp    = "EXPERIMENTAL: Max heap utilization that this ingester can reach before rejecting new query request (across all tenants) in percentage, between 0 and 1. monitored_resources config must include the resource type. 0 to disable."
	)

	// All flags of this feature share the same name stem.
	stem := prefix + "query-protection.rejection."
	limits := &cfg.Rejection.Threshold

	f.BoolVar(&cfg.Rejection.Enabled, stem+"enabled", false, enabledHelp)
	f.Float64Var(&limits.CPUUtilization, stem+"threshold.cpu-utilization", 0, cpuHelp)
	f.Float64Var(&limits.HeapUtilization, stem+"threshold.heap-utilization", 0, heapHelp)
}

// Validate checks the rejection thresholds against monitoredResources.
//
// Checks run in this order and the first failure is returned:
//  1. cpu_utilization lies within [0, 1];
//  2. a non-zero cpu_utilization requires "cpu" in monitoredResources;
//  3. heap_utilization lies within [0, 1];
//  4. a non-zero heap_utilization requires "heap" in monitoredResources.
//
// It returns nil when every check passes.
func (cfg *QueryProtection) Validate(monitoredResources flagext.StringSliceCSV) error {
	limits := cfg.Rejection.Threshold

	// isMonitored reports whether name occurs in the string form of
	// monitoredResources. It is only evaluated for non-zero thresholds.
	isMonitored := func(name string) bool {
		return strings.Contains(monitoredResources.String(), name)
	}

	switch {
	case limits.CPUUtilization < 0 || limits.CPUUtilization > 1:
		return errors.New("cpu_utilization must be between 0 and 1")
	case limits.CPUUtilization > 0 && !isMonitored(string(resource.CPU)):
		return errors.New("monitored_resources config must include \"cpu\" as well")
	case limits.HeapUtilization < 0 || limits.HeapUtilization > 1:
		return errors.New("heap_utilization must be between 0 and 1")
	case limits.HeapUtilization > 0 && !isMonitored(string(resource.Heap)):
		return errors.New("monitored_resources config must include \"heap\" as well")
	}

	return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,51 +9,71 @@ import (

func Test_Validate(t *testing.T) {
for name, tc := range map[string]struct {
instanceLimits InstanceLimits
queryProtection QueryProtection
monitoredResources []string
err error
}{
"correct config should pass validation": {
instanceLimits: InstanceLimits{
CPUUtilization: 0.5,
HeapUtilization: 0.5,
queryProtection: QueryProtection{
Rejection: rejection{
Threshold: threshold{
CPUUtilization: 0.5,
HeapUtilization: 0.5,
},
},
},
monitoredResources: []string{"cpu", "heap"},
err: nil,
},
"utilization config less than 0 should fail validation": {
instanceLimits: InstanceLimits{
CPUUtilization: -0.5,
HeapUtilization: 0.5,
queryProtection: QueryProtection{
Rejection: rejection{
Threshold: threshold{
CPUUtilization: -0.5,
HeapUtilization: 0.5,
},
},
},
monitoredResources: []string{"cpu", "heap"},
err: errors.New("cpu_utilization must be between 0 and 1"),
},
"utilization config greater than 1 should fail validation": {
instanceLimits: InstanceLimits{
CPUUtilization: 0.5,
HeapUtilization: 1.5,
queryProtection: QueryProtection{
Rejection: rejection{
Threshold: threshold{
CPUUtilization: 0.5,
HeapUtilization: 1.5,
},
},
},
monitoredResources: []string{"cpu", "heap"},
err: errors.New("heap_utilization must be between 0 and 1"),
},
"missing cpu in monitored_resources config should fail validation": {
instanceLimits: InstanceLimits{
CPUUtilization: 0.5,
queryProtection: QueryProtection{
Rejection: rejection{
Threshold: threshold{
CPUUtilization: 0.5,
},
},
},
monitoredResources: []string{"heap"},
err: errors.New("monitored_resources config must include \"cpu\" as well"),
},
"missing heap in monitored_resources config should fail validation": {
instanceLimits: InstanceLimits{
HeapUtilization: 0.5,
queryProtection: QueryProtection{
Rejection: rejection{
Threshold: threshold{
HeapUtilization: 0.5,
},
},
},
monitoredResources: []string{"cpu"},
err: errors.New("monitored_resources config must include \"heap\" as well"),
},
} {
t.Run(name, func(t *testing.T) {
err := tc.instanceLimits.Validate(tc.monitoredResources)
err := tc.queryProtection.Validate(tc.monitoredResources)
if tc.err != nil {
require.Errorf(t, err, tc.err.Error())
} else {
Expand Down
Loading
Loading