Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* [ENHANCEMENT] Add "integration" as a label for `cortex_alertmanager_notifications_total` and `cortex_alertmanager_notifications_failed_total` metrics. #3056
* [ENHANCEMENT] Add `cortex_ruler_config_last_reload_successful` and `cortex_ruler_config_last_reload_successful_seconds` to check status of users rule manager. #3056
* [ENHANCEMENT] Memcached dial() calls now have an optional circuit-breaker to avoid hammering a broken cache #3051
* [ENHANCEMENT] `-ruler.evaluation-delay-duration` is now overridable as a per-tenant limit, `evaluation_delay_duration` #3098
* [BUGFIX] Query-frontend: Fixed rounding for incoming query timestamps, to be 100% Prometheus compatible. #2990
* [BUGFIX] Querier: Merge results from chunks and blocks ingesters when using streaming of results. #3013
* [BUGFIX] Querier: query /series from ingesters regardless of the `-querier.query-ingesters-within` setting. #3035
Expand Down
10 changes: 5 additions & 5 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -846,11 +846,6 @@ ruler_client:
# CLI flag: -ruler.evaluation-interval
[evaluation_interval: <duration> | default = 1m]

# Duration to delay the evaluation of rules to ensure they underlying metrics
# have been pushed to cortex.
# CLI flag: -ruler.evaluation-delay-duration
[evaluation_delay_duration: <duration> | default = 0s]

# How frequently to poll for rule changes
# CLI flag: -ruler.poll-interval
[poll_interval: <duration> | default = 1m]
Expand Down Expand Up @@ -2802,6 +2797,11 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
# CLI flag: -frontend.max-cache-freshness
[max_cache_freshness: <duration> | default = 1m]

# Duration to delay the evaluation of rules to ensure the underlying metrics
# have been pushed to cortex.
# CLI flag: -ruler.evaluation-delay-duration
[evaluation_delay_duration: <duration> | default = 0s]

# File name of per-user overrides. [deprecated, use -runtime-config.file
# instead]
# CLI flag: -limits.per-user-override-config
Expand Down
2 changes: 1 addition & 1 deletion pkg/cortex/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ func (t *Cortex) initRuler() (serv services.Service, err error) {
rulerRegisterer := prometheus.WrapRegistererWith(prometheus.Labels{"engine": "ruler"}, prometheus.DefaultRegisterer)
queryable, engine := querier.New(t.Cfg.Querier, t.Overrides, t.Distributor, t.StoreQueryables, t.TombstonesLoader, rulerRegisterer)

managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Distributor, queryable, engine)
managerFactory := ruler.DefaultTenantManagerFactory(t.Cfg.Ruler, t.Distributor, queryable, engine, t.Overrides)
manager, err := ruler.NewDefaultMultiTenantManager(t.Cfg.Ruler, managerFactory, prometheus.DefaultRegisterer, util.Logger)
if err != nil {
return nil, err
Expand Down
6 changes: 5 additions & 1 deletion pkg/ruler/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/weaveworks/common/user"

"github.com/cortexproject/cortex/pkg/ingester/client"
"github.com/cortexproject/cortex/pkg/util/validation"
)

// Pusher is an ingester server that accepts pushes.
Expand Down Expand Up @@ -94,6 +95,7 @@ func DefaultTenantManagerFactory(
p Pusher,
q storage.Queryable,
engine *promql.Engine,
overrides *validation.Overrides,
) ManagerFactory {
return func(
ctx context.Context,
Expand All @@ -102,10 +104,12 @@ func DefaultTenantManagerFactory(
logger log.Logger,
reg prometheus.Registerer,
) *rules.Manager {
evaluationDelay := overrides.EvaluationDelay(userID)

return rules.NewManager(&rules.ManagerOptions{
Appendable: &PusherAppendable{pusher: p, userID: userID},
Queryable: q,
QueryFunc: engineQueryFunc(engine, q, cfg.EvaluationDelay),
QueryFunc: engineQueryFunc(engine, q, evaluationDelay),
Context: user.InjectOrgID(ctx, userID),
ExternalURL: cfg.ExternalURL.URL,
NotifyFunc: SendAlerts(notifier, cfg.ExternalURL.URL.String()),
Expand Down
4 changes: 0 additions & 4 deletions pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@ type Config struct {
ClientTLSConfig tls.ClientConfig `yaml:"ruler_client"`
// How frequently to evaluate rules by default.
EvaluationInterval time.Duration `yaml:"evaluation_interval"`
// Delay the evaluation of all rules by a set interval to give a buffer
// to metric that haven't been forwarded to cortex yet.
EvaluationDelay time.Duration `yaml:"evaluation_delay_duration"`
// How frequently to poll for updated rules.
PollInterval time.Duration `yaml:"poll_interval"`
// Rule Storage and Polling configuration.
Expand Down Expand Up @@ -109,7 +106,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
cfg.ExternalURL.URL, _ = url.Parse("") // Must be non-nil
f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.")
f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules")
f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay-duration", 0, "Duration to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex.")
f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes")

f.StringVar(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "", "Comma-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.")
Expand Down
18 changes: 12 additions & 6 deletions pkg/ruler/ruler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/flagext"
"github.com/cortexproject/cortex/pkg/util/services"
"github.com/cortexproject/cortex/pkg/util/validation"
)

func defaultRulerConfig(store rules.RuleStore) (Config, func()) {
Expand All @@ -57,7 +58,7 @@ func defaultRulerConfig(store rules.RuleStore) (Config, func()) {
return cfg, cleanup
}

func testSetup(t *testing.T, cfg Config) (*promql.Engine, storage.QueryableFunc, Pusher, log.Logger, func()) {
func testSetup(t *testing.T, cfg Config) (*promql.Engine, storage.QueryableFunc, Pusher, log.Logger, *validation.Overrides, func()) {
dir, err := ioutil.TempDir("", t.Name())
testutil.Ok(t, err)
cleanup := func() {
Expand All @@ -83,24 +84,29 @@ func testSetup(t *testing.T, cfg Config) (*promql.Engine, storage.QueryableFunc,
l := log.NewLogfmtLogger(os.Stdout)
l = level.NewFilter(l, level.AllowInfo())

return engine, noopQueryable, pusher, l, cleanup
var limits validation.Limits
flagext.DefaultValues(&limits)
overrides, err := validation.NewOverrides(limits, nil)
testutil.Ok(t, err)

return engine, noopQueryable, pusher, l, overrides, cleanup
}

// newManager is a test helper that builds a DefaultMultiTenantManager from cfg.
// It obtains the engine, queryable, pusher and logger fixtures from testSetup,
// wires them into DefaultTenantManagerFactory, and registers metrics on a fresh
// Prometheus registry. It fails the test immediately if the manager cannot be
// constructed, and returns the manager together with the cleanup function
// produced by testSetup.
func newManager(t *testing.T, cfg Config) (*DefaultMultiTenantManager, func()) {
engine, noopQueryable, pusher, logger, cleanup := testSetup(t, cfg)
manager, err := NewDefaultMultiTenantManager(cfg, DefaultTenantManagerFactory(cfg, pusher, noopQueryable, engine), prometheus.NewRegistry(), logger)
engine, noopQueryable, pusher, logger, overrides, cleanup := testSetup(t, cfg)
manager, err := NewDefaultMultiTenantManager(cfg, DefaultTenantManagerFactory(cfg, pusher, noopQueryable, engine, overrides), prometheus.NewRegistry(), logger)
require.NoError(t, err)

return manager, cleanup
}

func newRuler(t *testing.T, cfg Config) (*Ruler, func()) {
engine, noopQueryable, pusher, logger, cleanup := testSetup(t, cfg)
engine, noopQueryable, pusher, logger, overrides, cleanup := testSetup(t, cfg)
storage, err := NewRuleStorage(cfg.StoreConfig)
require.NoError(t, err)

reg := prometheus.NewRegistry()
managerFactory := DefaultTenantManagerFactory(cfg, pusher, noopQueryable, engine)
managerFactory := DefaultTenantManagerFactory(cfg, pusher, noopQueryable, engine, overrides)
manager, err := NewDefaultMultiTenantManager(cfg, managerFactory, reg, util.Logger)
require.NoError(t, err)

Expand Down
12 changes: 12 additions & 0 deletions pkg/util/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ type Limits struct {
CardinalityLimit int `yaml:"cardinality_limit"`
MaxCacheFreshness time.Duration `yaml:"max_cache_freshness"`

// Ruler defaults and limits.
// Delay the evaluation of all rules by a set interval to give a buffer
// to metrics that haven't been forwarded to cortex yet.
EvaluationDelay time.Duration `yaml:"evaluation_delay_duration"`

// Config for overrides, convenient if it goes here. [Deprecated in favor of RuntimeConfig flag in cortex.Config]
PerTenantOverrideConfig string `yaml:"per_tenant_override_config"`
PerTenantOverridePeriod time.Duration `yaml:"per_tenant_override_period"`
Expand Down Expand Up @@ -106,6 +111,8 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries. This limit is ignored when running the Cortex blocks storage. 0 to disable.")
f.DurationVar(&l.MaxCacheFreshness, "frontend.max-cache-freshness", 1*time.Minute, "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.")

f.DurationVar(&l.EvaluationDelay, "ruler.evaluation-delay-duration", 0, "Duration to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex.")

f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "File name of per-user overrides. [deprecated, use -runtime-config.file instead]")
f.DurationVar(&l.PerTenantOverridePeriod, "limits.per-user-override-period", 10*time.Second, "Period with which to reload the overrides. [deprecated, use -runtime-config.reload-period instead]")
}
Expand Down Expand Up @@ -340,6 +347,11 @@ func (o *Overrides) SubringSize(userID string) int {
return o.getOverridesForUser(userID).SubringSize
}

// EvaluationDelay reports how long rule evaluation is delayed for the tenant
// identified by userID, as read from the limits that getOverridesForUser
// resolves for that tenant.
func (o *Overrides) EvaluationDelay(userID string) time.Duration {
	tenantLimits := o.getOverridesForUser(userID)
	return tenantLimits.EvaluationDelay
}

func (o *Overrides) getOverridesForUser(userID string) *Limits {
if o.tenantLimits != nil {
l := o.tenantLimits(userID)
Expand Down