Skip to content

Commit 226adc6

Browse files
authored
Configurable delay on rule evaluation (#2423)
* ensure rules are evaluated on a delayed interval Signed-off-by: Jacob Lisi <[email protected]> * update changelog and docs Signed-off-by: Jacob Lisi <[email protected]> * refactor per PR comments Signed-off-by: Jacob Lisi <[email protected]> * update docs Signed-off-by: Jacob Lisi <[email protected]>
1 parent e5c409d commit 226adc6

File tree

4 files changed

+23
-1
lines changed

4 files changed

+23
-1
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
* `cortex_querier_bucket_store_blocks_meta_sync_duration_seconds` > `cortex_querier_blocks_meta_sync_duration_seconds`
1010
* `cortex_querier_bucket_store_blocks_meta_sync_consistency_delay_seconds` > `cortex_querier_blocks_meta_sync_consistency_delay_seconds`
1111
* [CHANGE] Experimental TSDB: Modified default values for `compactor.deletion-delay` option from 48h to 12h and `-experimental.tsdb.bucket-store.ignore-deletion-marks-delay` from 24h to 6h. #2414
12+
* [FEATURE] Ruler: The `-ruler.evaluation-delay-duration` flag was added to allow users to configure a default evaluation delay for all rules in cortex. The default value is 0 which is the current behavior. #2423
1213
* [ENHANCEMENT] Experimental TSDB: sample ingestion errors are now reported via existing `cortex_discarded_samples_total` metric. #2370
1314
* [ENHANCEMENT] Failures on samples at distributors and ingesters return the first validation error as opposed to the last. #2383
1415
* [ENHANCEMENT] Experimental TSDB: Added `cortex_querier_blocks_meta_synced`, which reflects current state of synced blocks over all tenants. #2392

docs/configuration/config-file-reference.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,11 @@ The `ruler_config` configures the Cortex ruler.
700700
# CLI flag: -ruler.evaluation-interval
701701
[evaluation_interval: <duration> | default = 1m0s]
702702
703+
# Duration to delay the evaluation of rules to ensure the underlying metrics
704+
# have been pushed to cortex.
705+
# CLI flag: -ruler.evaluation-delay-duration
706+
[evaluation_delay_duration: <duration> | default = 0s]
707+
703708
# How frequently to poll for rule changes
704709
# CLI flag: -ruler.poll-interval
705710
[poll_interval: <duration> | default = 1m0s]

pkg/ruler/compat.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ package ruler
22

33
import (
44
"context"
5+
"time"
56

67
"github.com/prometheus/prometheus/pkg/labels"
8+
"github.com/prometheus/prometheus/promql"
9+
"github.com/prometheus/prometheus/rules"
710
"github.com/prometheus/prometheus/storage"
811
"github.com/weaveworks/common/user"
912

@@ -78,3 +81,12 @@ func (t *tsdb) StartTime() (int64, error) {
7881
func (t *tsdb) Close() error {
7982
return nil
8083
}
84+
85+
// engineQueryFunc returns a new query function using the rules.EngineQueryFunc function
86+
// and passing an altered timestamp.
87+
func engineQueryFunc(engine *promql.Engine, q storage.Queryable, delay time.Duration) rules.QueryFunc {
88+
orig := rules.EngineQueryFunc(engine, q)
89+
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
90+
return orig(ctx, qs, t.Add(-delay))
91+
}
92+
}

pkg/ruler/ruler.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ type Config struct {
6161
ExternalURL flagext.URLValue `yaml:"external_url"`
6262
// How frequently to evaluate rules by default.
6363
EvaluationInterval time.Duration `yaml:"evaluation_interval"`
64+
// Delay the evaluation of all rules by a set interval to give a buffer
65+
// to metrics that haven't been forwarded to cortex yet.
66+
EvaluationDelay time.Duration `yaml:"evaluation_delay_duration"`
6467
// How frequently to poll for updated rules.
6568
PollInterval time.Duration `yaml:"poll_interval"`
6669
// Rule Storage and Polling configuration.
@@ -103,6 +106,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
103106
cfg.ExternalURL.URL, _ = url.Parse("") // Must be non-nil
104107
f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.")
105108
f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules")
109+
f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay-duration", 0, "Duration to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex.")
106110
f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes")
107111
f.Var(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "URL of the Alertmanager to send notifications to.")
108112
f.BoolVar(&cfg.AlertmanagerDiscovery, "ruler.alertmanager-discovery", false, "Use DNS SRV records to discover alertmanager hosts.")
@@ -480,7 +484,7 @@ func (r *Ruler) newManager(ctx context.Context, userID string) (*promRules.Manag
480484
opts := &promRules.ManagerOptions{
481485
Appendable: tsdb,
482486
TSDB: tsdb,
483-
QueryFunc: promRules.EngineQueryFunc(r.engine, r.queryable),
487+
QueryFunc: engineQueryFunc(r.engine, r.queryable, r.cfg.EvaluationDelay),
484488
Context: user.InjectOrgID(ctx, userID),
485489
ExternalURL: r.alertURL,
486490
NotifyFunc: sendAlerts(notifier, r.alertURL.String()),

0 commit comments

Comments
 (0)