From 0e82968c9150cf98f75d1e19dde6cc462126da0b Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Mon, 29 Jun 2020 17:15:16 +0530 Subject: [PATCH 1/6] track oldest delete request age since their cancellation period is over Signed-off-by: Sandeep Sukhani --- pkg/chunk/purger/purger.go | 9 +++++++-- pkg/chunk/purger/purger_test.go | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pkg/chunk/purger/purger.go b/pkg/chunk/purger/purger.go index 7a771e0aa34..844f510e949 100644 --- a/pkg/chunk/purger/purger.go +++ b/pkg/chunk/purger/purger.go @@ -66,7 +66,7 @@ func newPurgerMetrics(r prometheus.Registerer) *purgerMetrics { m.oldestPendingDeleteRequestAgeSeconds = promauto.With(r).NewGauge(prometheus.GaugeOpts{ Namespace: "cortex", Name: "purger_oldest_pending_delete_request_age_seconds", - Help: "Age of oldest pending delete request in seconds", + Help: "Age of oldest pending delete request in seconds, since they are over their cancellation period", }) m.pendingDeleteRequestsCount = promauto.With(r).NewGauge(prometheus.GaugeOpts{ Namespace: "cortex", @@ -473,7 +473,12 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error { p.executePlansChan <- req } - p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(now.Sub(oldestPendingRequestCreatedAt) / time.Second)) + // track age of oldest delete request since they are over their cancellation period + oldestPendingRequestAge := time.Duration(0) + if !oldestPendingRequestCreatedAt.Equal(now) { + oldestPendingRequestAge = now.Sub(oldestPendingRequestCreatedAt.Add(p.cfg.DeleteRequestCancelPeriod)) + } + p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(oldestPendingRequestAge / time.Second)) p.metrics.pendingDeleteRequestsCount.Set(float64(pendingDeleteRequestsCount)) return nil diff --git a/pkg/chunk/purger/purger_test.go b/pkg/chunk/purger/purger_test.go index e73d1044148..4de230e661f 100644 --- a/pkg/chunk/purger/purger_test.go +++ b/pkg/chunk/purger/purger_test.go @@ -410,8 +410,8 @@ func TestPurger_Metrics(t *testing.T) { // load new delete requests for processing require.NoError(t, purger.pullDeleteRequestsToPlanDeletes()) - // there must be 2 pending delete requests, oldest being 3 days old - require.InDelta(t, float64(3*86400), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1) + // there must be 2 pending delete requests, oldest being 2 days old since its cancellation time is over + require.InDelta(t, float64(2*86400), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1) require.Equal(t, float64(2), testutil.ToFloat64(purger.metrics.pendingDeleteRequestsCount)) // start loop to process requests From d4318e01e12513e872bb3a82266c360e08bb9c5c Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Mon, 29 Jun 2020 20:51:33 +0530 Subject: [PATCH 2/6] update metric for oldest pending request and number of pending requests to 0 sooner Signed-off-by: Sandeep Sukhani --- pkg/chunk/purger/purger.go | 4 ++++ pkg/chunk/purger/purger_test.go | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/chunk/purger/purger.go b/pkg/chunk/purger/purger.go index 844f510e949..4502c1427eb 100644 --- a/pkg/chunk/purger/purger.go +++ b/pkg/chunk/purger/purger.go @@ -248,6 +248,10 @@ func (p *Purger) workerJobCleanup(job workerJob) { default: // already sent } + } else if len(p.usersWithPendingRequests) == 0 { + // there are no pending requests from any of the users, set the oldest pending request and number of pending requests to 0 + p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(0) + p.metrics.pendingDeleteRequestsCount.Set(0) } } else { p.pendingPlansCountMtx.Unlock() diff --git a/pkg/chunk/purger/purger_test.go b/pkg/chunk/purger/purger_test.go index 4de230e661f..5b6fac6b4e0 100644 --- a/pkg/chunk/purger/purger_test.go +++ b/pkg/chunk/purger/purger_test.go @@ -429,9 +429,6 @@ func TestPurger_Metrics(t *testing.T) { return testutil.ToFloat64(purger.metrics.deleteRequestsProcessedTotal) }) - // load new delete requests for processing which should update the metrics - require.NoError(t, purger.pullDeleteRequestsToPlanDeletes()) - // there must be 0 pending delete requests so the age for oldest pending must be 0 require.InDelta(t, float64(0), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1) require.Equal(t, float64(0), testutil.ToFloat64(purger.metrics.pendingDeleteRequestsCount)) From 611dc413ceddb83d7b9d8072ebef026512303e0d Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Tue, 30 Jun 2020 11:58:35 +0530 Subject: [PATCH 3/6] changes suggested from PR review Signed-off-by: Sandeep Sukhani --- pkg/chunk/purger/purger.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/chunk/purger/purger.go b/pkg/chunk/purger/purger.go index 4502c1427eb..e54f57e746c 100644 --- a/pkg/chunk/purger/purger.go +++ b/pkg/chunk/purger/purger.go @@ -413,7 +413,7 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error { p.inProcessRequestIDsMtx.RUnlock() now := model.Now() - oldestPendingRequestCreatedAt := now + oldestPendingRequestCreatedAt := model.Time(0) // requests which are still being processed are also considered pending if pendingDeleteRequestsCount != 0 { @@ -430,7 +430,7 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error { } pendingDeleteRequestsCount++ - if deleteRequest.CreatedAt.Before(oldestPendingRequestCreatedAt) { + if oldestPendingRequestCreatedAt == 0 || deleteRequest.CreatedAt.Before(oldestPendingRequestCreatedAt) { oldestPendingRequestCreatedAt = deleteRequest.CreatedAt } @@ -479,7 +479,7 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error { // track age of oldest delete request since they are over their cancellation period oldestPendingRequestAge := time.Duration(0) - if !oldestPendingRequestCreatedAt.Equal(now) { + if oldestPendingRequestCreatedAt != 0 { oldestPendingRequestAge = now.Sub(oldestPendingRequestCreatedAt.Add(p.cfg.DeleteRequestCancelPeriod)) } p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(oldestPendingRequestAge / time.Second)) From 1628eb20c3f7b030f0fd322ea9adaeb6b1d7c5bb Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Tue, 30 Jun 2020 12:02:22 +0530 Subject: [PATCH 4/6] update changelog Signed-off-by: Sandeep Sukhani --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a81c8e682b3..eb86cfe9820 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * `cortex_bucket_stores_gate_queries_in_flight` * `cortex_bucket_stores_gate_duration_seconds` * [CHANGE] Metric `cortex_ingester_flush_reasons` has been renamed to `cortex_ingester_series_flushed_total`, and is now incremented during flush, not when series is enqueued for flushing. #2802 +* [CHANGE] Experimental Delete Series: Metric `purger_pending_delete_requests_count` would track age of delete requests since they are over their cancellation period instead of their creation time. #2806 * [FEATURE] Introduced `ruler.for-outage-tolerance`, Max time to tolerate outage for restoring "for" state of alert. #2783 * [FEATURE] Introduced `ruler.for-grace-period`, Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. #2783 * [FEATURE] Introduced `ruler.resend-delay`, Minimum amount of time to wait before resending an alert to Alertmanager. #2783 From c11eef9d6d7525baf947dda1521ef807f8f56c5b Mon Sep 17 00:00:00 2001 From: Sandeep Sukhani Date: Tue, 30 Jun 2020 12:20:28 +0530 Subject: [PATCH 5/6] minor nits suggested in PR review Signed-off-by: Sandeep Sukhani --- CHANGELOG.md | 2 +- pkg/chunk/purger/purger.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb86cfe9820..bfa6833161f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ * `cortex_bucket_stores_gate_queries_in_flight` * `cortex_bucket_stores_gate_duration_seconds` * [CHANGE] Metric `cortex_ingester_flush_reasons` has been renamed to `cortex_ingester_series_flushed_total`, and is now incremented during flush, not when series is enqueued for flushing. #2802 -* [CHANGE] Experimental Delete Series: Metric `purger_pending_delete_requests_count` would track age of delete requests since they are over their cancellation period instead of their creation time. #2806 +* [CHANGE] Experimental Delete Series: Metric `cortex_purger_oldest_pending_delete_request_age_seconds` would track age of delete requests since they are over their cancellation period instead of their creation time. #2806 * [FEATURE] Introduced `ruler.for-outage-tolerance`, Max time to tolerate outage for restoring "for" state of alert. #2783 * [FEATURE] Introduced `ruler.for-grace-period`, Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. #2783 * [FEATURE] Introduced `ruler.resend-delay`, Minimum amount of time to wait before resending an alert to Alertmanager. #2783 diff --git a/pkg/chunk/purger/purger.go b/pkg/chunk/purger/purger.go index e54f57e746c..2e966a659a9 100644 --- a/pkg/chunk/purger/purger.go +++ b/pkg/chunk/purger/purger.go @@ -71,7 +71,7 @@ func newPurgerMetrics(r prometheus.Registerer) *purgerMetrics { m.pendingDeleteRequestsCount = promauto.With(r).NewGauge(prometheus.GaugeOpts{ Namespace: "cortex", Name: "purger_pending_delete_requests_count", - Help: "Count of requests which are in process or are ready to be processed", + Help: "Count of delete requests which are over their cancellation period and have not finished processing yet", }) return &m From f47da404609a462880562a5649578a8abe86e761 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Tue, 30 Jun 2020 09:11:44 +0200 Subject: [PATCH 6/6] Fixed flaky test Signed-off-by: Marco Pracucci --- pkg/testexporter/correctness/runner_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/testexporter/correctness/runner_test.go b/pkg/testexporter/correctness/runner_test.go index d71720f7f16..0410a7d0cd7 100644 --- a/pkg/testexporter/correctness/runner_test.go +++ b/pkg/testexporter/correctness/runner_test.go @@ -27,7 +27,6 @@ func TestMinQueryTime(t *testing.T) { } for _, tt := range tests { - //require.Equal(t, tt.expected, tc.MinQueryTime()) - assert.WithinDuration(t, tt.expected, calculateMinQueryTime(tt.durationQuerySince, tt.timeQueryStart), 5*time.Millisecond) + assert.WithinDuration(t, tt.expected, calculateMinQueryTime(tt.durationQuerySince, tt.timeQueryStart), 50*time.Millisecond) } }