Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* `cortex_bucket_stores_gate_queries_in_flight`
* `cortex_bucket_stores_gate_duration_seconds`
* [CHANGE] Metric `cortex_ingester_flush_reasons` has been renamed to `cortex_ingester_series_flushed_total`, and is now incremented during flush, not when series is enqueued for flushing. #2802
* [CHANGE] Experimental Delete Series: Metric `cortex_purger_oldest_pending_delete_request_age_seconds` now tracks the age of delete requests from the end of their cancellation period instead of from their creation time. #2806
* [FEATURE] Introduced `ruler.for-outage-tolerance`, Max time to tolerate outage for restoring "for" state of alert. #2783
* [FEATURE] Introduced `ruler.for-grace-period`, Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. #2783
* [FEATURE] Introduced `ruler.resend-delay`, Minimum amount of time to wait before resending an alert to Alertmanager. #2783
Expand Down
19 changes: 14 additions & 5 deletions pkg/chunk/purger/purger.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ func newPurgerMetrics(r prometheus.Registerer) *purgerMetrics {
m.oldestPendingDeleteRequestAgeSeconds = promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "purger_oldest_pending_delete_request_age_seconds",
Help: "Age of oldest pending delete request in seconds",
Help: "Age of oldest pending delete request in seconds, since they are over their cancellation period",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand what you mean by "since they are over their cancellation period". Could you clarify it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not process requests until they are over their cancellation period, i.e. they can no longer be cancelled. I wanted to make this clear because people might think the age is measured from when the requests were created, which is not the case here.
I can remove it if it looks too confusing. Also, we have a metric to count the number of pending requests https://github.com/cortexproject/cortex/blob/35b756fa80c80210a678c59ff65a004d2bf0a018/pkg/chunk/purger/purger.go#L71 and here also pending requests are those which are over their cancellation period.
What do you think? I think we should either add that statement to both or none.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I got it and I think having it in the description is fine. Agree that we should add the same to pendingDeleteRequestsCount too.

})
m.pendingDeleteRequestsCount = promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "purger_pending_delete_requests_count",
Help: "Count of requests which are in process or are ready to be processed",
Help: "Count of delete requests which are over their cancellation period and have not finished processing yet",
})

return &m
Expand Down Expand Up @@ -248,6 +248,10 @@ func (p *Purger) workerJobCleanup(job workerJob) {
default:
// already sent
}
} else if len(p.usersWithPendingRequests) == 0 {
// there are no pending requests from any of the users, set the oldest pending request and number of pending requests to 0
p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(0)
p.metrics.pendingDeleteRequestsCount.Set(0)
}
} else {
p.pendingPlansCountMtx.Unlock()
Expand Down Expand Up @@ -409,7 +413,7 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error {
p.inProcessRequestIDsMtx.RUnlock()

now := model.Now()
oldestPendingRequestCreatedAt := now
oldestPendingRequestCreatedAt := model.Time(0)

// requests which are still being processed are also considered pending
if pendingDeleteRequestsCount != 0 {
Expand All @@ -426,7 +430,7 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error {
}

pendingDeleteRequestsCount++
if deleteRequest.CreatedAt.Before(oldestPendingRequestCreatedAt) {
if oldestPendingRequestCreatedAt == 0 || deleteRequest.CreatedAt.Before(oldestPendingRequestCreatedAt) {
oldestPendingRequestCreatedAt = deleteRequest.CreatedAt
}

Expand Down Expand Up @@ -473,7 +477,12 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error {
p.executePlansChan <- req
}

p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(now.Sub(oldestPendingRequestCreatedAt) / time.Second))
// track age of oldest delete request since they are over their cancellation period
oldestPendingRequestAge := time.Duration(0)
if oldestPendingRequestCreatedAt != 0 {
oldestPendingRequestAge = now.Sub(oldestPendingRequestCreatedAt.Add(p.cfg.DeleteRequestCancelPeriod))
}
p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(oldestPendingRequestAge / time.Second))
p.metrics.pendingDeleteRequestsCount.Set(float64(pendingDeleteRequestsCount))

return nil
Expand Down
7 changes: 2 additions & 5 deletions pkg/chunk/purger/purger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,8 +410,8 @@ func TestPurger_Metrics(t *testing.T) {
// load new delete requests for processing
require.NoError(t, purger.pullDeleteRequestsToPlanDeletes())

// there must be 2 pending delete requests, oldest being 3 days old
require.InDelta(t, float64(3*86400), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1)
// there must be 2 pending delete requests, oldest being 2 days old since its cancellation time is over
require.InDelta(t, float64(2*86400), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1)
require.Equal(t, float64(2), testutil.ToFloat64(purger.metrics.pendingDeleteRequestsCount))

// start loop to process requests
Expand All @@ -429,9 +429,6 @@ func TestPurger_Metrics(t *testing.T) {
return testutil.ToFloat64(purger.metrics.deleteRequestsProcessedTotal)
})

// load new delete requests for processing which should update the metrics
require.NoError(t, purger.pullDeleteRequestsToPlanDeletes())

// there must be 0 pending delete requests so the age for oldest pending must be 0
require.InDelta(t, float64(0), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1)
require.Equal(t, float64(0), testutil.ToFloat64(purger.metrics.pendingDeleteRequestsCount))
Expand Down
3 changes: 1 addition & 2 deletions pkg/testexporter/correctness/runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ func TestMinQueryTime(t *testing.T) {
}

for _, tt := range tests {
//require.Equal(t, tt.expected, tc.MinQueryTime())
assert.WithinDuration(t, tt.expected, calculateMinQueryTime(tt.durationQuerySince, tt.timeQueryStart), 5*time.Millisecond)
assert.WithinDuration(t, tt.expected, calculateMinQueryTime(tt.durationQuerySince, tt.timeQueryStart), 50*time.Millisecond)
}
}