diff --git a/CHANGELOG.md b/CHANGELOG.md index b9ae17e20e9..41479ba2c92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,8 +55,11 @@ * [ENHANCEMENT] Experimental WAL: Ingester WAL records now have type header and the custom WAL records have been replaced by Prometheus TSDB's WAL records. Old records will not be supported from 1.3 onwards. Note: once this is deployed, you cannot downgrade without data loss. #2436 * [ENHANCEMENT] Redis Cache: Added `idle_timeout`, `wait_on_pool_exhaustion` and `max_conn_lifetime` options to redis cache configuration. #2550 * [ENHANCEMENT] WAL: the experimental tag has been removed on the WAL in ingesters. -* [BUGFIX] Ruler: Ensure temporary rule files with special characters are properly mapped and cleaned up. #2506 * [ENHANCEMENT] Use newer AWS API for paginated queries - removes 'Deprecated' message from logfiles. #2452 +* [ENHANCEMENT] Experimental TSDB: added the following metrics to the ingester: #2580 + * `cortex_ingester_tsdb_appender_add_duration_seconds` + * `cortex_ingester_tsdb_appender_commit_duration_seconds` +* [BUGFIX] Ruler: Ensure temporary rule files with special characters are properly mapped and cleaned up. #2506 * [BUGFIX] Fixes #2411, Ensure requests are properly routed to the prometheus api embedded in the query if `-server.path-prefix` is set. #2372 * [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. #2400 * [BUGFIX] Cassandra Storage: Fix endpoint TLS host verification. #2109 diff --git a/pkg/ingester/ingester_v2.go b/pkg/ingester/ingester_v2.go index f1a6d0e0f18..4ba7506f046 100644 --- a/pkg/ingester/ingester_v2.go +++ b/pkg/ingester/ingester_v2.go @@ -74,9 +74,11 @@ type TSDBState struct { tsdbMetrics *tsdbMetrics // Head compactions metrics. - compactionsTriggered prometheus.Counter - compactionsFailed prometheus.Counter - walReplayTime prometheus.Histogram + compactionsTriggered prometheus.Counter + compactionsFailed prometheus.Counter + walReplayTime prometheus.Histogram + appenderAddDuration prometheus.Histogram + appenderCommitDuration prometheus.Histogram } // NewV2 returns a new Ingester that uses prometheus block storage instead of chunk storage @@ -114,6 +116,16 @@ func NewV2(cfg Config, clientConfig client.Config, limits *validation.Overrides, Help: "The total time it takes to open and replay a TSDB WAL.", Buckets: prometheus.DefBuckets, }), + appenderAddDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + Name: "cortex_ingester_tsdb_appender_add_duration_seconds", + Help: "The total time it takes for a push request to add samples to the TSDB appender.", + Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, + }), + appenderCommitDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + Name: "cortex_ingester_tsdb_appender_commit_duration_seconds", + Help: "The total time it takes for a push request to commit samples appended to TSDB.", + Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}, + }), }, } @@ -272,7 +284,7 @@ func (i *Ingester) v2Push(ctx context.Context, req *client.WriteRequest) (*clien // successfully committed succeededSamplesCount := 0 failedSamplesCount := 0 - now := time.Now() + startAppend := time.Now() // Walk the samples, appending them to the users database app := db.Appender() @@ -281,7 +293,7 @@ func (i *Ingester) v2Push(ctx context.Context, req *client.WriteRequest) (*clien // that even if we have a reference it's not guaranteed to be still valid. // The labels must be sorted (in our case, it's guaranteed a write request // has sorted labels once hit the ingester). - cachedRef, cachedRefExists := db.refCache.Ref(now, client.FromLabelAdaptersToLabels(ts.Labels)) + cachedRef, cachedRefExists := db.refCache.Ref(startAppend, client.FromLabelAdaptersToLabels(ts.Labels)) for _, s := range ts.Samples { var err error @@ -307,7 +319,7 @@ func (i *Ingester) v2Push(ctx context.Context, req *client.WriteRequest) (*clien copiedLabels := client.FromLabelAdaptersToLabelsWithCopy(ts.Labels) if ref, err = app.Add(copiedLabels, s.TimestampMs, s.Value); err == nil { - db.refCache.SetRef(now, copiedLabels, ref) + db.refCache.SetRef(startAppend, copiedLabels, ref) cachedRef = ref cachedRefExists = true @@ -348,9 +360,15 @@ func (i *Ingester) v2Push(ctx context.Context, req *client.WriteRequest) (*clien return nil, wrapWithUser(err, userID) } } + + // At this point all samples have been added to the appender, so we can track the time it took. + i.TSDBState.appenderAddDuration.Observe(time.Since(startAppend).Seconds()) + + startCommit := time.Now() if err := app.Commit(); err != nil { return nil, wrapWithUser(err, userID) } + i.TSDBState.appenderCommitDuration.Observe(time.Since(startCommit).Seconds()) // Increment metrics only if the samples have been successfully committed. // If the code didn't reach this point, it means that we returned an error