diff --git a/lib/auth/accesspoint/accesspoint.go b/lib/auth/accesspoint/accesspoint.go index 66bf51223990f..6cfcc534a702b 100644 --- a/lib/auth/accesspoint/accesspoint.go +++ b/lib/auth/accesspoint/accesspoint.go @@ -28,6 +28,7 @@ import ( "time" "github.com/gravitational/trace" + "github.com/prometheus/client_golang/prometheus" oteltrace "go.opentelemetry.io/otel/trace" "github.com/gravitational/teleport" @@ -63,6 +64,8 @@ type Config struct { // TracingProvider is the provider to be used for exporting // traces. No-op tracers will be used if no provider is set. TracingProvider *tracing.Provider + // Registerer is used to register prometheus metrics. + Registerer prometheus.Registerer // The following services are provided to the Cache to allow it to // populate its resource collections. They will either be the local service @@ -130,6 +133,7 @@ func NewCache(cfg Config) (*cache.Cache, error) { if err := cfg.CheckAndSetDefaults(); err != nil { return nil, trace.Wrap(err) } + slog.DebugContext(cfg.Context, "Creating in-memory backend cache.", "cache_name", cfg.CacheName) mem, err := memory.New(memory.Config{ Context: cfg.Context, @@ -139,10 +143,12 @@ func NewCache(cfg Config) (*cache.Cache, error) { if err != nil { return nil, trace.Wrap(err) } + var tracer oteltrace.Tracer if cfg.TracingProvider != nil { tracer = cfg.TracingProvider.Tracer(teleport.ComponentCache) } + reporter, err := backend.NewReporter(backend.ReporterConfig{ Component: teleport.ComponentCache, Backend: mem, @@ -161,14 +167,14 @@ func NewCache(cfg Config) (*cache.Cache, error) { metricComponent := append(slices.Clone(cfg.CacheName), teleport.ComponentCache) cacheCfg := cache.Config{ - Context: cfg.Context, - Backend: reporter, - Component: teleport.Component(component...), - MetricComponent: teleport.Component(metricComponent...), - Tracer: tracer, - MaxRetryPeriod: cfg.MaxRetryPeriod, - Unstarted: cfg.Unstarted, - + Context: cfg.Context, + Backend: reporter, + Component: teleport.Component(component...), + MetricComponent: teleport.Component(metricComponent...), + Tracer: tracer, + Registerer: cfg.Registerer, + MaxRetryPeriod: cfg.MaxRetryPeriod, + Unstarted: cfg.Unstarted, Access: cfg.Access, AccessLists: cfg.AccessLists, AccessMonitoringRules: cfg.AccessMonitoringRules, diff --git a/lib/backend/atomicwrite.go b/lib/backend/atomicwrite.go index a59cf84f70cb7..b3eaf37fedd8d 100644 --- a/lib/backend/atomicwrite.go +++ b/lib/backend/atomicwrite.go @@ -218,6 +218,8 @@ const ( // MaxAtomicWriteSize is the maximum number of conditional actions that may // be applied via a single atomic write. The exact number is subject to change // but must always be less than the minimum value supported across all backends. + // + // NOTE:The buckets for backendmetrics.AtomicWriteSize must stay in sync with this constant. MaxAtomicWriteSize = 64 ) diff --git a/lib/backend/backendmetrics/metrics.go b/lib/backend/backendmetrics/metrics.go new file mode 100644 index 0000000000000..40881c020b604 --- /dev/null +++ b/lib/backend/backendmetrics/metrics.go @@ -0,0 +1,255 @@ +// Teleport +// Copyright (C) 2025 Gravitational, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package backendmetrics + +import ( + "github.com/gravitational/trace" + "github.com/prometheus/client_golang/prometheus" + + "github.com/gravitational/teleport" + "github.com/gravitational/teleport/lib/observability/metrics" +) + +var ( + Requests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendRequests, + Help: "Number of requests to the backend (reads, writes, and keepalives)", + }, + []string{teleport.ComponentLabel, teleport.TagReq, teleport.TagRange}, + ) + Watchers = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: teleport.MetricBackendWatchers, + Help: "Number of active backend watchers", + }, + []string{teleport.ComponentLabel}, + ) + WatcherQueues = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: teleport.MetricBackendWatcherQueues, + Help: "Watcher queue sizes", + }, + []string{teleport.ComponentLabel}, + ) + WriteRequests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendWriteRequests, + Help: "Number of write requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + Writes = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendWrites, + Help: "Number of individual items written to the backend", + }, + []string{teleport.ComponentLabel}, + ) + WriteRequestsFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendWriteFailedRequests, + Help: "Number of failed write requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + WriteRequestsFailedPrecondition = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendWriteFailedPreconditionRequests, + Help: "Number of write requests that failed due to a precondition (existence, revision, value, etc)", + }, + []string{teleport.ComponentLabel}, + ) + AtomicWriteRequests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendAtomicWriteRequests, + Help: "Number of atomic write requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + AtomicWriteRequestsFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendAtomicWriteFailedRequests, + Help: "Number of failed atomic write requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + AtomicWriteConditionFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendAtomicWriteConditionFailed, + Help: "Number of times an atomic write request results in condition failure", + }, + []string{teleport.ComponentLabel}, + ) + AtomicWriteLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendAtomicWriteHistogram, + Help: "Latency for backend atomic write operations", + // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 + // highest bucket start of 0.001 sec * 2^15 == 32.768 sec + Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), + }, + []string{teleport.ComponentLabel}, + ) + AtomicWriteSize = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendAtomicWriteSize, + Help: "Atomic write batch size", + // buckets of the form 1, 2, 4, 8, 16, etc... + Buckets: prometheus.ExponentialBuckets(1, 2, 8), + }, + []string{teleport.ComponentLabel}, + ) + AtomicWriteContention = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendAtomicWriteContention, + Help: "Number of times atomic write requests experience contention", + }, + []string{teleport.ComponentLabel}, + ) + BatchWriteRequests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendBatchWriteRequests, + Help: "Number of batch write requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + BatchWriteRequestsFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendBatchFailedWriteRequests, + Help: "Number of failed write requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + ReadRequests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendReadRequests, + Help: "Number of read requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + Reads = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Name: teleport.MetricBackendReads, + Help: "Number of individual items read from the backend", + }, + []string{teleport.ComponentLabel}, + ) + ReadRequestsFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendFailedReadRequests, + Help: "Number of failed read requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + StreamingRequests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: "backend", + Name: "stream_requests", + Help: "Number of inflight stream requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + StreamingRequestsFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: "backend", + Name: "stream_requests_failed", + Help: "Number of failed stream requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + BatchReadRequests = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendBatchReadRequests, + Help: "Number of read requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + BatchReadRequestsFailed = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: teleport.MetricBackendBatchFailedReadRequests, + Help: "Number of failed read requests to the backend", + }, + []string{teleport.ComponentLabel}, + ) + WriteLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: teleport.MetricBackendWriteHistogram, + Help: "Latency for backend write operations", + // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 + // highest bucket start of 0.001 sec * 2^15 == 32.768 sec + Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), + }, + []string{teleport.ComponentLabel}, + ) + BatchWriteLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: teleport.MetricBackendBatchWriteHistogram, + Help: "Latency for backend batch write operations", + // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 + // highest bucket start of 0.001 sec * 2^15 == 32.768 sec + Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), + }, + []string{teleport.ComponentLabel}, + ) + BatchReadLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: teleport.MetricBackendBatchReadHistogram, + Help: "Latency for batch read operations", + // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 + // highest bucket start of 0.001 sec * 2^15 == 32.768 sec + Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), + }, + []string{teleport.ComponentLabel}, + ) + ReadLatencies = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: teleport.MetricBackendReadHistogram, + Help: "Latency for read operations", + // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 + // highest bucket start of 0.001 sec * 2^15 == 32.768 sec + Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), + }, + []string{teleport.ComponentLabel}, + ) +) + +// RegisterCollectors ensures all backend metrics are registered +// with the provided [prometheus.Registerer]. +func RegisterCollectors(reg prometheus.Registerer) error { + return trace.Wrap(metrics.RegisterCollectors(reg, + Watchers, WatcherQueues, Requests, WriteRequests, + WriteRequestsFailed, BatchWriteRequests, BatchWriteRequestsFailed, ReadRequests, + ReadRequestsFailed, BatchReadRequests, BatchReadRequestsFailed, WriteLatencies, + WriteRequestsFailedPrecondition, + AtomicWriteRequests, AtomicWriteRequestsFailed, AtomicWriteConditionFailed, AtomicWriteLatencies, + AtomicWriteContention, AtomicWriteSize, Reads, Writes, + BatchWriteLatencies, BatchReadLatencies, ReadLatencies, + StreamingRequests, StreamingRequestsFailed, + )) +} diff --git a/lib/backend/buffer.go b/lib/backend/buffer.go index 5af31c114d52e..2cbc892e60fc7 100644 --- a/lib/backend/buffer.go +++ b/lib/backend/buffer.go @@ -32,6 +32,7 @@ import ( "github.com/gravitational/teleport" "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/lib/backend/backendmetrics" logutils "github.com/gravitational/teleport/lib/utils/log" ) @@ -203,7 +204,7 @@ func (c *CircularBuffer) fanOutEvent(r Event) { var watchersToDelete []*BufferWatcher c.watchers.walkPath(r.Item.Key.String(), func(watcher *BufferWatcher) { if watcher.MetricComponent != "" { - watcherQueues.WithLabelValues(watcher.MetricComponent).Set(float64(len(watcher.eventsC))) + backendmetrics.WatcherQueues.WithLabelValues(watcher.MetricComponent).Set(float64(len(watcher.eventsC))) } if !watcher.emit(r) { watchersToDelete = append(watchersToDelete, watcher) diff --git a/lib/backend/dynamo/atomicwrite.go b/lib/backend/dynamo/atomicwrite.go index 4de200e5b97b1..a30bbf8a9344a 100644 --- a/lib/backend/dynamo/atomicwrite.go +++ b/lib/backend/dynamo/atomicwrite.go @@ -34,6 +34,7 @@ import ( "github.com/gravitational/teleport" "github.com/gravitational/teleport/api/utils/retryutils" "github.com/gravitational/teleport/lib/backend" + "github.com/gravitational/teleport/lib/backend/backendmetrics" ) const ( @@ -250,7 +251,7 @@ TxnLoop: } if i > 0 { - backend.AtomicWriteContention.WithLabelValues(teleport.ComponentDynamoDB).Add(float64(i)) + backendmetrics.AtomicWriteContention.WithLabelValues(teleport.ComponentDynamoDB).Add(float64(i)) } if n := i + 1; n > 2 { diff --git a/lib/backend/firestore/atomicwrite.go b/lib/backend/firestore/atomicwrite.go index 3dee9de1b8770..6f4877dd04848 100644 --- a/lib/backend/firestore/atomicwrite.go +++ b/lib/backend/firestore/atomicwrite.go @@ -29,6 +29,7 @@ import ( "github.com/gravitational/teleport" "github.com/gravitational/teleport/lib/backend" + "github.com/gravitational/teleport/lib/backend/backendmetrics" ) func (b *Backend) AtomicWrite(ctx context.Context, condacts []backend.ConditionalAction) (revision string, err error) { @@ -146,7 +147,7 @@ func (b *Backend) AtomicWrite(ctx context.Context, condacts []backend.Conditiona } if n > 1 { - backend.AtomicWriteContention.WithLabelValues(teleport.ComponentFirestore).Add(float64(n - 1)) + backendmetrics.AtomicWriteContention.WithLabelValues(teleport.ComponentFirestore).Add(float64(n - 1)) } if n > 2 { diff --git a/lib/backend/pgbk/atomicwrite.go b/lib/backend/pgbk/atomicwrite.go index aaaada25d68a9..20e60d050919c 100644 --- a/lib/backend/pgbk/atomicwrite.go +++ b/lib/backend/pgbk/atomicwrite.go @@ -26,6 +26,7 @@ import ( "github.com/jackc/pgx/v5/pgtype/zeronull" "github.com/gravitational/teleport/lib/backend" + "github.com/gravitational/teleport/lib/backend/backendmetrics" pgcommon "github.com/gravitational/teleport/lib/backend/pgbk/common" ) @@ -131,7 +132,7 @@ func (b *Backend) AtomicWrite(ctx context.Context, condacts []backend.Conditiona }) if attempts > 1 { - backend.AtomicWriteContention.WithLabelValues(b.GetName()).Add(float64(attempts - 1)) + backendmetrics.AtomicWriteContention.WithLabelValues(b.GetName()).Add(float64(attempts - 1)) } if attempts > 2 { diff --git a/lib/backend/report.go b/lib/backend/report.go index 3107235f20f11..3186db077d0f5 100644 --- a/lib/backend/report.go +++ b/lib/backend/report.go @@ -22,7 +22,6 @@ import ( "context" "errors" "log/slog" - "math" "slices" "strings" "time" @@ -37,7 +36,7 @@ import ( "github.com/gravitational/teleport" "github.com/gravitational/teleport/api/types" - "github.com/gravitational/teleport/lib/observability/metrics" + "github.com/gravitational/teleport/lib/backend/backendmetrics" "github.com/gravitational/teleport/lib/observability/tracing" logutils "github.com/gravitational/teleport/lib/utils/log" ) @@ -56,6 +55,8 @@ type ReporterConfig struct { TopRequestsCount int // Tracer is used to create spans Tracer oteltrace.Tracer + // Registerer is used to register prometheus metrics. + Registerer prometheus.Registerer } // CheckAndSetDefaults checks and sets @@ -72,6 +73,9 @@ func (r *ReporterConfig) CheckAndSetDefaults() error { if r.Tracer == nil { r.Tracer = tracing.NoopTracer(teleport.ComponentBackend) } + if r.Registerer == nil { + r.Registerer = prometheus.DefaultRegisterer + } return nil } @@ -96,18 +100,17 @@ type Reporter struct { // NewReporter returns a new Reporter. func NewReporter(cfg ReporterConfig) (*Reporter, error) { - err := metrics.RegisterPrometheusCollectors(prometheusCollectors...) - if err != nil { + if err := cfg.CheckAndSetDefaults(); err != nil { return nil, trace.Wrap(err) } - if err := cfg.CheckAndSetDefaults(); err != nil { + if err := backendmetrics.RegisterCollectors(cfg.Registerer); err != nil { return nil, trace.Wrap(err) } cache, err := lru.NewWithEvict(cfg.TopRequestsCount, func(labels topRequestsCacheKey, value struct{}) { // Evict the key from requests metric. - requests.DeleteLabelValues(labels.component, labels.key, labels.isRange) + backendmetrics.Requests.DeleteLabelValues(labels.component, labels.key, labels.isRange) }) if err != nil { return nil, trace.Wrap(err) @@ -139,12 +142,12 @@ func (s *Reporter) GetRange(ctx context.Context, startKey, endKey Key, limit int start := s.Clock().Now() res, err := s.Backend.GetRange(ctx, startKey, endKey, limit) - batchReadLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - batchReadRequests.WithLabelValues(s.Component).Inc() + backendmetrics.BatchReadLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.BatchReadRequests.WithLabelValues(s.Component).Inc() if err != nil { - batchReadRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.BatchReadRequestsFailed.WithLabelValues(s.Component).Inc() } else { - reads.WithLabelValues(s.Component).Add(float64(len(res.Items))) + backendmetrics.Reads.WithLabelValues(s.Component).Add(float64(len(res.Items))) } s.trackRequest(ctx, types.OpGet, startKey, endKey) end := s.Clock().Now() @@ -170,15 +173,15 @@ func (s *Reporter) Create(ctx context.Context, i Item) (*Lease, error) { start := s.Clock().Now() lease, err := s.Backend.Create(ctx, i) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() if trace.IsAlreadyExists(err) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() } } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpPut, i.Key, Key{}) return lease, err @@ -199,12 +202,12 @@ func (s *Reporter) Put(ctx context.Context, i Item) (*Lease, error) { start := s.Clock().Now() lease, err := s.Backend.Put(ctx, i) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpPut, i.Key, Key{}) return lease, err @@ -224,15 +227,15 @@ func (s *Reporter) Update(ctx context.Context, i Item) (*Lease, error) { start := s.Clock().Now() lease, err := s.Backend.Update(ctx, i) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() if trace.IsNotFound(err) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() } } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpPut, i.Key, Key{}) return lease, err @@ -252,15 +255,15 @@ func (s *Reporter) ConditionalUpdate(ctx context.Context, i Item) (*Lease, error start := s.Clock().Now() lease, err := s.Backend.ConditionalUpdate(ctx, i) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() if errors.Is(err, ErrIncorrectRevision) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() } } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpPut, i.Key, Key{}) return lease, err @@ -279,11 +282,11 @@ func (s *Reporter) Get(ctx context.Context, key Key) (*Item, error) { start := s.Clock().Now() item, err := s.Backend.Get(ctx, key) - readLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - readRequests.WithLabelValues(s.Component).Inc() - reads.WithLabelValues(s.Component).Inc() + backendmetrics.ReadLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.ReadRequests.WithLabelValues(s.Component).Inc() + backendmetrics.Reads.WithLabelValues(s.Component).Inc() if err != nil && !trace.IsNotFound(err) { - readRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.ReadRequestsFailed.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpGet, key, Key{}) return item, err @@ -303,15 +306,15 @@ func (s *Reporter) CompareAndSwap(ctx context.Context, expected Item, replaceWit start := s.Clock().Now() lease, err := s.Backend.CompareAndSwap(ctx, expected, replaceWith) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() if trace.IsNotFound(err) || trace.IsCompareFailed(err) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() } } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpPut, expected.Key, Key{}) return lease, err @@ -330,15 +333,15 @@ func (s *Reporter) Delete(ctx context.Context, key Key) error { start := s.Clock().Now() err := s.Backend.Delete(ctx, key) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() if trace.IsNotFound(err) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() } } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpDelete, key, Key{}) return err @@ -358,15 +361,15 @@ func (s *Reporter) ConditionalDelete(ctx context.Context, key Key, revision stri start := s.Clock().Now() err := s.Backend.ConditionalDelete(ctx, key, revision) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() if trace.IsNotFound(err) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() } } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpDelete, key, Key{}) return err @@ -390,18 +393,18 @@ func (s *Reporter) AtomicWrite(ctx context.Context, condacts []ConditionalAction revision, err = s.Backend.AtomicWrite(ctx, condacts) elapsed := s.Clock().Since(start).Seconds() - writeLatencies.WithLabelValues(s.Component).Observe(elapsed) - atomicWriteLatencies.WithLabelValues(s.Component).Observe(elapsed) + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(elapsed) + backendmetrics.AtomicWriteLatencies.WithLabelValues(s.Component).Observe(elapsed) - writeRequests.WithLabelValues(s.Component).Inc() - atomicWriteRequests.WithLabelValues(s.Component).Inc() - atomicWriteSize.WithLabelValues(s.Component).Observe(float64(len(condacts))) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() + backendmetrics.AtomicWriteRequests.WithLabelValues(s.Component).Inc() + backendmetrics.AtomicWriteSize.WithLabelValues(s.Component).Observe(float64(len(condacts))) if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() - atomicWriteRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.AtomicWriteRequestsFailed.WithLabelValues(s.Component).Inc() if errors.Is(err, ErrConditionFailed) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() - atomicWriteConditionFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.AtomicWriteConditionFailed.WithLabelValues(s.Component).Inc() } } @@ -420,7 +423,7 @@ func (s *Reporter) AtomicWrite(ctx context.Context, condacts []ConditionalAction } if err == nil { - writes.WithLabelValues(s.Component).Add(float64(writeTotal)) + backendmetrics.Writes.WithLabelValues(s.Component).Add(float64(writeTotal)) } return } @@ -439,10 +442,10 @@ func (s *Reporter) DeleteRange(ctx context.Context, startKey, endKey Key) error start := s.Clock().Now() err := s.Backend.DeleteRange(ctx, startKey, endKey) - batchWriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - batchWriteRequests.WithLabelValues(s.Component).Inc() + backendmetrics.BatchWriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.BatchWriteRequests.WithLabelValues(s.Component).Inc() if err != nil && !trace.IsNotFound(err) { - batchWriteRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.BatchWriteRequestsFailed.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpDelete, startKey, endKey) return err @@ -465,15 +468,15 @@ func (s *Reporter) KeepAlive(ctx context.Context, lease Lease, expires time.Time start := s.Clock().Now() err := s.Backend.KeepAlive(ctx, lease, expires) - writeLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) - writeRequests.WithLabelValues(s.Component).Inc() + backendmetrics.WriteLatencies.WithLabelValues(s.Component).Observe(s.Clock().Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues(s.Component).Inc() if err != nil { - writeRequestsFailed.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailed.WithLabelValues(s.Component).Inc() if trace.IsNotFound(err) { - writeRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() + backendmetrics.WriteRequestsFailedPrecondition.WithLabelValues(s.Component).Inc() } } else { - writes.WithLabelValues(s.Component).Inc() + backendmetrics.Writes.WithLabelValues(s.Component).Inc() } s.trackRequest(ctx, types.OpPut, lease.Key, Key{}) return err @@ -548,7 +551,7 @@ func (s *Reporter) trackRequest(ctx context.Context, opType types.OpType, key Ke s.topRequestsCache.Get(cacheKey) } - counter, err := requests.GetMetricWithLabelValues(s.Component, keyLabel, rangeSuffix) + counter, err := backendmetrics.Requests.GetMetricWithLabelValues(s.Component, keyLabel, rangeSuffix) if err != nil { slog.WarnContext(ctx, "Failed to get prometheus counter", "error", err) return @@ -631,8 +634,8 @@ func NewReporterWatcher(ctx context.Context, component string, w Watcher) *Repor } func (r *ReporterWatcher) watch(ctx context.Context) { - watchers.WithLabelValues(r.Component).Inc() - defer watchers.WithLabelValues(r.Component).Dec() + backendmetrics.Watchers.WithLabelValues(r.Component).Inc() + defer backendmetrics.Watchers.WithLabelValues(r.Component).Dec() select { case <-r.Done(): return @@ -641,209 +644,5 @@ func (r *ReporterWatcher) watch(ctx context.Context) { } } -var ( - requests = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendRequests, - Help: "Number of requests to the backend (reads, writes, and keepalives)", - }, - []string{teleport.ComponentLabel, teleport.TagReq, teleport.TagRange}, - ) - watchers = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: teleport.MetricBackendWatchers, - Help: "Number of active backend watchers", - }, - []string{teleport.ComponentLabel}, - ) - watcherQueues = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: teleport.MetricBackendWatcherQueues, - Help: "Watcher queue sizes", - }, - []string{teleport.ComponentLabel}, - ) - writeRequests = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendWriteRequests, - Help: "Number of write requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - writes = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendWrites, - Help: "Number of individual items written to the backend", - }, - []string{teleport.ComponentLabel}, - ) - writeRequestsFailed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendWriteFailedRequests, - Help: "Number of failed write requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - writeRequestsFailedPrecondition = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendWriteFailedPreconditionRequests, - Help: "Number of write requests that failed due to a precondition (existence, revision, value, etc)", - }, - []string{teleport.ComponentLabel}, - ) - atomicWriteRequests = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendAtomicWriteRequests, - Help: "Number of atomic write requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - atomicWriteRequestsFailed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendAtomicWriteFailedRequests, - Help: "Number of failed atomic write requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - atomicWriteConditionFailed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendAtomicWriteConditionFailed, - Help: "Number of times an atomic write request results in condition failure", - }, - []string{teleport.ComponentLabel}, - ) - atomicWriteLatencies = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendAtomicWriteHistogram, - Help: "Latency for backend atomic write operations", - // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 - // highest bucket start of 0.001 sec * 2^15 == 32.768 sec - Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), - }, - []string{teleport.ComponentLabel}, - ) - atomicWriteSize = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendAtomicWriteSize, - Help: "Atomic write batch size", - // buckets of the form 1, 2, 4, 8, 16, etc... - Buckets: prometheus.ExponentialBuckets(1, 2, int(math.Sqrt(MaxAtomicWriteSize))), - }, - []string{teleport.ComponentLabel}, - ) - AtomicWriteContention = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendAtomicWriteContention, - Help: "Number of times atomic write requests experience contention", - }, - []string{teleport.ComponentLabel}, - ) - batchWriteRequests = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendBatchWriteRequests, - Help: "Number of batch write requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - batchWriteRequestsFailed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendBatchFailedWriteRequests, - Help: "Number of failed write requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - readRequests = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendReadRequests, - Help: "Number of read requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - reads = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: teleport.MetricNamespace, - Name: teleport.MetricBackendReads, - Help: "Number of individual items read from the backend", - }, - []string{teleport.ComponentLabel}, - ) - readRequestsFailed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendFailedReadRequests, - Help: "Number of failed read requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - batchReadRequests = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendBatchReadRequests, - Help: "Number of read requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - batchReadRequestsFailed = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: teleport.MetricBackendBatchFailedReadRequests, - Help: "Number of failed read requests to the backend", - }, - []string{teleport.ComponentLabel}, - ) - writeLatencies = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: teleport.MetricBackendWriteHistogram, - Help: "Latency for backend write operations", - // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 - // highest bucket start of 0.001 sec * 2^15 == 32.768 sec - Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), - }, - []string{teleport.ComponentLabel}, - ) - batchWriteLatencies = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: teleport.MetricBackendBatchWriteHistogram, - Help: "Latency for backend batch write operations", - // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 - // highest bucket start of 0.001 sec * 2^15 == 32.768 sec - Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), - }, - []string{teleport.ComponentLabel}, - ) - batchReadLatencies = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: teleport.MetricBackendBatchReadHistogram, - Help: "Latency for batch read operations", - // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 - // highest bucket start of 0.001 sec * 2^15 == 32.768 sec - Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), - }, - []string{teleport.ComponentLabel}, - ) - readLatencies = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: teleport.MetricBackendReadHistogram, - Help: "Latency for read operations", - // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2 - // highest bucket start of 0.001 sec * 2^15 == 32.768 sec - Buckets: prometheus.ExponentialBuckets(0.001, 2, 16), - }, - []string{teleport.ComponentLabel}, - ) - - prometheusCollectors = []prometheus.Collector{ - watchers, watcherQueues, requests, writeRequests, - writeRequestsFailed, batchWriteRequests, batchWriteRequestsFailed, readRequests, - readRequestsFailed, batchReadRequests, batchReadRequestsFailed, writeLatencies, - writeRequestsFailedPrecondition, - atomicWriteRequests, atomicWriteRequestsFailed, atomicWriteConditionFailed, atomicWriteLatencies, - AtomicWriteContention, atomicWriteSize, reads, writes, - batchWriteLatencies, batchReadLatencies, readLatencies, - } -) +// TODO(tross): Remove once crdb is converted to use backendmetrics directly. +var AtomicWriteContention = backendmetrics.AtomicWriteContention diff --git a/lib/backend/report_test.go b/lib/backend/report_test.go index 41217fcf27213..42b79ba8a6825 100644 --- a/lib/backend/report_test.go +++ b/lib/backend/report_test.go @@ -28,6 +28,7 @@ import ( "github.com/stretchr/testify/require" "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/lib/backend/backendmetrics" ) func TestReporterTopRequestsLimit(t *testing.T) { @@ -44,7 +45,7 @@ func TestReporterTopRequestsLimit(t *testing.T) { countTopRequests := func() int { ch := make(chan prometheus.Metric) go func() { - requests.Collect(ch) + backendmetrics.Requests.Collect(ch) close(ch) }() @@ -54,7 +55,7 @@ func TestReporterTopRequestsLimit(t *testing.T) { } return int(count) } - t.Cleanup(requests.Reset) + t.Cleanup(backendmetrics.Requests.Reset) // At first, the metric should have no values. require.Equal(t, 0, countTopRequests()) diff --git a/lib/cache/cache.go b/lib/cache/cache.go index 37520f6488b1b..ac0a280f5c8c8 100644 --- a/lib/cache/cache.go +++ b/lib/cache/cache.go @@ -58,6 +58,7 @@ import ( "github.com/gravitational/teleport/api/types/userloginstate" "github.com/gravitational/teleport/api/utils/retryutils" "github.com/gravitational/teleport/lib/backend" + "github.com/gravitational/teleport/lib/backend/backendmetrics" "github.com/gravitational/teleport/lib/defaults" "github.com/gravitational/teleport/lib/observability/metrics" "github.com/gravitational/teleport/lib/observability/tracing" @@ -859,6 +860,8 @@ type Config struct { neverOK bool // Tracer is used to create spans Tracer oteltrace.Tracer + // Registerer is used to register prometheus metrics. + Registerer prometheus.Registerer // Unstarted indicates that the cache should not be started during New. The // cache is usable before it's started, but it will always hit the backend. Unstarted bool @@ -930,6 +933,9 @@ func (c *Config) CheckAndSetDefaults() error { if c.Tracer == nil { c.Tracer = tracing.NoopTracer(c.Component) } + if c.Registerer == nil { + c.Registerer = prometheus.DefaultRegisterer + } if c.FanoutShards == 0 { c.FanoutShards = 1 } @@ -962,7 +968,15 @@ const ( // New creates a new instance of Cache func New(config Config) (*Cache, error) { - if err := metrics.RegisterPrometheusCollectors( + if err := config.CheckAndSetDefaults(); err != nil { + return nil, trace.Wrap(err) + } + + if err := backendmetrics.RegisterCollectors(config.Registerer); err != nil { + return nil, trace.Wrap(err) + } + + if err := metrics.RegisterCollectors(config.Registerer, cacheEventsReceived, cacheStaleEventsReceived, cacheHealth, @@ -970,9 +984,6 @@ func New(config Config) (*Cache, error) { ); err != nil { return nil, trace.Wrap(err) } - if err := config.CheckAndSetDefaults(); err != nil { - return nil, trace.Wrap(err) - } clusterConfigCache, err := local.NewClusterConfigurationService(config.Backend) if err != nil { diff --git a/lib/cache/generic_operations_test.go b/lib/cache/generic_operations_test.go index 5fe08828efc67..dcb12c4eea6fd 100644 --- a/lib/cache/generic_operations_test.go +++ b/lib/cache/generic_operations_test.go @@ -32,6 +32,7 @@ func TestGetter(t *testing.T) { t.Cleanup(p.Close) store := newStore( + types.KindRole, func(role types.Role) types.Role { return role }, @@ -90,6 +91,7 @@ func TestLister(t *testing.T) { t.Cleanup(p.Close) store := newStore( + types.KindRole, func(role types.Role) types.Role { return role }, diff --git a/lib/cache/store.go b/lib/cache/store.go index a605ab75c21d5..ccb4b99bb6b0e 100644 --- a/lib/cache/store.go +++ b/lib/cache/store.go @@ -18,14 +18,17 @@ package cache import ( "iter" + "time" "github.com/gravitational/trace" + "github.com/gravitational/teleport/lib/backend/backendmetrics" "github.com/gravitational/teleport/lib/utils/sortcache" ) // store persists cached resources directly in memory. type store[T any, I comparable] struct { + kind string cache *sortcache.SortCache[T, I] clone func(T) T indexes map[I]func(T) string @@ -33,8 +36,9 @@ type store[T any, I comparable] struct { // newStore creates a store that will index the resource // based on the provided indexes. -func newStore[T any, I comparable](clone func(T) T, indexes map[I]func(T) string) *store[T, I] { +func newStore[T any, I comparable](kind string, clone func(T) T, indexes map[I]func(T) string) *store[T, I] { return &store[T, I]{ + kind: kind, clone: clone, indexes: indexes, cache: sortcache.New(sortcache.Config[T, I]{ @@ -45,21 +49,33 @@ func newStore[T any, I comparable](clone func(T) T, indexes map[I]func(T) string // clear removes all items from the store. func (s *store[T, I]) clear() error { + start := time.Now() s.cache.Clear() + backendmetrics.BatchWriteLatencies.WithLabelValues("cache").Observe(time.Since(start).Seconds()) + backendmetrics.BatchWriteRequests.WithLabelValues("cache").Inc() + backendmetrics.Requests.WithLabelValues("cache", s.kind, "true").Inc() return nil } // put adds a new item, or updates an existing item. func (s *store[T, I]) put(t T) error { + start := time.Now() s.cache.Put(s.clone(t)) + backendmetrics.WriteLatencies.WithLabelValues("cache").Observe(time.Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues("cache").Inc() + backendmetrics.Requests.WithLabelValues("cache", s.kind, "false").Inc() return nil } // delete removes the provided item if any of the indexes match. func (s *store[T, I]) delete(t T) error { + start := time.Now() for idx, transform := range s.indexes { s.cache.Delete(idx, transform(t)) } + backendmetrics.WriteLatencies.WithLabelValues("cache").Observe(time.Since(start).Seconds()) + backendmetrics.WriteRequests.WithLabelValues("cache").Inc() + backendmetrics.Requests.WithLabelValues("cache", s.kind, "false").Inc() return nil } @@ -75,9 +91,14 @@ func (s *store[T, I]) len() int { // It is the responsibility of the caller to clone the resource // before propagating it further. func (s *store[T, I]) get(index I, key string) (T, error) { + start := time.Now() t, ok := s.cache.Get(index, key) + backendmetrics.ReadLatencies.WithLabelValues("cache").Observe(time.Since(start).Seconds()) + backendmetrics.ReadRequests.WithLabelValues("cache").Inc() + backendmetrics.Requests.WithLabelValues("cache", s.kind, "false").Inc() if !ok { - return t, trace.NotFound("no value for key %q in index %v", key, index) + backendmetrics.ReadRequestsFailed.WithLabelValues("cache").Inc() + return t, trace.NotFound("%q %q does not exist", s.kind, key) } return t, nil @@ -89,7 +110,19 @@ func (s *store[T, I]) get(index I, key string) (T, error) { // It is the responsibility of the caller to clone the resource // before propagating it further. func (s *store[T, I]) resources(index I, start, stop string) iter.Seq[T] { - return s.cache.Ascend(index, start, stop) + return func(yield func(T) bool) { + defer func() { + backendmetrics.StreamingRequests.WithLabelValues("cache").Inc() + backendmetrics.Requests.WithLabelValues("cache", s.kind, "false").Inc() + }() + + for t := range s.cache.Ascend(index, start, stop) { + backendmetrics.ReadRequests.WithLabelValues("cache").Inc() + if !yield(t) { + return + } + } + } } // count returns the number of items that exist in the provided range. diff --git a/lib/cache/store_test.go b/lib/cache/store_test.go index 0a5d956f9d65c..254489cb55711 100644 --- a/lib/cache/store_test.go +++ b/lib/cache/store_test.go @@ -27,6 +27,7 @@ import ( func TestResourceStore(t *testing.T) { store := newStore( + "int", func(i int) int { return i }, map[string]func(i int) string{ "numbers": strconv.Itoa, @@ -43,7 +44,7 @@ func TestResourceStore(t *testing.T) { require.Equal(t, 0, zero) n, err := store.get("numbers", "1000") - require.ErrorIs(t, err, &trace.NotFoundError{Message: `no value for key "1000" in index numbers`}) + require.ErrorIs(t, err, &trace.NotFoundError{Message: `"int" "1000" does not exist`}) require.Equal(t, 0, n) require.Equal(t, 2, store.count("numbers", "1", "100")) @@ -59,12 +60,12 @@ func TestResourceStore(t *testing.T) { require.NoError(t, store.delete(0)) _, err = store.get("numbers", "0") - require.ErrorIs(t, err, &trace.NotFoundError{Message: `no value for key "0" in index numbers`}) + require.ErrorIs(t, err, &trace.NotFoundError{Message: `"int" "0" does not exist`}) require.NoError(t, store.clear()) _, err = store.get("numbers", "0") - require.ErrorIs(t, err, &trace.NotFoundError{Message: `no value for key "0" in index numbers`}) + require.ErrorIs(t, err, &trace.NotFoundError{Message: `"int" "0" does not exist`}) require.Zero(t, store.len()) require.Zero(t, store.count("numbers", "1", "100")) diff --git a/lib/observability/metrics/prometheus.go b/lib/observability/metrics/prometheus.go index 8085be0ef19b7..d190c7ca1f5dc 100644 --- a/lib/observability/metrics/prometheus.go +++ b/lib/observability/metrics/prometheus.go @@ -34,6 +34,13 @@ import ( // - returns an error if a collector does not fulfill the consistency and // uniqueness criteria func RegisterPrometheusCollectors(collectors ...prometheus.Collector) error { + return RegisterCollectors(prometheus.DefaultRegisterer, collectors...) +} + +// RegisterCollectors registers the collectors in the registry. Any errors +// for already registered collectors are ignored. Errors are only returned +// if a collector does not fulfill the consistency and uniqueness criteria. +func RegisterCollectors(registry prometheus.Registerer, collectors ...prometheus.Collector) error { var errs []error for _, c := range collectors { if err := prometheus.Register(c); err != nil { diff --git a/lib/service/service.go b/lib/service/service.go index a3359b8d4bac7..37818cc78fb2e 100644 --- a/lib/service/service.go +++ b/lib/service/service.go @@ -2620,6 +2620,7 @@ func (process *TeleportProcess) newAccessCacheForServices(cfg accesspoint.Config cfg.ProcessID = process.id cfg.TracingProvider = process.TracingProvider cfg.MaxRetryPeriod = process.Config.CachePolicy.MaxRetryPeriod + cfg.Registerer = process.metricsRegistry cfg.Access = services.Access cfg.AccessLists = services.AccessLists @@ -6316,9 +6317,10 @@ func (process *TeleportProcess) initAuthStorage() (backend.Backend, error) { } reporter, err := backend.NewReporter(backend.ReporterConfig{ - Component: teleport.ComponentBackend, - Backend: backend.NewSanitizer(bk), - Tracer: process.TracingProvider.Tracer(teleport.ComponentBackend), + Component: teleport.ComponentBackend, + Backend: backend.NewSanitizer(bk), + Tracer: process.TracingProvider.Tracer(teleport.ComponentBackend), + Registerer: process.metricsRegistry, }) if err != nil { return nil, trace.Wrap(err)