Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions go/internal/services/keys/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"database/sql"
"fmt"
"strconv"
"strings"
"time"

Expand All @@ -16,7 +15,6 @@ import (
"github.com/unkeyed/unkey/go/pkg/codes"
"github.com/unkeyed/unkey/go/pkg/fault"
"github.com/unkeyed/unkey/go/pkg/otel/tracing"
"github.com/unkeyed/unkey/go/pkg/prometheus/metrics"
"github.com/unkeyed/unkey/go/pkg/ptr"
"github.com/unkeyed/unkey/go/pkg/rbac"
)
Expand Down Expand Up @@ -44,26 +42,6 @@ func (k *KeyVerifier) withCredits(ctx context.Context, cost int32) error {
k.setInvalid(StatusUsageExceeded, "Key usage limit exceeded.")
}

// Emit Prometheus metrics for credits spent
identityID := ""
if k.Key.IdentityID.Valid {
identityID = k.Key.IdentityID.String
}

// Credits are deducted when usage is valid AND cost > 0
deducted := usage.Valid && cost > 0
actualCostDeducted := int32(0)
if deducted {
actualCostDeducted = cost
}

metrics.KeyCreditsSpentTotal.WithLabelValues(
k.AuthorizedWorkspaceID, // workspace_id
k.Key.ID, // key_id
identityID, // identity_id
strconv.FormatBool(deducted), // deducted - whether credits were actually deducted
).Add(float64(actualCostDeducted)) // Add the actual amount deducted, not the requested cost

return nil
}

Expand Down
3 changes: 2 additions & 1 deletion go/pkg/circuitbreaker/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/unkeyed/unkey/go/pkg/clock"
"github.com/unkeyed/unkey/go/pkg/otel/logging"
"github.com/unkeyed/unkey/go/pkg/otel/tracing"
"github.com/unkeyed/unkey/go/pkg/prometheus/metrics"
)

type CB[Res any] struct {
Expand Down Expand Up @@ -198,7 +199,7 @@ func (cb *CB[Res]) preflight(ctx context.Context) error {
cb.resetStateAt = now.Add(cb.config.timeout)
}

requests.WithLabelValues(cb.config.name, string(cb.state)).Inc()
metrics.CircuitBreakerRequests.WithLabelValues(cb.config.name, string(cb.state)).Inc()

if cb.state == Open {
return ErrTripped
Expand Down
15 changes: 0 additions & 15 deletions go/pkg/circuitbreaker/metrics.go

This file was deleted.

20 changes: 10 additions & 10 deletions go/pkg/db/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ func (r *Replica) ExecContext(ctx context.Context, query string, args ...interfa
status = "error"
}

metrics.DatabaseOperationLatency.WithLabelValues(r.mode, "exec", status).Observe(duration)
metrics.DatabaseOperationTotal.WithLabelValues(r.mode, "exec", status).Inc()
metrics.DatabaseOperationsLatency.WithLabelValues(r.mode, "exec", status).Observe(duration)
metrics.DatabaseOperationsTotal.WithLabelValues(r.mode, "exec", status).Inc()

return result, err
}
Expand All @@ -69,8 +69,8 @@ func (r *Replica) PrepareContext(ctx context.Context, query string) (*sql.Stmt,
status = "error"
}

metrics.DatabaseOperationLatency.WithLabelValues(r.mode, "prepare", status).Observe(duration)
metrics.DatabaseOperationTotal.WithLabelValues(r.mode, "prepare", status).Inc()
metrics.DatabaseOperationsLatency.WithLabelValues(r.mode, "prepare", status).Observe(duration)
metrics.DatabaseOperationsTotal.WithLabelValues(r.mode, "prepare", status).Inc()

return stmt, err // nolint:sqlclosecheck
}
Expand All @@ -95,8 +95,8 @@ func (r *Replica) QueryContext(ctx context.Context, query string, args ...interf
status = "error"
}

metrics.DatabaseOperationLatency.WithLabelValues(r.mode, "query", status).Observe(duration)
metrics.DatabaseOperationTotal.WithLabelValues(r.mode, "query", status).Inc()
metrics.DatabaseOperationsLatency.WithLabelValues(r.mode, "query", status).Observe(duration)
metrics.DatabaseOperationsTotal.WithLabelValues(r.mode, "query", status).Inc()

return rows, err // nolint:sqlclosecheck
}
Expand All @@ -119,8 +119,8 @@ func (r *Replica) QueryRowContext(ctx context.Context, query string, args ...int
// QueryRowContext doesn't return an error, but we can still track timing
status := "success"

metrics.DatabaseOperationLatency.WithLabelValues(r.mode, "query_row", status).Observe(duration)
metrics.DatabaseOperationTotal.WithLabelValues(r.mode, "query_row", status).Inc()
metrics.DatabaseOperationsLatency.WithLabelValues(r.mode, "query_row", status).Observe(duration)
metrics.DatabaseOperationsTotal.WithLabelValues(r.mode, "query_row", status).Inc()

return row
}
Expand All @@ -143,8 +143,8 @@ func (r *Replica) Begin(ctx context.Context) (*sql.Tx, error) {
status = "error"
}

metrics.DatabaseOperationLatency.WithLabelValues(r.mode, "begin", status).Observe(duration)
metrics.DatabaseOperationTotal.WithLabelValues(r.mode, "begin", status).Inc()
metrics.DatabaseOperationsLatency.WithLabelValues(r.mode, "begin", status).Observe(duration)
metrics.DatabaseOperationsTotal.WithLabelValues(r.mode, "begin", status).Inc()

return tx, err
}
17 changes: 17 additions & 0 deletions go/pkg/prometheus/metrics/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,21 @@ var (
},
[]string{"name"},
)

// BatchItemsProcessedErrorsTotal tracks the total number of items that resulted in errors
// during batch processing, labeled by batch name.
// Use this counter to monitor error rates in batch processing and identify problematic batches.
//
// Example usage:
// metrics.BatchItemsProcessedErrorsTotal.WithLabelValues("database_writes").Add(float64(errorCount))
BatchItemsProcessedErrorsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "batch",
Name: "items_processed_errors_total",
Help: "Total number of items processed through batches that resulted in an error",
ConstLabels: constLabels,
},
[]string{"name"},
)
)
16 changes: 16 additions & 0 deletions go/pkg/prometheus/metrics/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,20 @@ var (
},
[]string{"name", "drop"},
)

// BufferErrorsTotal tracks the total number of buffer operation errors,
// labeled by buffer name and state (the "state" label carries the error kind,
// e.g. "write_failed"). Use this counter to monitor buffer error rates.
//
// Example usage:
// metrics.BufferErrorsTotal.WithLabelValues("batch_writer", "write_failed").Inc()
BufferErrorsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "buffer",
Name: "errors_total",
Help: "Total number of buffer operation errors by name and state.",
ConstLabels: constLabels,
},
[]string{"name", "state"},
)
)
38 changes: 38 additions & 0 deletions go/pkg/prometheus/metrics/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ var (
// metrics.CacheReads.WithLabelValues("user_profile")
CacheReads = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "reads_total",
Help: "Number of cache reads by resource type and hit status.",
Expand All @@ -35,6 +36,7 @@ var (
// metrics.CacheWrites.WithLabelValues("user_profile").Set(float64(writeCount))
CacheWrites = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "writes",
Help: "Number of cache writes by resource type.",
Expand All @@ -52,6 +54,7 @@ var (
// metrics.CacheDeleted.WithLabelValues("user_profile", "capacity").Add(float64(evictionCount))
CacheDeleted = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "deleted_total",
Help: "Number of cache entries deleted by resource type and reason.",
Expand All @@ -67,6 +70,7 @@ var (
// metrics.CacheSize.WithLabelValues("user_profile").Set(float64(cacheSize))
CacheSize = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "size",
Help: "Current number of entries in the cache by resource type.",
Expand All @@ -82,6 +86,7 @@ var (
// metrics.CacheCapacity.WithLabelValues("user_profile").Set(float64(cacheCapacity))
CacheCapacity = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "capacity",
Help: "Maximum capacity of the cache by resource type.",
Expand All @@ -97,11 +102,44 @@ var (
// metrics.CacheRevalidations.WithLabelValues("user_profile").Inc()
CacheRevalidations = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "revalidations_total",
Help: "Total number of cache revalidations by resource type.",
ConstLabels: constLabels,
},
[]string{"resource"},
)

// CacheReadsErrorsTotal tracks the total number of cache read errors,
// labeled by resource type. Use this counter to monitor cache read error rates.
//
// Example usage:
// metrics.CacheReadsErrorsTotal.WithLabelValues("user_profile").Inc()
CacheReadsErrorsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "reads_errors_total",
Help: "Total number of cache read errors by resource type.",
ConstLabels: constLabels,
},
[]string{"resource"},
)

// CacheRevalidationsErrorsTotal tracks the total number of cache revalidation errors,
// labeled by resource type. Use this counter to monitor cache revalidation error rates.
//
// Example usage:
// metrics.CacheRevalidationsErrorsTotal.WithLabelValues("user_profile").Inc()
CacheRevalidationsErrorsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "cache",
Name: "revalidations_errors_total",
Help: "Total number of cache revalidation errors by resource type.",
ConstLabels: constLabels,
},
[]string{"resource"},
)
)
34 changes: 34 additions & 0 deletions go/pkg/prometheus/metrics/chproxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ var (
// metrics.ChproxyRequestsTotal.WithLabelValues("verifications").Inc()
ChproxyRequestsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "chproxy",
Name: "requests_total",
Help: "Total number of ClickHouse proxy requests processed.",
Expand All @@ -26,18 +27,51 @@ var (
[]string{"endpoint"},
)

// ChproxyErrorsTotal tracks the total number of errors encountered by ClickHouse proxy,
// labeled by endpoint. Use this counter to monitor error rates and identify problematic endpoints.
//
// Example usage:
// metrics.ChproxyErrorsTotal.WithLabelValues("verifications").Inc()
ChproxyErrorsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "chproxy",
Name: "errors_total",
Help: "Total number of errors encountered by ClickHouse proxy.",
ConstLabels: constLabels,
},
[]string{"endpoint"},
)

// ChproxyRowsTotal tracks the total number of rows/events received in chproxy requests.
// Use this counter to monitor data volume and ingestion patterns.
//
// Example usage:
// metrics.ChproxyRowsTotal.WithLabelValues("verifications").Add(float64(len(events)))
ChproxyRowsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "chproxy",
Name: "rows_total",
Help: "Total number of rows/events processed by ClickHouse proxy.",
ConstLabels: constLabels,
},
[]string{"endpoint"},
)

// ChproxyRowsErrorsTotal tracks the total number of row processing errors in ClickHouse proxy,
// labeled by endpoint. Use this counter to monitor row processing error rates.
//
// Example usage:
// metrics.ChproxyRowsErrorsTotal.WithLabelValues("verifications").Inc()
ChproxyRowsErrorsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "chproxy",
Name: "rows_errors_total",
Help: "Total number of row processing errors in ClickHouse proxy.",
ConstLabels: constLabels,
},
[]string{"endpoint"},
)
)
16 changes: 15 additions & 1 deletion go/pkg/prometheus/metrics/circuitbreaker.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,23 @@ var (
// Example usage:
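// metrics.CircuitBreakerRequests.WithLabelValues("my_circuit_breaker", "open").Inc()
// NOTE(review): callers pass the breaker name and its current state as label values,
// but the label names in this change become "service" and "action" — confirm the intended names.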
CircuitBreakerRequests = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "circuitbreaker",
Name: "requests_total",
Help: "Tracks the number of requests made to the circuitbreaker by state.",
ConstLabels: constLabels,
}, []string{"name", "state"})
}, []string{"service", "action"})

// CircuitBreakerErrorsTotal tracks the total number of circuit breaker errors,
// labeled by service and action. Use this counter to monitor circuit breaker error rates.
//
// Example usage:
// metrics.CircuitBreakerErrorsTotal.WithLabelValues("database", "timeout").Inc()
CircuitBreakerErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "circuitbreaker",
Name: "errors_total",
Help: "Total number of circuit breaker errors by service and action.",
ConstLabels: constLabels,
}, []string{"service", "action"})
)
26 changes: 21 additions & 5 deletions go/pkg/prometheus/metrics/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,20 @@ import (
)

var (
// DatabaseOperationLatency tracks database operation latencies as a histogram,
// DatabaseOperationsLatency tracks database operation latencies as a histogram,
// labeled by replica type (rw/ro), operation type, and success status.
// This collector uses predefined buckets optimized for typical database operation latencies.
//
// Example usage:
// timer := prometheus.NewTimer(prometheus.ObserverFunc(func(v float64) {
// metrics.DatabaseOperationLatency.WithLabelValues("rw", "exec", "success").Observe(v)
// metrics.DatabaseOperationsLatency.WithLabelValues("rw", "exec", "success").Observe(v)
// }))
// defer timer.ObserveDuration()
DatabaseOperationLatency = promauto.NewHistogramVec(
DatabaseOperationsLatency = promauto.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "unkey",
Subsystem: "database",
Name: "operation_latency_seconds",
Name: "operations_latency_seconds",
Help: "Histogram of database operation latencies in seconds.",
Buckets: latencyBuckets,
ConstLabels: constLabels,
Expand All @@ -39,13 +40,28 @@ var (
// Example usage:
// metrics.DatabaseOperationsTotal.WithLabelValues("rw", "exec", "success").Inc()
// metrics.DatabaseOperationsTotal.WithLabelValues("ro", "query", "error").Inc()
DatabaseOperationTotal = promauto.NewCounterVec(
DatabaseOperationsTotal = promauto.NewCounterVec(
prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "database",
Name: "operations_total",
Help: "Total number of database operations processed.",
ConstLabels: constLabels,
},
[]string{"replica", "operation", "status"},
)

// DatabaseOperationsErrorsTotal tracks the total number of database operation errors,
// labeled by replica type (rw/ro) and operation type.
// Use this counter to monitor database error rates and identify problematic operations.
//
// Example usage:
// metrics.DatabaseOperationsErrorsTotal.WithLabelValues("rw", "exec").Inc()
DatabaseOperationsErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "unkey",
Subsystem: "database",
Name: "operations_errors_total",
Help: "Total number of database operation errors.",
ConstLabels: constLabels,
}, []string{"replica", "operation"})
)
Loading
Loading