diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ad0505f1..d8d9cb0f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ The following emojis are used to highlight certain changes: ### Added +- `bitswap/network/httpnet`: New `WithMetricsLabelsForEndpoints` allows defining which hosts/endpoints can be used for labelling metrics that support such label. '*' enables this for all endpoints receiving HTTP requests, but may cause metric cardinality growth when too many endpoints exist. These labels allow tracking, for example, number or requests per response status AND endpoint used. Non-labelled request hosts are labelled with same value: `other`. + ### Changed - `DontHaveTimeoutConfig`'s default `MinTimeout` is changed from `0` to `25ms`. diff --git a/bitswap/network/httpnet/httpnet.go b/bitswap/network/httpnet/httpnet.go index 417f91709..16a6a1f0d 100644 --- a/bitswap/network/httpnet/httpnet.go +++ b/bitswap/network/httpnet/httpnet.go @@ -191,6 +191,23 @@ func WithMaxDontHaveErrors(threshold int) Option { } } +// WithMetricsLabelsForHosts allows to label some metrics that support it +// with the endpoint name that they relate to. For example, this allows +// tracking respose statuses by endpoint. Using '*' means that all endpoints +// are tracked. By default, no endpoints are tracked. Endpoints that are not +// tracked are assigned the label "other". In a scenario where we are making +// requests to many different endpoints, logging all of them with '*' can +// cause the metric cardinality to grow accordingly, and end up affecting +// the performance of the metrics collector (i.e. Prometheus). +func WithMetricsLabelsForEndpoints(hosts []string) Option { + return func(net *Network) { + net.trackedEndpoints = make(map[string]struct{}) + for _, h := range hosts { + net.trackedEndpoints[h] = struct{}{} + } + } +} + type Network struct { // NOTE: Stats must be at the top of the heap allocation to ensure 64bit // alignment. @@ -221,6 +238,7 @@ type Network struct { httpWorkers int allowlist map[string]struct{} denylist map[string]struct{} + trackedEndpoints map[string]struct{} metrics *metrics httpRequests chan httpRequestInfo @@ -262,8 +280,7 @@ func New(host host.Host, opts ...Option) network.BitSwapNetwork { opt(htnet) } - // TODO: take allowlist into account! - htnet.metrics = newMetrics(htnet.allowlist) + htnet.metrics = newMetrics(htnet.trackedEndpoints) reqTracker := newRequestTracker() htnet.requestTracker = reqTracker diff --git a/bitswap/network/httpnet/metrics.go b/bitswap/network/httpnet/metrics.go index 077aa727d..592803c14 100644 --- a/bitswap/network/httpnet/metrics.go +++ b/bitswap/network/httpnet/metrics.go @@ -56,7 +56,9 @@ func status(ctx context.Context) imetrics.CounterVec { } type metrics struct { - trackedEndpoints map[string]struct{} + trackedEndpoints map[string]struct{} + trackAllEndpoints bool // avoid unnecessary map-lookups + trackSomeEndpoints bool // avoid unnecessary map-lookups RequestsInFlight imetrics.Gauge RequestsTotal imetrics.Counter @@ -74,7 +76,12 @@ type metrics struct { func newMetrics(endpoints map[string]struct{}) *metrics { ctx := imetrics.CtxScope(context.Background(), "exchange_httpnet") + _, trackAll := endpoints["*"] + trackSome := len(endpoints) > 0 && !trackAll + return &metrics{ + trackAllEndpoints: trackAll, + trackSomeEndpoints: trackSome, trackedEndpoints: endpoints, RequestsInFlight: requestsInFlight(ctx), RequestsTotal: requestsTotal(ctx), @@ -93,10 +100,17 @@ func newMetrics(endpoints map[string]struct{}) *metrics { func (m *metrics) updateStatusCounter(method string, statusCode int, host string) { m.RequestsTotal.Inc() - // Track all for the moment. - // if _, ok := m.trackedEndpoints[host]; !ok { - // host = "other" - // } + // Set host == other if we are: + // - not tracking all hosts + // - not tracking any hosts + // - the host is not among those we are tracking + if !m.trackAllEndpoints { + if !m.trackSomeEndpoints { + host = "other" + } else if _, ok := m.trackedEndpoints[host]; !ok { + host = "other" + } + } var statusStr string