Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions lib/msgraph/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/google/uuid"
"github.com/gravitational/trace"
"github.com/jonboulle/clockwork"
"github.com/prometheus/client_golang/prometheus"

"github.com/gravitational/teleport"
apidefaults "github.com/gravitational/teleport/api/defaults"
Expand Down Expand Up @@ -98,6 +99,9 @@ type Config struct {
// GraphEndpoint specifies root domain of the Graph API.
GraphEndpoint string
Logger *slog.Logger
// MetricsRegistry configures where metrics should be registered.
// When nil, metrics are created but not registered.
MetricsRegistry prometheus.Registerer
}

// SetDefaults sets the default values for optional fields.
Expand Down Expand Up @@ -144,6 +148,7 @@ type Client struct {
baseURL *url.URL
pageSize int
logger *slog.Logger
metrics *clientMetrics
}

// NewClient returns a new client for the given config.
Expand All @@ -156,6 +161,15 @@ func NewClient(cfg Config) (*Client, error) {
if err != nil {
return nil, trace.Wrap(err)
}

metrics := newMetrics()
// gracefully handle not being given a metric registry
if cfg.MetricsRegistry != nil {
if err := metrics.register(cfg.MetricsRegistry); err != nil {
return nil, trace.Wrap(err, "registering metrics")
}
}

return &Client{
httpClient: cfg.HTTPClient,
tokenProvider: cfg.TokenProvider,
Expand All @@ -164,6 +178,7 @@ func NewClient(cfg Config) (*Client, error) {
baseURL: base.JoinPath(graphVersion),
pageSize: cfg.PageSize,
logger: cfg.Logger,
metrics: metrics,
}, nil
}

Expand Down Expand Up @@ -201,6 +216,7 @@ func (c *Client) request(ctx context.Context, method string, uri string, header
}

var lastErr error
var start time.Time
for range maxRetries {
if retryAfter > 0 {
select {
Expand Down Expand Up @@ -231,10 +247,13 @@ func (c *Client) request(ctx context.Context, method string, uri string, header
// https://learn.microsoft.com/en-us/graph/best-practices-concept#reliability-and-support
req.Header.Set("client-request-id", requestID)

start = c.clock.Now()
resp, err := c.httpClient.Do(req)
if err != nil {
return nil, trace.Wrap(err) // hard I/O error, bail
}
c.metrics.requestDuration.WithLabelValues(method).Observe(c.clock.Since(start).Seconds())
c.metrics.requestTotal.WithLabelValues(method, strconv.Itoa(resp.StatusCode))

if resp.StatusCode >= 200 && resp.StatusCode < 400 {
return resp, nil
Expand Down
64 changes: 64 additions & 0 deletions lib/msgraph/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Teleport
* Copyright (C) 2025 Gravitational, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package msgraph

import (
"github.com/gravitational/trace"
"github.com/prometheus/client_golang/prometheus"

"github.com/gravitational/teleport"
)

type clientMetrics struct {
// requestsTotal keeps track of the number of requests done by the client
// This metric is labeled by status code.
requestTotal *prometheus.CounterVec
// requestDuration keeps track of the request duration, in seconds.
requestDuration *prometheus.HistogramVec
}

const (
metricSubsystem = "msgraph"
metricsLabelStatus = "status"
metricsLabelsMethod = "method"
)

func newMetrics() *clientMetrics {
return &clientMetrics{
requestTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: teleport.MetricNamespace,
Subsystem: metricSubsystem,
Name: "request_total",
Help: "Total number of requests made to MS Graph",
}, []string{metricsLabelsMethod, metricsLabelStatus}),
requestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: teleport.MetricNamespace,
Subsystem: metricSubsystem,
Name: "request_duration_seconds",
Help: "Request to MS Graph duration in seconds.",
}, []string{metricsLabelsMethod}),
}
Comment on lines +43 to +56
Copy link
Copy Markdown
Contributor

@smallinsky smallinsky Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will not scale in long run - we will keep implementing third party api SDK metrics for each external client. So each SDK will have custom metrics implemented in different way - But it it better than not having metric at all.

If we have a moment I would like to consider building generic third party API metrics in generic way:

#40579

But I don't see this as a blocker for this PR, but if possible we can wrap up #40579 and have a generic metrics arch shared across very SDK client.

WDYT ?

Copy link
Copy Markdown
Contributor Author

@hugoShaka hugoShaka Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered doing this but this raises the problem of cardinality. We need some way to mask variable parts of the URI. We have a similar problem in the backend with our metrics and we have not solved it yet. I wanted to get metrics without refactoring the whole client to add a new endpoint struct containing both the endpoint parts and the metrics mask

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My vote would be to start simple and iterate. The generic approach sounds nice at first, but adds a number of complexities that we don't need to solve today.

}

func (metrics *clientMetrics) register(r prometheus.Registerer) error {
return trace.NewAggregate(
r.Register(metrics.requestTotal),
r.Register(metrics.requestDuration),
)
}
Loading