diff --git a/auth/go.mod b/auth/go.mod index c8ea0a24318d..fb8497e0e76e 100644 --- a/auth/go.mod +++ b/auth/go.mod @@ -7,7 +7,7 @@ require ( github.com/google/go-cmp v0.7.0 github.com/google/s2a-go v0.1.9 github.com/googleapis/enterprise-certificate-proxy v0.3.14 - github.com/googleapis/gax-go/v2 v2.17.0 + github.com/googleapis/gax-go/v2 v2.18.0 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 go.opentelemetry.io/otel v1.40.0 @@ -28,9 +28,10 @@ require ( go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/otel/metric v1.40.0 // indirect golang.org/x/crypto v0.48.0 // indirect - golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/oauth2 v0.35.0 // indirect golang.org/x/sync v0.19.0 // indirect golang.org/x/sys v0.41.0 // indirect golang.org/x/text v0.34.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect + google.golang.org/api v0.267.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d // indirect ) diff --git a/auth/go.sum b/auth/go.sum index 6176e7353d91..836aebf2395b 100644 --- a/auth/go.sum +++ b/auth/go.sum @@ -28,8 +28,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.14 h1:yh8ncqsbUY4shRD5dA6RlzjJaT4hi3kII+zYw8wmLb8= github.com/googleapis/enterprise-certificate-proxy v0.3.14/go.mod h1:vqVt9yG9480NtzREnTlmGSBmFrA+bzb0yl0TxoBQXOg= -github.com/googleapis/gax-go/v2 v2.17.0 h1:RksgfBpxqff0EZkDWYuz9q/uWsTVz+kf43LsZ1J6SMc= -github.com/googleapis/gax-go/v2 v2.17.0/go.mod h1:mzaqghpQp4JDh3HvADwrat+6M3MOIDp5YKHhb9PAgDY= +github.com/googleapis/gax-go/v2 v2.18.0 h1:jxP5Uuo3bxm3M6gGtV94P4lliVetoCB4Wk2x8QA86LI= +github.com/googleapis/gax-go/v2 v2.18.0/go.mod h1:uSzZN4a356eRG985CzJ3WfbFSpqkLTjsnhWGJR6EwrE= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -58,8 +58,8 @@ golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo= golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y= -golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= -golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= @@ -70,8 +70,14 @@ golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 h1:H86B94AW+VfJWDqFeEbBPhEtHzJwJfTbgE2lZa54ZAQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/api v0.267.0 h1:w+vfWPMPYeRs8qH1aYYsFX68jMls5acWl/jocfLomwE= +google.golang.org/api v0.267.0/go.mod h1:Jzc0+ZfLnyvXma3UtaTl023TdhZu6OMBP9tJ+0EmFD0= +google.golang.org/genproto v0.0.0-20260217215200-42d3e9bedb6d h1:vsOm753cOAMkt76efriTCDKjpCbK18XGHMJHo0JUKhc= +google.golang.org/genproto v0.0.0-20260217215200-42d3e9bedb6d/go.mod h1:0oz9d7g9QLSdv9/lgbIjowW1JoxMbxmBVNe8i6tORJI= +google.golang.org/genproto/googleapis/api v0.0.0-20260217215200-42d3e9bedb6d h1:EocjzKLywydp5uZ5tJ79iP6Q0UjDnyiHkGRWxuPBP8s= +google.golang.org/genproto/googleapis/api v0.0.0-20260217215200-42d3e9bedb6d/go.mod h1:48U2I+QQUYhsFrg2SY6r+nJzeOtjey7j//WBESw+qyQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d h1:t/LOSXPJ9R0B6fnZNyALBRfZBH0Uy0gT+uR+SJ6syqQ= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= google.golang.org/grpc v1.79.2 h1:fRMD94s2tITpyJGtBBn7MkMseNpOZU8ZxgC3MMBaXRU= google.golang.org/grpc v1.79.2/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= diff --git a/auth/grpctransport/grpctransport.go b/auth/grpctransport/grpctransport.go index 9b4b1f06e889..1792c47286d3 100644 --- a/auth/grpctransport/grpctransport.go +++ b/auth/grpctransport/grpctransport.go @@ -24,17 +24,25 @@ import ( "log/slog" "net/http" "os" + "strconv" "cloud.google.com/go/auth" "cloud.google.com/go/auth/credentials" "cloud.google.com/go/auth/internal" "cloud.google.com/go/auth/internal/transport" "cloud.google.com/go/auth/internal/transport/headers" + "github.com/googleapis/gax-go/v2" + "github.com/googleapis/gax-go/v2/callctx" "github.com/googleapis/gax-go/v2/internallog" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" + "google.golang.org/grpc/codes" grpccreds "google.golang.org/grpc/credentials" grpcinsecure "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/stats" + "google.golang.org/grpc/status" ) const ( @@ -47,6 +55,30 @@ const ( quotaProjectHeaderKey = "X-goog-user-project" ) +// codeToStr is a reversal of the `strToCode` map in +// https://github.com/grpc/grpc-go/blob/master/codes/codes.go +// The gRPC specification has exactly 17 status codes, defined +// as a contiguous block of integers from 0 to 16. +var codeToStr = [...]string{ + "OK", // codes.OK = 0 + "CANCELED", // codes.Canceled = 1 + "UNKNOWN", // codes.Unknown = 2 + "INVALID_ARGUMENT", // codes.InvalidArgument = 3 + "DEADLINE_EXCEEDED", // codes.DeadlineExceeded = 4 + "NOT_FOUND", // codes.NotFound = 5 + "ALREADY_EXISTS", // codes.AlreadyExists = 6 + "PERMISSION_DENIED", // codes.PermissionDenied = 7 + "RESOURCE_EXHAUSTED", // codes.ResourceExhausted = 8 + "FAILED_PRECONDITION", // codes.FailedPrecondition = 9 + "ABORTED", // codes.Aborted = 10 + "OUT_OF_RANGE", // codes.OutOfRange = 11 + "UNIMPLEMENTED", // codes.Unimplemented = 12 + "INTERNAL", // codes.Internal = 13 + "UNAVAILABLE", // codes.Unavailable = 14 + "DATA_LOSS", // codes.DataLoss = 15 + "UNAUTHENTICATED", // codes.Unauthenticated = 16 +} + var ( // Set at init time by dial_socketopt.go. If nil, socketopt is not supported. timeoutDialerOption grpc.DialOption @@ -198,7 +230,7 @@ type InternalOptions struct { // service. DefaultScopes []string // SkipValidation bypasses validation on Options. It should only be used - // internally for clients that needs more control over their transport. + // internally for clients that need more control over their transport. SkipValidation bool // TelemetryAttributes specifies a map of telemetry attributes to be added // to all OpenTelemetry signals, such as tracing and metrics, for purposes @@ -430,5 +462,115 @@ func addOpenTelemetryStatsHandler(dialOpts []grpc.DialOption, opts *Options) []g if opts.DisableTelemetry { return dialOpts } - return append(dialOpts, grpc.WithStatsHandler(otelgrpc.NewClientHandler())) + if !gax.IsFeatureEnabled("TRACING") { + return append(dialOpts, grpc.WithStatsHandler(otelgrpc.NewClientHandler())) + } + var staticAttrs []attribute.KeyValue + if opts.InternalOptions != nil { + staticAttrs = transport.StaticTelemetryAttributes(opts.InternalOptions.TelemetryAttributes) + } + otelOpts := []otelgrpc.Option{ + otelgrpc.WithSpanAttributes(staticAttrs...), + } + return append(dialOpts, grpc.WithStatsHandler(&otelHandler{ + Handler: otelgrpc.NewClientHandler(otelOpts...), + })) +} + +// otelHandler is a wrapper around the OpenTelemetry gRPC client handler that +// adds custom Google Cloud-specific attributes to spans and metrics. +type otelHandler struct { + stats.Handler +} + +// TagRPC intercepts the RPC start to extract dynamic attributes like resource +// name and retry count from the outgoing context metadata and attach them to +// the current span. +func (h *otelHandler) TagRPC(ctx context.Context, info *stats.RPCTagInfo) context.Context { + ctx = h.Handler.TagRPC(ctx, info) + span := trace.SpanFromContext(ctx) + if !span.IsRecording() { + return ctx + } + var attrs []attribute.KeyValue + if resName, ok := callctx.TelemetryFromContext(ctx, "resource_name"); ok { + attrs = append(attrs, attribute.String("gcp.resource.destination.id", resName)) + } + if resendCountStr, ok := callctx.TelemetryFromContext(ctx, "resend_count"); ok { + if count, err := strconv.Atoi(resendCountStr); err == nil { + attrs = append(attrs, attribute.Int("gcp.grpc.resend_count", count)) + } + } + if len(attrs) > 0 { + span.SetAttributes(attrs...) + } + return ctx +} + +// HandleRPC intercepts the RPC completion to capture and format error-related +// attributes ensuring they conform to Google Cloud observability standards. +func (h *otelHandler) HandleRPC(ctx context.Context, s stats.RPCStats) { + end, ok := s.(*stats.End) + if !ok { + h.Handler.HandleRPC(ctx, s) + return + } + span := trace.SpanFromContext(ctx) + if !span.IsRecording() { + h.Handler.HandleRPC(ctx, s) + return + } + + var attrs []attribute.KeyValue + if end.Error != nil { + st, ok := status.FromError(end.Error) + rpcStatusCode := codeToCanonicalStr(st.Code()) + + var errorType string + // 1. Check if the local context expired or was cancelled. This is the only + // reliable way to distinguish a local client timeout from a server timeout + // because gRPC does not wrap context errors in its status.Error types. + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + errorType = "CLIENT_TIMEOUT" + } else if errors.Is(ctx.Err(), context.Canceled) { + errorType = "CLIENT_CANCELLED" + } else if !ok || st.Code() == codes.Unknown || st.Code() == codes.Internal { + // 2. If the error isn't a context breakdown and the gRPC framework + // doesn't "understand" it (returning ok=false or a generic catch-all + // bucket like Unknown/Internal), we "pack" the actual Go error type + // name into error.type (e.g., "*net.OpError"). This is per the error.type + // [spec](https://opentelemetry.io/docs/specs/semconv/registry/attributes/error/#error-type). + // "When error.type is set to a type (e.g., an exception type), its canonical + // class name identifying the type within the artifact SHOULD be used." + errorType = fmt.Sprintf("%T", end.Error) + } else { + // 3. Otherwise, it is a well-understood gRPC protocol error (e.g., + // PERMISSION_DENIED) likely returned by the server. + errorType = rpcStatusCode + } + + attrs = []attribute.KeyValue{ + attribute.String("error.type", errorType), + attribute.String("status.message", st.Message()), + attribute.String("rpc.response.status_code", rpcStatusCode), + attribute.String("exception.type", fmt.Sprintf("%T", end.Error)), + } + } else { + attrs = []attribute.KeyValue{ + attribute.String("rpc.response.status_code", "OK"), + } + } + span.SetAttributes(attrs...) + h.Handler.HandleRPC(ctx, s) +} + +// codeToCanonicalStr returns the canonical name for each of the 17 gRPC +// status codes defined in https://github.com/grpc/grpc-go/blob/master/codes/codes.go. +// For any codes.Code that converts to an out-of-bounds int, +// it returns "UNKNOWN". +func codeToCanonicalStr(code codes.Code) string { + if int(code) >= 0 && int(code) < len(codeToStr) { + return codeToStr[code] + } + return "UNKNOWN" } diff --git a/auth/grpctransport/grpctransport_otel_test.go b/auth/grpctransport/grpctransport_otel_test.go index b9f8311a57cd..c47ca9c2e435 100644 --- a/auth/grpctransport/grpctransport_otel_test.go +++ b/auth/grpctransport/grpctransport_otel_test.go @@ -19,13 +19,18 @@ import ( "net" "net/http" "testing" + "time" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/googleapis/gax-go/v2" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/sdk/trace/tracetest" + + "github.com/googleapis/gax-go/v2/callctx" oteltrace "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -48,7 +53,11 @@ const ( valLocalhost = "127.0.0.1" ) -func TestDial_OpenTelemetry(t *testing.T) { +func TestDial_OpenTelemetry_Enabled(t *testing.T) { + gax.TestOnlyResetIsFeatureEnabled() + defer gax.TestOnlyResetIsFeatureEnabled() + t.Setenv("GOOGLE_SDK_GO_EXPERIMENTAL_TRACING", "true") + // Ensure any lingering HTTP/2 connections are closed to avoid goroutine leaks. defer http.DefaultTransport.(*http.Transport).CloseIdleConnections() @@ -70,15 +79,29 @@ func TestDial_OpenTelemetry(t *testing.T) { return nil, status.Error(grpccodes.Internal, "test error") }, } + timeoutEchoer := &fakeEchoService{ + Fn: func(ctx context.Context, req *echo.EchoRequest) (*echo.EchoReply, error) { + time.Sleep(100 * time.Millisecond) + return &echo.EchoReply{Message: req.Message}, nil + }, + } + cancelEchoer := &fakeEchoService{ + Fn: func(ctx context.Context, req *echo.EchoRequest) (*echo.EchoReply, error) { + time.Sleep(100 * time.Millisecond) + return &echo.EchoReply{Message: req.Message}, nil + }, + } tests := []struct { - name string - echoer echo.EchoerServer - opts *Options - wantErr bool - wantSpans int - wantSpan sdktrace.ReadOnlySpan - wantAttrKeys []attribute.Key + name string + echoer echo.EchoerServer + opts *Options + telemetryCtxValues map[string]string + errorType string // "timeout", "cancel" + wantErr bool + wantSpans int + wantSpan sdktrace.ReadOnlySpan + wantAttrKeys []attribute.Key }{ { name: "telemetry enabled success", @@ -92,22 +115,10 @@ func TestDial_OpenTelemetry(t *testing.T) { Code: codes.Unset, }, Attributes: []attribute.KeyValue{ - // Note on Events (Logs): - // The otelgrpc instrumentation also records "message" events (Sent/Received) - // containing message sizes (compressed/uncompressed). These appear in the - // "Logs" or "Events" tab in Cloud Trace. This test does not explicitly verify - // them, but they are present in the generated span. - - // In Cloud Trace, this status code maps to the visual "Status" field - // (e.g., a green checkmark for 0/OK, or an error icon for other codes). keyRPCStatusCode.Int64(0), - // In Cloud Trace, "rpc.service" and "rpc.method" are combined to form - // the Span Name (e.g., "echo.Echoer/Echo"). keyRPCMethod.String("Echo"), keyRPCService.String("echo.Echoer"), - // "rpc.system" is displayed as a standard attribute key in the "Attributes" tab. keyRPCSystem.String(valRPCSystemGRPC), - // "server.address" and "server.port" are displayed as standard attribute keys. keyServerAddr.String(valLocalhost), }, }.Snapshot(), @@ -127,23 +138,70 @@ func TestDial_OpenTelemetry(t *testing.T) { Description: "test error", }, Attributes: []attribute.KeyValue{ - // Note on Events (Logs): - // The otelgrpc instrumentation also records "message" events (Sent/Received) - // containing message sizes (compressed/uncompressed). These appear in the - // "Logs" or "Events" tab in Cloud Trace. This test does not explicitly verify - // them, but they are present in the generated span. - - // In Cloud Trace, non-zero status codes (like 13 for INTERNAL) are displayed - // as errors in the "Status" field. keyRPCStatusCode.Int64(13), keyRPCMethod.String("Echo"), keyRPCService.String("echo.Echoer"), keyRPCSystem.String(valRPCSystemGRPC), keyServerAddr.String(valLocalhost), + attribute.String("error.type", "*status.Error"), + attribute.String("status.message", "test error"), + attribute.String("rpc.response.status_code", "INTERNAL"), }, }.Snapshot(), wantAttrKeys: []attribute.Key{keyServerPort}, }, + { + name: "telemetry enabled client timeout", + echoer: timeoutEchoer, + opts: &Options{DisableAuthentication: true}, + errorType: "timeout", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "context deadline exceeded", + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(4), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + attribute.String("error.type", "CLIENT_TIMEOUT"), + attribute.String("rpc.response.status_code", "DEADLINE_EXCEEDED"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort, attribute.Key("status.message"), attribute.Key("exception.type")}, + }, + { + name: "telemetry enabled client cancelled", + echoer: cancelEchoer, + opts: &Options{DisableAuthentication: true}, + errorType: "cancel", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "context canceled", + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(1), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + attribute.String("error.type", "CLIENT_CANCELLED"), + attribute.String("rpc.response.status_code", "CANCELED"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort, attribute.Key("status.message"), attribute.Key("exception.type")}, + }, { name: "telemetry disabled", echoer: successfulEchoer, @@ -153,6 +211,49 @@ func TestDial_OpenTelemetry(t *testing.T) { }, wantSpans: 0, }, + { + name: "telemetry enabled metadata enrichment", + echoer: successfulEchoer, + opts: &Options{ + DisableAuthentication: true, + InternalOptions: &InternalOptions{ + TelemetryAttributes: map[string]string{ + "gcp.client.service": "echo", + "gcp.client.version": "1.0.0", + "gcp.client.repo": "googleapis/google-cloud-go", + "gcp.client.artifact": "c.g/auth/grpctransport", + "gcp.client.language": "go", + "url.domain": "echo.googleapis.com", + "ignored.key": "should not be included", + }, + }, + }, + telemetryCtxValues: map[string]string{"resource_name": "my-resource"}, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Unset, + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(0), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + attribute.String("gcp.resource.destination.id", "my-resource"), + attribute.String("gcp.client.service", "echo"), + attribute.String("gcp.client.version", "1.0.0"), + attribute.String("gcp.client.repo", "googleapis/google-cloud-go"), + attribute.String("gcp.client.artifact", "c.g/auth/grpctransport"), + attribute.String("gcp.client.language", "go"), + attribute.String("url.domain", "echo.googleapis.com"), + attribute.String("rpc.response.status_code", "OK"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort}, + }, } for _, tt := range tests { @@ -170,14 +271,29 @@ func TestDial_OpenTelemetry(t *testing.T) { tt.opts.Endpoint = l.Addr().String() tt.opts.GRPCDialOpts = []grpc.DialOption{grpc.WithTransportCredentials(insecure.NewCredentials())} + pool, err := Dial(context.Background(), false, tt.opts) if err != nil { t.Fatalf("Dial() = %v, want nil", err) } defer pool.Close() + ctx := context.Background() + var cancel context.CancelFunc + if tt.errorType == "timeout" { + ctx, cancel = context.WithTimeout(ctx, 10*time.Millisecond) + defer cancel() + } else if tt.errorType == "cancel" { + ctx, cancel = context.WithCancel(ctx) + time.AfterFunc(10*time.Millisecond, cancel) + } + + for k, v := range tt.telemetryCtxValues { + ctx = callctx.WithTelemetryContext(ctx, k, v) + } + client := echo.NewEchoerClient(pool) - _, err = client.Echo(context.Background(), &echo.EchoRequest{Message: "hello"}) + _, err = client.Echo(ctx, &echo.EchoRequest{Message: "hello"}) if (err != nil) != tt.wantErr { t.Errorf("client.Echo() error = %v, wantErr %v", err, tt.wantErr) } @@ -192,44 +308,13 @@ func TestDial_OpenTelemetry(t *testing.T) { if diff := cmp.Diff(tt.wantSpan.Name(), span.Name); diff != "" { t.Errorf("span.Name mismatch (-want +got):\n%s", diff) } - // In Cloud Trace, SpanKind "Client" identifies this as an outgoing request, - // often affecting the icon used in the trace visualization. if diff := cmp.Diff(tt.wantSpan.SpanKind(), span.SpanKind); diff != "" { t.Errorf("span.SpanKind mismatch (-want +got):\n%s", diff) } - if diff := cmp.Diff(tt.wantSpan.Status(), span.Status); diff != "" { + if diff := cmp.Diff(tt.wantSpan.Status(), span.Status, cmpopts.IgnoreFields(sdktrace.Status{}, "Description")); diff != "" { t.Errorf("span.Status mismatch (-want +got):\n%s", diff) } - // Note: Real-world spans in Cloud Trace will contain additional attributes - // that are not present in this unit test. - // - // 1. Resource Attributes: - // - "g.co/r/generic_node/location" (e.g., "global") - // - "g.co/r/generic_node/namespace" - // - "g.co/r/generic_node/node_id" - // - "service.name" (e.g., "my-application") - // - "telemetry.sdk.language" (e.g., "go") - // - "telemetry.sdk.name" (e.g., "opentelemetry") - // - "telemetry.sdk.version" (e.g., "1.20.0") - // These are defined by the TracerProvider's Resource configuration. This test uses - // a basic TracerProvider, so these attributes contain default values (e.g., - // service.name="unknown_service:grpctransport.test") rather than production values. - // - // 2. Instrumentation Scope: - // - "otel.scope.name" (e.g., "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc") - // - "otel.scope.version" (e.g., "0.46.0") - // These identify the instrumentation library itself and are part of the - // OpenTelemetry data model, separate from Span attributes. - // - // 3. Exporter Attributes: - // - "g.co/agent" (e.g., "opentelemetry-go 1.20.0; google-cloud-trace-exporter 1.20.0") - // These are injected by specific exporters (like the Google Cloud Trace exporter) - // and are not present when using the InMemoryExporter. - // - // This test focuses on verifying the "rpc.*" and "server.*" attributes, which are - // generated by the otelgrpc instrumentation library itself. - gotAttrs := map[attribute.Key]attribute.Value{} for _, attr := range span.Attributes { gotAttrs[attr.Key] = attr.Value @@ -249,6 +334,279 @@ func TestDial_OpenTelemetry(t *testing.T) { t.Errorf("missing attribute key: %s", wantKey) } } + if _, ok := gotAttrs[attribute.Key("ignored.key")]; ok { + t.Errorf("found unexpected attribute key: ignored.key") + } + } + }) + } +} + +func TestDial_OpenTelemetry_Disabled(t *testing.T) { + gax.TestOnlyResetIsFeatureEnabled() + defer gax.TestOnlyResetIsFeatureEnabled() + t.Setenv("GOOGLE_SDK_GO_EXPERIMENTAL_TRACING", "false") + + // Ensure any lingering HTTP/2 connections are closed to avoid goroutine leaks. + defer http.DefaultTransport.(*http.Transport).CloseIdleConnections() + + exporter := tracetest.NewInMemoryExporter() + tp := sdktrace.NewTracerProvider(sdktrace.WithSyncer(exporter)) + defer tp.Shutdown(context.Background()) + + // Restore the global tracer provider after the test to avoid side effects. + defer func(prev oteltrace.TracerProvider) { otel.SetTracerProvider(prev) }(otel.GetTracerProvider()) + otel.SetTracerProvider(tp) + + successfulEchoer := &fakeEchoService{ + Fn: func(ctx context.Context, req *echo.EchoRequest) (*echo.EchoReply, error) { + return &echo.EchoReply{Message: req.Message}, nil + }, + } + errorEchoer := &fakeEchoService{ + Fn: func(ctx context.Context, req *echo.EchoRequest) (*echo.EchoReply, error) { + return nil, status.Error(grpccodes.Internal, "test error") + }, + } + timeoutEchoer := &fakeEchoService{ + Fn: func(ctx context.Context, req *echo.EchoRequest) (*echo.EchoReply, error) { + time.Sleep(100 * time.Millisecond) + return &echo.EchoReply{Message: req.Message}, nil + }, + } + cancelEchoer := &fakeEchoService{ + Fn: func(ctx context.Context, req *echo.EchoRequest) (*echo.EchoReply, error) { + time.Sleep(100 * time.Millisecond) + return &echo.EchoReply{Message: req.Message}, nil + }, + } + + tests := []struct { + name string + echoer echo.EchoerServer + opts *Options + telemetryCtxValues map[string]string + errorType string // "timeout", "cancel" + wantErr bool + wantSpans int + wantSpan sdktrace.ReadOnlySpan + wantAttrKeys []attribute.Key + }{ + { + name: "telemetry enabled success (but gated off)", + echoer: successfulEchoer, + opts: &Options{DisableAuthentication: true}, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Unset, + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(0), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort}, + }, + { + name: "telemetry enabled error (but gated off)", + echoer: errorEchoer, + opts: &Options{DisableAuthentication: true}, + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "test error", + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(13), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + // Standard OTel attributes only, NO strict error.type/status.message/grpc.status + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort}, + }, + { + name: "telemetry enabled client timeout (but gated off)", + echoer: timeoutEchoer, + opts: &Options{DisableAuthentication: true}, + errorType: "timeout", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "context deadline exceeded", + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(4), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort}, + }, + { + name: "telemetry enabled client cancelled (but gated off)", + echoer: cancelEchoer, + opts: &Options{DisableAuthentication: true}, + errorType: "cancel", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "context canceled", + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(1), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort}, + }, + { + name: "telemetry disabled", + echoer: successfulEchoer, + opts: &Options{ + DisableAuthentication: true, + DisableTelemetry: true, + }, + wantSpans: 0, + }, + { + name: "telemetry enabled metadata enrichment (but gated off)", + echoer: successfulEchoer, + opts: &Options{ + DisableAuthentication: true, + InternalOptions: &InternalOptions{ + TelemetryAttributes: map[string]string{ + "gcp.client.version": "1.0.0", + "ignored.key": "should not be included", + }, + }, + }, + telemetryCtxValues: map[string]string{"resource_name": "my-resource"}, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "echo.Echoer/Echo", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Unset, + }, + Attributes: []attribute.KeyValue{ + keyRPCStatusCode.Int64(0), + keyRPCMethod.String("Echo"), + keyRPCService.String("echo.Echoer"), + keyRPCSystem.String(valRPCSystemGRPC), + keyServerAddr.String(valLocalhost), + // NO gcp.* attributes + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + exporter.Reset() + + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("Failed to listen: %v", err) + } + s := grpc.NewServer() + echo.RegisterEchoerServer(s, tt.echoer) + go s.Serve(l) + defer s.Stop() + + tt.opts.Endpoint = l.Addr().String() + tt.opts.GRPCDialOpts = []grpc.DialOption{grpc.WithTransportCredentials(insecure.NewCredentials())} + + pool, err := Dial(context.Background(), false, tt.opts) + if err != nil { + t.Fatalf("Dial() = %v, want nil", err) + } + defer pool.Close() + + ctx := context.Background() + var cancel context.CancelFunc + if tt.errorType == "timeout" { + ctx, cancel = context.WithTimeout(ctx, 10*time.Millisecond) + defer cancel() + } else if tt.errorType == "cancel" { + ctx, cancel = context.WithCancel(ctx) + time.AfterFunc(10*time.Millisecond, cancel) + } + + for k, v := range tt.telemetryCtxValues { + ctx = callctx.WithTelemetryContext(ctx, k, v) + } + + client := echo.NewEchoerClient(pool) + _, err = client.Echo(ctx, &echo.EchoRequest{Message: "hello"}) + if (err != nil) != tt.wantErr { + t.Errorf("client.Echo() error = %v, wantErr %v", err, tt.wantErr) + } + + spans := exporter.GetSpans() + if len(spans) != tt.wantSpans { + t.Fatalf("len(spans) = %d, want %d", len(spans), tt.wantSpans) + } + + if tt.wantSpans > 0 { + span := exporter.GetSpans()[0] + if diff := cmp.Diff(tt.wantSpan.Name(), span.Name); diff != "" { + t.Errorf("span.Name mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(tt.wantSpan.SpanKind(), span.SpanKind); diff != "" { + t.Errorf("span.SpanKind mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(tt.wantSpan.Status(), span.Status, cmpopts.IgnoreFields(sdktrace.Status{}, "Description")); diff != "" { + t.Errorf("span.Status mismatch (-want +got):\n%s", diff) + } + + gotAttrs := map[attribute.Key]attribute.Value{} + for _, attr := range span.Attributes { + gotAttrs[attr.Key] = attr.Value + } + for _, wantAttr := range tt.wantSpan.Attributes() { + if gotVal, ok := gotAttrs[wantAttr.Key]; !ok { + t.Errorf("missing attribute: %s", wantAttr.Key) + } else { + if diff := cmp.Diff(wantAttr.Value, gotVal, cmp.AllowUnexported(attribute.Value{})); diff != "" { + t.Errorf("attribute %s mismatch (-want +got):\n%s", wantAttr.Key, diff) + } + } + } + for _, wantKey := range tt.wantAttrKeys { + if _, ok := gotAttrs[wantKey]; !ok { + t.Errorf("missing attribute key: %s", wantKey) + } + } + if _, ok := gotAttrs[attribute.Key("ignored.key")]; ok { + t.Errorf("found unexpected attribute key: ignored.key") + } } }) } diff --git a/auth/httptransport/httptransport_otel_test.go b/auth/httptransport/httptransport_otel_test.go index 06cd219ba15a..843c491cfe5b 100644 --- a/auth/httptransport/httptransport_otel_test.go +++ b/auth/httptransport/httptransport_otel_test.go @@ -19,13 +19,18 @@ import ( "net/http" "net/http/httptest" "testing" + "time" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/googleapis/gax-go/v2" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/sdk/trace/tracetest" + + "github.com/googleapis/gax-go/v2/callctx" oteltrace "go.opentelemetry.io/otel/trace" ) @@ -43,7 +48,11 @@ const ( valLocalhost = "127.0.0.1" ) -func TestNewClient_OpenTelemetry(t *testing.T) { +func TestNewClient_OpenTelemetry_Enabled(t *testing.T) { + gax.TestOnlyResetIsFeatureEnabled() + defer gax.TestOnlyResetIsFeatureEnabled() + t.Setenv("GOOGLE_SDK_GO_EXPERIMENTAL_TRACING", "true") + defer http.DefaultTransport.(*http.Transport).CloseIdleConnections() exporter := tracetest.NewInMemoryExporter() @@ -55,12 +64,15 @@ func TestNewClient_OpenTelemetry(t *testing.T) { otel.SetTracerProvider(tp) tests := []struct { - name string - opts *Options - statusCode int - wantSpans int - wantSpan sdktrace.ReadOnlySpan - wantAttrKeys []attribute.Key + name string + opts *Options + telemetryCtxValues map[string]string + statusCode int + errorType string // "timeout", "cancel", "connection" + wantErr bool + wantSpans int + wantSpan sdktrace.ReadOnlySpan + wantAttrKeys []attribute.Key }{ { name: "telemetry enabled success", @@ -74,15 +86,11 @@ func TestNewClient_OpenTelemetry(t *testing.T) { Code: codes.Unset, }, Attributes: []attribute.KeyValue{ - // In Cloud Trace, this often forms part of the Span Name. keyHTTPRequestMetod.String(valHTTPGet), - // In Cloud Trace, this status code maps to the visual "Status" field - // (e.g., a green checkmark for 200, or an error icon for 5xx). keyHTTPResponseStatus.Int(200), keyNetProtoVersion.String(valHTTP11), - // "server.address", "server.port", and "url.full" are displayed as - // standard attribute keys in the "Attributes" tab. keyServerAddr.String(valLocalhost), + attribute.String("rpc.system.name", "http"), }, }.Snapshot(), wantAttrKeys: []attribute.Key{keyServerPort, keyURLFull}, @@ -91,6 +99,7 @@ func TestNewClient_OpenTelemetry(t *testing.T) { name: "telemetry enabled error", opts: &Options{DisableAuthentication: true}, statusCode: http.StatusInternalServerError, + wantErr: false, // The RoundTrip itself doesn't return an error for 500, it returns a response. wantSpans: 1, wantSpan: tracetest.SpanStub{ Name: "HTTP GET", @@ -101,15 +110,78 @@ func TestNewClient_OpenTelemetry(t *testing.T) { }, Attributes: []attribute.KeyValue{ keyHTTPRequestMetod.String(valHTTPGet), - // In Cloud Trace, 5xx status codes are displayed as errors in the "Status" field. keyHTTPResponseStatus.Int(500), keyNetProtoVersion.String(valHTTP11), keyServerAddr.String(valLocalhost), - keyErrorType.String("500"), // otelhttp adds this on error + keyErrorType.String("500"), + attribute.String("status.message", "500 Internal Server Error"), + attribute.String("rpc.system.name", "http"), }, }.Snapshot(), wantAttrKeys: []attribute.Key{keyServerPort, keyURLFull}, }, + { + name: "telemetry enabled client timeout", + opts: &Options{DisableAuthentication: true}, + errorType: "timeout", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "context deadline exceeded", + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyErrorType.String("context.deadlineExceededError"), + attribute.String("rpc.system.name", "http"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr, attribute.Key("status.message"), attribute.Key("exception.type")}, + }, + { + name: "telemetry enabled client cancelled", + opts: &Options{DisableAuthentication: true}, + errorType: "cancel", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "context canceled", + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyErrorType.String("*errors.errorString"), + attribute.String("rpc.system.name", "http"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr, attribute.Key("status.message"), attribute.Key("exception.type")}, + }, + { + name: "telemetry enabled client connection error", + opts: &Options{DisableAuthentication: true}, + errorType: "connection", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyErrorType.String("*net.OpError"), + attribute.String("rpc.system.name", "http"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr, attribute.Key("status.message"), attribute.Key("exception.type")}, + }, { name: "telemetry disabled", opts: &Options{ @@ -119,6 +191,83 @@ func TestNewClient_OpenTelemetry(t *testing.T) { statusCode: http.StatusOK, wantSpans: 0, }, + { + name: "telemetry enabled metadata enrichment", + opts: &Options{ + DisableAuthentication: true, + InternalOptions: &InternalOptions{ + TelemetryAttributes: map[string]string{ + "gcp.client.service": "myservice", + "gcp.client.version": "1.0.0", + "gcp.client.repo": "googleapis/google-cloud-go", + "gcp.client.artifact": "c.g/auth/httptransport", + "gcp.client.language": "go", + "url.domain": "myservice.googleapis.com", + "ignored.key": "should not be included", + }, + }, + }, + telemetryCtxValues: map[string]string{"resource_name": "my-resource"}, + statusCode: http.StatusOK, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyHTTPResponseStatus.Int(200), + attribute.String("gcp.resource.destination.id", "my-resource"), + attribute.String("gcp.client.service", "myservice"), + attribute.String("gcp.client.version", "1.0.0"), + attribute.String("gcp.client.repo", "googleapis/google-cloud-go"), + attribute.String("gcp.client.artifact", "c.g/auth/httptransport"), + attribute.String("gcp.client.language", "go"), + attribute.String("rpc.system.name", "http"), + attribute.String("url.domain", "myservice.googleapis.com"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, + }, + { + name: "telemetry enabled url template", + opts: &Options{ + DisableAuthentication: true, + }, + telemetryCtxValues: map[string]string{"url_template": "/my/template"}, + statusCode: http.StatusOK, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "GET /my/template", + SpanKind: oteltrace.SpanKindClient, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyHTTPResponseStatus.Int(200), + attribute.String("url.template", "/my/template"), + attribute.String("rpc.system.name", "http"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, + }, + { + name: "telemetry enabled resend count", + opts: &Options{ + DisableAuthentication: true, + }, + telemetryCtxValues: map[string]string{"resend_count": "2"}, + statusCode: http.StatusOK, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyHTTPResponseStatus.Int(200), + attribute.Int("http.request.resend_count", 2), + attribute.String("rpc.system.name", "http"), + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, + }, } for _, tt := range tests { @@ -126,26 +275,51 @@ func TestNewClient_OpenTelemetry(t *testing.T) { exporter.Reset() server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(tt.statusCode) + if tt.errorType == "timeout" { + time.Sleep(100 * time.Millisecond) + } + if tt.statusCode != 0 { + w.WriteHeader(tt.statusCode) + } })) defer server.Close() + if tt.errorType == "connection" { + server.Close() + } + tt.opts.Endpoint = server.URL client, err := NewClient(tt.opts) if err != nil { t.Fatalf("NewClient() = %v, want nil", err) } - req, err := http.NewRequest("GET", server.URL, nil) + ctx := context.Background() + var cancel context.CancelFunc + if tt.errorType == "timeout" { + ctx, cancel = context.WithTimeout(ctx, 10*time.Millisecond) + defer cancel() + } else if tt.errorType == "cancel" { + ctx, cancel = context.WithCancel(ctx) + cancel() + } + + for k, v := range tt.telemetryCtxValues { + ctx = callctx.WithTelemetryContext(ctx, k, v) + } + + req, err := http.NewRequestWithContext(ctx, "GET", server.URL, nil) if err != nil { t.Fatalf("http.NewRequest() = %v, want nil", err) } resp, err := client.Do(req) - if err != nil { - t.Fatalf("client.Do() = %v, want nil", err) + if (err != nil) != tt.wantErr { + t.Errorf("client.Do() error = %v, wantErr %v", err, tt.wantErr) + } + if resp != nil && resp.Body != nil { + resp.Body.Close() } - resp.Body.Close() spans := exporter.GetSpans() if len(spans) != tt.wantSpans { @@ -160,7 +334,7 @@ func TestNewClient_OpenTelemetry(t *testing.T) { if diff := cmp.Diff(tt.wantSpan.SpanKind(), span.SpanKind); diff != "" { t.Errorf("span.SpanKind mismatch (-want +got):\n%s", diff) } - if diff := cmp.Diff(tt.wantSpan.Status(), span.Status); diff != "" { + if diff := cmp.Diff(tt.wantSpan.Status(), span.Status, cmpopts.IgnoreFields(sdktrace.Status{}, "Description")); diff != "" { t.Errorf("span.Status mismatch (-want +got):\n%s", diff) } @@ -211,6 +385,286 @@ func TestNewClient_OpenTelemetry(t *testing.T) { t.Errorf("missing attribute key: %s", wantKey) } } + if _, ok := gotAttrs[attribute.Key("ignored.key")]; ok { + t.Errorf("found unexpected attribute key: ignored.key") + } + } + }) + } +} + +func TestNewClient_OpenTelemetry_Disabled(t *testing.T) { + gax.TestOnlyResetIsFeatureEnabled() + defer gax.TestOnlyResetIsFeatureEnabled() + t.Setenv("GOOGLE_SDK_GO_EXPERIMENTAL_TRACING", "false") + + defer http.DefaultTransport.(*http.Transport).CloseIdleConnections() + + exporter := tracetest.NewInMemoryExporter() + tp := sdktrace.NewTracerProvider(sdktrace.WithSyncer(exporter)) + defer tp.Shutdown(context.Background()) + + // Restore the global tracer provider after the test to avoid side effects. + defer func(prev oteltrace.TracerProvider) { otel.SetTracerProvider(prev) }(otel.GetTracerProvider()) + otel.SetTracerProvider(tp) + + tests := []struct { + name string + opts *Options + telemetryCtxValues map[string]string + statusCode int + errorType string // "timeout", "cancel", "connection" + wantErr bool + wantSpans int + wantSpan sdktrace.ReadOnlySpan + wantAttrKeys []attribute.Key + }{ + { + name: "telemetry enabled success (but gated off)", + opts: &Options{DisableAuthentication: true}, + statusCode: http.StatusOK, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Unset, + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyHTTPResponseStatus.Int(200), + keyNetProtoVersion.String(valHTTP11), + keyServerAddr.String(valLocalhost), + // NO rpc.system + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort, keyURLFull}, // NO url.domain + }, + { + name: "telemetry enabled error (but gated off)", + opts: &Options{DisableAuthentication: true}, + statusCode: http.StatusInternalServerError, + wantErr: false, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + Description: "", + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyHTTPResponseStatus.Int(500), + keyNetProtoVersion.String(valHTTP11), + keyServerAddr.String(valLocalhost), + // NO rpc.system, status.message, error.type + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerPort, keyURLFull}, // NO url.domain + }, + { + name: "telemetry enabled client timeout (but gated off)", + opts: &Options{DisableAuthentication: true}, + errorType: "timeout", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + // NO rpc.system, exception.type, error.type + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, + }, + { + name: "telemetry enabled client cancelled (but gated off)", + opts: &Options{DisableAuthentication: true}, + errorType: "cancel", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + // NO rpc.system, exception.type, error.type + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, + }, + { + name: "telemetry enabled client connection error (but gated off)", + opts: &Options{DisableAuthentication: true}, + errorType: "connection", + wantErr: true, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Status: sdktrace.Status{ + Code: codes.Error, + }, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + // NO rpc.system, exception.type, error.type + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, + }, + { + name: "telemetry disabled", + opts: &Options{ + DisableAuthentication: true, + DisableTelemetry: true, + }, + statusCode: http.StatusOK, + wantSpans: 0, + }, + { + name: "telemetry enabled metadata enrichment (but gated off)", + opts: &Options{ + DisableAuthentication: true, + InternalOptions: &InternalOptions{ + TelemetryAttributes: map[string]string{ + "gcp.client.version": "1.0.0", + "ignored.key": "should not be included", + }, + }, + }, + telemetryCtxValues: map[string]string{"resource_name": "my-resource"}, + statusCode: http.StatusOK, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyHTTPResponseStatus.Int(200), + // NO gcp.* attributes, NO rpc.system + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, // NO url.domain + }, + { + name: "telemetry enabled resend count (but gated off)", + opts: &Options{ + DisableAuthentication: true, + }, + telemetryCtxValues: map[string]string{"resend_count": "2"}, + statusCode: http.StatusOK, + wantSpans: 1, + wantSpan: tracetest.SpanStub{ + Name: "HTTP GET", + SpanKind: oteltrace.SpanKindClient, + Attributes: []attribute.KeyValue{ + keyHTTPRequestMetod.String(valHTTPGet), + keyHTTPResponseStatus.Int(200), + // NO http.request.resend_count + }, + }.Snapshot(), + wantAttrKeys: []attribute.Key{keyServerAddr}, // NO url.domain + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + exporter.Reset() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if tt.errorType == "timeout" { + time.Sleep(100 * time.Millisecond) + } + if tt.statusCode != 0 { + w.WriteHeader(tt.statusCode) + } + })) + defer server.Close() + + if tt.errorType == "connection" { + server.Close() + } + + tt.opts.Endpoint = server.URL + client, err := NewClient(tt.opts) + if err != nil { + t.Fatalf("NewClient() = %v, want nil", err) + } + + ctx := context.Background() + var cancel context.CancelFunc + if tt.errorType == "timeout" { + ctx, cancel = context.WithTimeout(ctx, 10*time.Millisecond) + defer cancel() + } else if tt.errorType == "cancel" { + ctx, cancel = context.WithCancel(ctx) + cancel() + } + + for k, v := range tt.telemetryCtxValues { + ctx = callctx.WithTelemetryContext(ctx, k, v) + } + + req, err := http.NewRequestWithContext(ctx, "GET", server.URL, nil) + if err != nil { + t.Fatalf("http.NewRequest() = %v, want nil", err) + } + + resp, err := client.Do(req) + if (err != nil) != tt.wantErr { + t.Errorf("client.Do() error = %v, wantErr %v", err, tt.wantErr) + } + if resp != nil && resp.Body != nil { + resp.Body.Close() + } + + spans := exporter.GetSpans() + if len(spans) != tt.wantSpans { + t.Fatalf("len(spans) = %d, want %d", len(spans), tt.wantSpans) + } + + if tt.wantSpans > 0 { + span := exporter.GetSpans()[0] + if diff := cmp.Diff(tt.wantSpan.Name(), span.Name); diff != "" { + t.Errorf("span.Name mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(tt.wantSpan.SpanKind(), span.SpanKind); diff != "" { + t.Errorf("span.SpanKind mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(tt.wantSpan.Status(), span.Status, cmpopts.IgnoreFields(sdktrace.Status{}, "Description")); diff != "" { + t.Errorf("span.Status mismatch (-want +got):\n%s", diff) + } + + gotAttrs := map[attribute.Key]attribute.Value{} + for _, attr := range span.Attributes { + gotAttrs[attr.Key] = attr.Value + } + for _, wantAttr := range tt.wantSpan.Attributes() { + if gotVal, ok := gotAttrs[wantAttr.Key]; !ok { + t.Errorf("missing attribute: %s", wantAttr.Key) + } else { + // Use simple value comparison for non-dynamic fields + if diff := cmp.Diff(wantAttr.Value, gotVal, cmp.AllowUnexported(attribute.Value{})); diff != "" { + t.Errorf("attribute %s mismatch (-want +got):\n%s", wantAttr.Key, diff) + } + } + } + for _, wantKey := range tt.wantAttrKeys { + if _, ok := gotAttrs[wantKey]; !ok { + t.Errorf("missing attribute key: %s", wantKey) + } + } + if _, ok := gotAttrs[attribute.Key("ignored.key")]; ok { + t.Errorf("found unexpected attribute key: ignored.key") + } } }) } diff --git a/auth/httptransport/transport.go b/auth/httptransport/transport.go index 3feb997c76d4..2ece1ea360eb 100644 --- a/auth/httptransport/transport.go +++ b/auth/httptransport/transport.go @@ -17,9 +17,12 @@ package httptransport import ( "context" "crypto/tls" + "errors" + "fmt" "net" "net/http" "os" + "strconv" "time" "cloud.google.com/go/auth" @@ -28,7 +31,11 @@ import ( "cloud.google.com/go/auth/internal/transport" "cloud.google.com/go/auth/internal/transport/cert" "cloud.google.com/go/auth/internal/transport/headers" + "github.com/googleapis/gax-go/v2" + "github.com/googleapis/gax-go/v2/callctx" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "golang.org/x/net/http2" ) @@ -173,7 +180,81 @@ func addOpenTelemetryTransport(trans http.RoundTripper, opts *Options) http.Roun if opts.DisableTelemetry { return trans } - return otelhttp.NewTransport(trans) + if !gax.IsFeatureEnabled("TRACING") { + return otelhttp.NewTransport(trans) + } + var staticAttrs []attribute.KeyValue + if opts.InternalOptions != nil { + staticAttrs = transport.StaticTelemetryAttributes(opts.InternalOptions.TelemetryAttributes) + } + otelOpts := []otelhttp.Option{ + otelhttp.WithSpanOptions(trace.WithAttributes(staticAttrs...)), + } + return otelhttp.NewTransport(&otelAttributeTransport{ + base: trans, + }, otelOpts...) +} + +// otelAttributeTransport is a wrapper around an http.RoundTripper that adds +// custom Google Cloud-specific attributes to OpenTelemetry spans. +type otelAttributeTransport struct { + base http.RoundTripper +} + +// RoundTrip intercepts the HTTP request and response to enrich the active +// OpenTelemetry span with static and dynamic attributes, as well as detailed +// error information. +func (t *otelAttributeTransport) RoundTrip(req *http.Request) (*http.Response, error) { + span := trace.SpanFromContext(req.Context()) + if span.IsRecording() { + var attrs []attribute.KeyValue + attrs = append(attrs, attribute.String("rpc.system.name", "http")) + if resName, ok := callctx.TelemetryFromContext(req.Context(), "resource_name"); ok { + attrs = append(attrs, attribute.String("gcp.resource.destination.id", resName)) + } + if resendCountStr, ok := callctx.TelemetryFromContext(req.Context(), "resend_count"); ok { + if count, err := strconv.Atoi(resendCountStr); err == nil { + attrs = append(attrs, attribute.Int("http.request.resend_count", count)) + } + } + if urlTemplate, ok := callctx.TelemetryFromContext(req.Context(), "url_template"); ok { + attrs = append(attrs, attribute.String("url.template", urlTemplate)) + span.SetName(fmt.Sprintf("%s %s", req.Method, urlTemplate)) + } + span.SetAttributes(attrs...) + } + + resp, err := t.base.RoundTrip(req) + + if span.IsRecording() { + if err != nil { + var errorType string + switch { + case errors.Is(err, context.DeadlineExceeded): + errorType = "CLIENT_TIMEOUT" + case errors.Is(err, context.Canceled): + errorType = "CLIENT_CANCELLED" + default: + errorType = "CLIENT_CONNECTION_ERROR" + } + span.SetAttributes( + attribute.String("error.type", errorType), + attribute.String("status.message", err.Error()), + attribute.String("exception.type", fmt.Sprintf("%T", err)), + ) + } else { + span.SetAttributes(attribute.Int("http.response.status_code", resp.StatusCode)) + if resp.StatusCode >= 400 { + errorType := strconv.Itoa(resp.StatusCode) + span.SetAttributes( + attribute.String("error.type", errorType), + attribute.String("status.message", resp.Status), + ) + } + } + } + + return resp, err } type authTransport struct { diff --git a/auth/internal/transport/transport.go b/auth/internal/transport/transport.go index 5c8721efa9f2..fb0a8a1e0748 100644 --- a/auth/internal/transport/transport.go +++ b/auth/internal/transport/transport.go @@ -24,8 +24,38 @@ import ( "time" "cloud.google.com/go/auth/credentials" + "go.opentelemetry.io/otel/attribute" ) +// knownKeys provides keys for reading telemetry attributes from Context. +// It provides an implicit contract with generated client library code +// using the same keys. The keys in this collection should not be removed +// or modified. New keys may be added, but they will need to be explicitly +// used in code referencing this collection in order to appear in telemetry. +var knownKeys = []string{ + "gcp.client.service", + "gcp.client.version", + "gcp.client.repo", + "gcp.client.artifact", + "gcp.client.language", + "url.domain", +} + +// StaticTelemetryAttributes selectively converts known keys from a map of +// strings to Open Telemetry attributes. +func StaticTelemetryAttributes(m map[string]string) []attribute.KeyValue { + var staticAttrs []attribute.KeyValue + if m == nil { + return staticAttrs + } + for _, k := range knownKeys { + if v, ok := m[k]; ok { + staticAttrs = append(staticAttrs, attribute.String(k, v)) + } + } + return staticAttrs +} + // CloneDetectOptions clones a user set detect option into some new memory that // we can internally manipulate before sending onto the detect package. func CloneDetectOptions(oldDo *credentials.DetectOptions) *credentials.DetectOptions { diff --git a/auth/internal/transport/transport_test.go b/auth/internal/transport/transport_test.go index 9a4643ab2e01..fb4db0205ad3 100644 --- a/auth/internal/transport/transport_test.go +++ b/auth/internal/transport/transport_test.go @@ -112,3 +112,22 @@ func TestCloneDetectOptions(t *testing.T) { t.Fatalf("Scopes should not reference the same slice") } } + +func TestStaticTelemetryAttributes_KnownKeys(t *testing.T) { + // Existing keys are used by generated code and cannot be removed or modified. + // New keys may be added, but they will need to also be added to + // auth/internal/transport/transport.go and transport wrappers + // auth/httptransport/transport.go and auth/grpctransport/grpctransport.go + // before they will appear in telemetry. + want := []string{ + "gcp.client.service", + "gcp.client.version", + "gcp.client.repo", + "gcp.client.artifact", + "gcp.client.language", + "url.domain", + } + if !reflect.DeepEqual(knownKeys, want) { + t.Errorf("got %v, want %v", knownKeys, want) + } +}