From 9191fc4a32b4d42bd8a857592dd039203226bca7 Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 17:11:15 -0700 Subject: [PATCH 1/9] Use timeout cause context in batchSpanProcessor --- sdk/trace/batch_span_processor.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/trace/batch_span_processor.go b/sdk/trace/batch_span_processor.go index 6872cbb4e7a..241090788c3 100644 --- a/sdk/trace/batch_span_processor.go +++ b/sdk/trace/batch_span_processor.go @@ -5,6 +5,7 @@ package trace // import "go.opentelemetry.io/otel/sdk/trace" import ( "context" + "errors" "sync" "sync/atomic" "time" @@ -267,7 +268,7 @@ func (bsp *batchSpanProcessor) exportSpans(ctx context.Context) error { if bsp.o.ExportTimeout > 0 { var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, bsp.o.ExportTimeout) + ctx, cancel = context.WithTimeoutCause(ctx, bsp.o.ExportTimeout, errors.New("processor export timeout exceeded")) defer cancel() } From 5d0d252c23b7314978a3a2bb1465089d35ca5782 Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 17:14:02 -0700 Subject: [PATCH 2/9] Use context with timeout cause in otlptracegrpc --- exporters/otlp/otlptrace/otlptracegrpc/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporters/otlp/otlptrace/otlptracegrpc/client.go b/exporters/otlp/otlptrace/otlptracegrpc/client.go index 8409b5f8f95..cfbd58f6ab7 100644 --- a/exporters/otlp/otlptrace/otlptracegrpc/client.go +++ b/exporters/otlp/otlptrace/otlptracegrpc/client.go @@ -223,7 +223,7 @@ func (c *client) exportContext(parent context.Context) (context.Context, context ) if c.exportTimeout > 0 { - ctx, cancel = context.WithTimeout(parent, c.exportTimeout) + ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("exporter export timeout exceeded")) } else { ctx, cancel = context.WithCancel(parent) } From f91613c85ce4743ef5e65c326dcc8bb01503a07a Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 17:21:56 
-0700 Subject: [PATCH 3/9] Use cause of context error in otlp retry --- exporters/otlp/otlplog/otlploggrpc/internal/retry/retry.go | 2 +- exporters/otlp/otlplog/otlploghttp/internal/retry/retry.go | 2 +- .../otlp/otlpmetric/otlpmetricgrpc/internal/retry/retry.go | 2 +- .../otlp/otlpmetric/otlpmetrichttp/internal/retry/retry.go | 2 +- exporters/otlp/otlptrace/otlptracegrpc/internal/retry/retry.go | 2 +- exporters/otlp/otlptrace/otlptracehttp/internal/retry/retry.go | 2 +- internal/shared/otlp/retry/retry.go.tmpl | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/exporters/otlp/otlplog/otlploggrpc/internal/retry/retry.go b/exporters/otlp/otlplog/otlploggrpc/internal/retry/retry.go index 896c3a3032b..fa59467747f 100644 --- a/exporters/otlp/otlplog/otlploggrpc/internal/retry/retry.go +++ b/exporters/otlp/otlplog/otlploggrpc/internal/retry/retry.go @@ -132,7 +132,7 @@ func wait(ctx context.Context, delay time.Duration) error { select { case <-timer.C: default: - return ctx.Err() + return context.Cause(ctx) } case <-timer.C: } diff --git a/exporters/otlp/otlplog/otlploghttp/internal/retry/retry.go b/exporters/otlp/otlplog/otlploghttp/internal/retry/retry.go index bd9a750a1f9..a0a9dc1334a 100644 --- a/exporters/otlp/otlplog/otlploghttp/internal/retry/retry.go +++ b/exporters/otlp/otlplog/otlploghttp/internal/retry/retry.go @@ -132,7 +132,7 @@ func wait(ctx context.Context, delay time.Duration) error { select { case <-timer.C: default: - return ctx.Err() + return context.Cause(ctx) } case <-timer.C: } diff --git a/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/retry/retry.go b/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/retry/retry.go index 37cc6c519ca..80691ac3a9f 100644 --- a/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/retry/retry.go +++ b/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/retry/retry.go @@ -132,7 +132,7 @@ func wait(ctx context.Context, delay time.Duration) error { select { case <-timer.C: default: - return ctx.Err() + 
return context.Cause(ctx) } case <-timer.C: } diff --git a/exporters/otlp/otlpmetric/otlpmetrichttp/internal/retry/retry.go b/exporters/otlp/otlpmetric/otlpmetrichttp/internal/retry/retry.go index c855bdc93d6..8a5fa80eac6 100644 --- a/exporters/otlp/otlpmetric/otlpmetrichttp/internal/retry/retry.go +++ b/exporters/otlp/otlpmetric/otlpmetrichttp/internal/retry/retry.go @@ -132,7 +132,7 @@ func wait(ctx context.Context, delay time.Duration) error { select { case <-timer.C: default: - return ctx.Err() + return context.Cause(ctx) } case <-timer.C: } diff --git a/exporters/otlp/otlptrace/otlptracegrpc/internal/retry/retry.go b/exporters/otlp/otlptrace/otlptracegrpc/internal/retry/retry.go index 777e68a7bbd..259a898ae77 100644 --- a/exporters/otlp/otlptrace/otlptracegrpc/internal/retry/retry.go +++ b/exporters/otlp/otlptrace/otlptracegrpc/internal/retry/retry.go @@ -132,7 +132,7 @@ func wait(ctx context.Context, delay time.Duration) error { select { case <-timer.C: default: - return ctx.Err() + return context.Cause(ctx) } case <-timer.C: } diff --git a/exporters/otlp/otlptrace/otlptracehttp/internal/retry/retry.go b/exporters/otlp/otlptrace/otlptracehttp/internal/retry/retry.go index e9d35c7fae3..107428fa6cf 100644 --- a/exporters/otlp/otlptrace/otlptracehttp/internal/retry/retry.go +++ b/exporters/otlp/otlptrace/otlptracehttp/internal/retry/retry.go @@ -132,7 +132,7 @@ func wait(ctx context.Context, delay time.Duration) error { select { case <-timer.C: default: - return ctx.Err() + return context.Cause(ctx) } case <-timer.C: } diff --git a/internal/shared/otlp/retry/retry.go.tmpl b/internal/shared/otlp/retry/retry.go.tmpl index 896c3a3032b..fa59467747f 100644 --- a/internal/shared/otlp/retry/retry.go.tmpl +++ b/internal/shared/otlp/retry/retry.go.tmpl @@ -132,7 +132,7 @@ func wait(ctx context.Context, delay time.Duration) error { select { case <-timer.C: default: - return ctx.Err() + return context.Cause(ctx) } case <-timer.C: } From 
cf211a628271963459ecfd0a4b40ecdae3c7a114 Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 17:44:52 -0700 Subject: [PATCH 4/9] Use context with timeout cause in otlploggrpc --- exporters/otlp/otlplog/otlploggrpc/client.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/exporters/otlp/otlplog/otlploggrpc/client.go b/exporters/otlp/otlplog/otlploggrpc/client.go index 05abd92eeec..7dc43f74560 100644 --- a/exporters/otlp/otlplog/otlploggrpc/client.go +++ b/exporters/otlp/otlplog/otlploggrpc/client.go @@ -5,6 +5,7 @@ package otlploggrpc // import "go.opentelemetry.io/otel/exporters/otlp/otlplog/o import ( "context" + "errors" "fmt" "time" @@ -192,7 +193,7 @@ func (c *client) exportContext(parent context.Context) (context.Context, context ) if c.exportTimeout > 0 { - ctx, cancel = context.WithTimeout(parent, c.exportTimeout) + ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("export timeout")) } else { ctx, cancel = context.WithCancel(parent) } @@ -228,6 +229,8 @@ func retryable(err error) (bool, time.Duration) { func retryableGRPCStatus(s *status.Status) (bool, time.Duration) { switch s.Code() { + // Follows the retryable error codes defined in + // https://opentelemetry.io/docs/specs/otlp/#failures case codes.Canceled, codes.DeadlineExceeded, codes.Aborted, From 5f6d127da9372b5dead4404c89f3e668c89254e4 Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 17:47:37 -0700 Subject: [PATCH 5/9] Use context with timeout cause in otlpmetricgrpc --- exporters/otlp/otlpmetric/otlpmetricgrpc/client.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go b/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go index e0fa0570a81..9446e205a68 100644 --- a/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go +++ b/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go @@ -5,6 +5,7 @@ package otlpmetricgrpc // import 
"go.opentelemetry.io/otel/exporters/otlp/otlpme import ( "context" + "errors" "time" "google.golang.org/genproto/googleapis/rpc/errdetails" @@ -149,7 +150,7 @@ func (c *client) exportContext(parent context.Context) (context.Context, context ) if c.exportTimeout > 0 { - ctx, cancel = context.WithTimeout(parent, c.exportTimeout) + ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("export timeout")) } else { ctx, cancel = context.WithCancel(parent) } From f586178a759ec025feccdede58d9e9ff5f03c3f9 Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 18:02:01 -0700 Subject: [PATCH 6/9] Unify timeout content for exporter --- exporters/otlp/otlplog/otlploggrpc/client.go | 2 +- exporters/otlp/otlpmetric/otlpmetricgrpc/client.go | 2 +- exporters/otlp/otlptrace/otlptracegrpc/client.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/exporters/otlp/otlplog/otlploggrpc/client.go b/exporters/otlp/otlplog/otlploggrpc/client.go index 7dc43f74560..1add3f33330 100644 --- a/exporters/otlp/otlplog/otlploggrpc/client.go +++ b/exporters/otlp/otlplog/otlploggrpc/client.go @@ -193,7 +193,7 @@ func (c *client) exportContext(parent context.Context) (context.Context, context ) if c.exportTimeout > 0 { - ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("export timeout")) + ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("exporter export timeout")) } else { ctx, cancel = context.WithCancel(parent) } diff --git a/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go b/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go index 9446e205a68..82a4c2c2a1e 100644 --- a/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go +++ b/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go @@ -150,7 +150,7 @@ func (c *client) exportContext(parent context.Context) (context.Context, context ) if c.exportTimeout > 0 { - ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("export timeout")) + 
ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("exporter export timeout")) } else { ctx, cancel = context.WithCancel(parent) } diff --git a/exporters/otlp/otlptrace/otlptracegrpc/client.go b/exporters/otlp/otlptrace/otlptracegrpc/client.go index cfbd58f6ab7..8236c995a9c 100644 --- a/exporters/otlp/otlptrace/otlptracegrpc/client.go +++ b/exporters/otlp/otlptrace/otlptracegrpc/client.go @@ -223,7 +223,7 @@ func (c *client) exportContext(parent context.Context) (context.Context, context ) if c.exportTimeout > 0 { - ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("exporter export timeout exceeded")) + ctx, cancel = context.WithTimeoutCause(parent, c.exportTimeout, errors.New("exporter export timeout")) } else { ctx, cancel = context.WithCancel(parent) } From 36982d4c1f7f3f4119f7260d532feef12ccde3f7 Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 18:24:08 -0700 Subject: [PATCH 7/9] Use context with timeout cause in metric PeriodicReader --- sdk/metric/periodic_reader.go | 6 +++--- sdk/trace/batch_span_processor.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/metric/periodic_reader.go b/sdk/metric/periodic_reader.go index ebb9a0463b3..0a48aed74dd 100644 --- a/sdk/metric/periodic_reader.go +++ b/sdk/metric/periodic_reader.go @@ -202,7 +202,7 @@ func (r *PeriodicReader) aggregation( // collectAndExport gather all metric data related to the periodicReader r from // the SDK and exports it with r's exporter. func (r *PeriodicReader) collectAndExport(ctx context.Context) error { - ctx, cancel := context.WithTimeout(ctx, r.timeout) + ctx, cancel := context.WithTimeoutCause(ctx, r.timeout, errors.New("reader collect and export timeout")) defer cancel() // TODO (#3047): Use a sync.Pool or persistent pointer instead of allocating rm every Collect. @@ -278,7 +278,7 @@ func (r *PeriodicReader) ForceFlush(ctx context.Context) error { // Prioritize the ctx timeout if it is set. 
if _, ok := ctx.Deadline(); !ok { var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, r.timeout) + ctx, cancel = context.WithTimeoutCause(ctx, r.timeout, errors.New("reader force flush timeout")) defer cancel() } @@ -311,7 +311,7 @@ func (r *PeriodicReader) Shutdown(ctx context.Context) error { // Prioritize the ctx timeout if it is set. if _, ok := ctx.Deadline(); !ok { var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, r.timeout) + ctx, cancel = context.WithTimeoutCause(ctx, r.timeout, errors.New("reader shutdown timeout")) defer cancel() } diff --git a/sdk/trace/batch_span_processor.go b/sdk/trace/batch_span_processor.go index 241090788c3..6966ed861e6 100644 --- a/sdk/trace/batch_span_processor.go +++ b/sdk/trace/batch_span_processor.go @@ -268,7 +268,7 @@ func (bsp *batchSpanProcessor) exportSpans(ctx context.Context) error { if bsp.o.ExportTimeout > 0 { var cancel context.CancelFunc - ctx, cancel = context.WithTimeoutCause(ctx, bsp.o.ExportTimeout, errors.New("processor export timeout exceeded")) + ctx, cancel = context.WithTimeoutCause(ctx, bsp.o.ExportTimeout, errors.New("processor export timeout")) defer cancel() } From ddec035e80faddccb7b72057dc4de3426d70d8ec Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 18:36:13 -0700 Subject: [PATCH 8/9] Use context with timeout cause in log batch processor --- sdk/log/exporter.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/log/exporter.go b/sdk/log/exporter.go index 8cef5dde6b5..a9d3c439ba3 100644 --- a/sdk/log/exporter.go +++ b/sdk/log/exporter.go @@ -119,7 +119,9 @@ func newTimeoutExporter(exp Exporter, timeout time.Duration) Exporter { // Export sets the timeout of ctx before calling the Exporter e wraps. func (e *timeoutExporter) Export(ctx context.Context, records []Record) error { - ctx, cancel := context.WithTimeout(ctx, e.timeout) + // This is only used by the batch processor, and it takes the processor timeout config.
+ // Thus, the error message points to the processor so that users know they should adjust the processor timeout. + ctx, cancel := context.WithTimeoutCause(ctx, e.timeout, errors.New("processor export timeout")) defer cancel() return e.Exporter.Export(ctx, records) } From 94c4243fe73018b1cb7698c47ff8a4ca11df94a6 Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 11 Jun 2025 23:27:23 -0700 Subject: [PATCH 9/9] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5855e7d532f..4e6e4744371 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - The semantic conventions have been upgraded from `v1.26.0` to `v1.34.0` in `go.opentelemetry.io/otel/sdk/trace`. (#6835) - The semantic conventions have been upgraded from `v1.26.0` to `v1.34.0` in `go.opentelemetry.io/otel/trace`. (#6836) - `Record.Resource` now returns `*resource.Resource` instead of `resource.Resource` in `go.opentelemetry.io/otel/sdk/log`. (#6864) +- Retry now shows error cause for context timeout in `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc`, `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc`, `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc`, `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp`, `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp`, `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp`. (#6898) ### Fixed