Skip to content

Commit 21e5323

Browse files
committed
implement otel.sdk.span.processor.queue_capacity and otel.sdk.span.processor.spans_processed self-observability metrics
1 parent f7f8890 commit 21e5323

File tree

1 file changed

+56
-10
lines changed

1 file changed

+56
-10
lines changed

sdk/trace/batch_span_processor.go

+56-10
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,16 @@ type batchSpanProcessor struct {
6969
e SpanExporter
7070
o BatchSpanProcessorOptions
7171

72-
queue chan ReadOnlySpan
73-
dropped uint32
74-
callbackRegistration metric.Registration
72+
queue chan ReadOnlySpan
73+
dropped uint32
74+
75+
callbackRegistration metric.Registration
76+
spansProcessedCounter metric.Int64Counter
77+
successAttributes metric.MeasurementOption
78+
alreadyShutdownAttributes metric.MeasurementOption
79+
noExporterAttributes metric.MeasurementOption
80+
notSampledAttributes metric.MeasurementOption
81+
queueFullAttributes metric.MeasurementOption
7582

7683
batch []ReadOnlySpan
7784
batchMutex sync.Mutex
@@ -130,6 +137,15 @@ func NewBatchSpanProcessor(exporter SpanExporter, options ...BatchSpanProcessorO
130137
return bsp
131138
}
132139

140+
// processorID is the package-level atomic counter from which nextProcessorID
// draws its values.
var processorID atomic.Uint64
141+
142+
// nextProcessorID returns an identifier for a batch span processor. The first
143+
// call returns 0 and every later call returns a value one greater than the
// call before it: Add yields the post-increment count, so 1 is subtracted.
144+
func nextProcessorID() int64 {
145+
return int64(processorID.Add(1) - 1)
146+
}
147+
148+
// configureSelfObservability configures metrics for the batch span processor.
133149
func (bsp *batchSpanProcessor) configureSelfObservability() {
134150
mp := otel.GetMeterProvider()
135151
if !x.SelfObservability.Enabled() {
@@ -140,23 +156,44 @@ func (bsp *batchSpanProcessor) configureSelfObservability() {
140156
metric.WithInstrumentationVersion(version()),
141157
)
142158

143-
queueSizeCounter, err := meter.Int64ObservableUpDownCounter("otel.sdk.span.processor.queue_size",
159+
queueCapacityUpDownCounter, err := meter.Int64ObservableUpDownCounter("otel.sdk.span.processor.queue_capacity",
160+
metric.WithUnit("{span}"),
161+
metric.WithDescription("The maximum number of spans the queue of a given instance of an SDK span processor can hold."),
162+
)
163+
if err != nil {
164+
otel.Handle(err)
165+
}
166+
queueSizeUpDownCounter, err := meter.Int64ObservableUpDownCounter("otel.sdk.span.processor.queue_size",
144167
metric.WithUnit("{span}"),
145168
metric.WithDescription("The number of spans in the queue of a given instance of an SDK span processor."),
146169
)
147170
if err != nil {
148171
otel.Handle(err)
149172
}
150-
151-
attrsOpt := metric.WithAttributes(
152-
attribute.String("otel.sdk.component.name", fmt.Sprintf("batching_span_processor/%p", bsp)),
173+
bsp.spansProcessedCounter, err = meter.Int64Counter("otel.sdk.span.processor.spans_processed",
174+
metric.WithUnit("{span}"),
175+
metric.WithDescription("The number of spans for which the processing has finished, either successful or failed."),
153176
)
177+
if err != nil {
178+
otel.Handle(err)
179+
}
180+
181+
componentTypeAttr := attribute.String("otel.sdk.component.type", "batching_span_processor")
182+
componentNameAttr := attribute.String("otel.sdk.component.name", fmt.Sprintf("batching_span_processor/%d", nextProcessorID()))
183+
bsp.successAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr, attribute.String("error.type", ""))
184+
bsp.alreadyShutdownAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr, attribute.String("error.type", "already_shutdown"))
185+
bsp.noExporterAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr, attribute.String("error.type", "no_exporter"))
186+
bsp.notSampledAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr, attribute.String("error.type", "not_sampled"))
187+
bsp.queueFullAttributes = metric.WithAttributes(componentNameAttr, componentTypeAttr, attribute.String("error.type", "queue_full"))
188+
callabckAttributesOpt := metric.WithAttributes(componentNameAttr, componentTypeAttr)
154189
bsp.callbackRegistration, err = meter.RegisterCallback(
155190
func(ctx context.Context, o metric.Observer) error {
156-
o.ObserveInt64(queueSizeCounter, int64(len(bsp.queue)), attrsOpt)
191+
o.ObserveInt64(queueSizeUpDownCounter, int64(len(bsp.queue)), callabckAttributesOpt)
192+
o.ObserveInt64(queueCapacityUpDownCounter, int64(bsp.o.MaxQueueSize), callabckAttributesOpt)
193+
// TODO: can we track the number of spans batched, but not exported?
157194
return nil
158195
},
159-
queueSizeCounter)
196+
queueSizeUpDownCounter, queueCapacityUpDownCounter)
160197
if err != nil {
161198
otel.Handle(err)
162199
}
@@ -167,13 +204,16 @@ func (bsp *batchSpanProcessor) OnStart(parent context.Context, s ReadWriteSpan)
167204

168205
// OnEnd method enqueues a ReadOnlySpan for later processing.
169206
func (bsp *batchSpanProcessor) OnEnd(s ReadOnlySpan) {
207+
ctx := context.Background()
170208
// Do not enqueue spans after Shutdown.
171209
if bsp.stopped.Load() {
210+
bsp.spansProcessedCounter.Add(ctx, 1, bsp.alreadyShutdownAttributes)
172211
return
173212
}
174213

175214
// Do not enqueue spans if we are just going to drop them.
176215
if bsp.e == nil {
216+
bsp.spansProcessedCounter.Add(ctx, 1, bsp.noExporterAttributes)
177217
return
178218
}
179219
bsp.enqueue(s)
@@ -315,6 +355,7 @@ func (bsp *batchSpanProcessor) exportSpans(ctx context.Context) error {
315355

316356
if l := len(bsp.batch); l > 0 {
317357
global.Debug("exporting spans", "count", len(bsp.batch), "total_dropped", atomic.LoadUint32(&bsp.dropped))
358+
bsp.spansProcessedCounter.Add(ctx, int64(len(bsp.batch)), bsp.successAttributes)
318359
err := bsp.e.ExportSpans(ctx, bsp.batch)
319360

320361
// A new batch is always created after exporting, even if the batch failed to be exported.
@@ -416,19 +457,23 @@ func (bsp *batchSpanProcessor) enqueue(sd ReadOnlySpan) {
416457

417458
// enqueueBlockOnQueueFull tries to send sd on bsp.queue, waiting until either
// the send succeeds or ctx is done. It returns true only when sd was sent to
// the queue. A span whose SpanContext is not sampled is never sent. Such a
// span, and a span given up on because ctx finished first, each add 1 to
// bsp.spansProcessedCounter before false is returned.
func (bsp *batchSpanProcessor) enqueueBlockOnQueueFull(ctx context.Context, sd ReadOnlySpan) bool {
418459
if !sd.SpanContext().IsSampled() {
460+
// Count the unsampled span using the not-sampled attribute set.
bsp.spansProcessedCounter.Add(ctx, 1, bsp.notSampledAttributes)
419461
return false
420462
}
421463

464+
// TODO: Can we track the number of spans blocking on the queue?
422465
// No default case: this select waits until the queue accepts sd or ctx is done.
select {
423466
case bsp.queue <- sd:
424467
return true
425468
case <-ctx.Done():
469+
// ctx finished before the queue accepted the span; count it using the
// queue-full attribute set.
bsp.spansProcessedCounter.Add(ctx, 1, bsp.queueFullAttributes)
426470
return false
427471
}
428472
}
429473

430-
func (bsp *batchSpanProcessor) enqueueDrop(_ context.Context, sd ReadOnlySpan) bool {
474+
func (bsp *batchSpanProcessor) enqueueDrop(ctx context.Context, sd ReadOnlySpan) bool {
431475
if !sd.SpanContext().IsSampled() {
476+
bsp.spansProcessedCounter.Add(ctx, 1, bsp.notSampledAttributes)
432477
return false
433478
}
434479

@@ -437,6 +482,7 @@ func (bsp *batchSpanProcessor) enqueueDrop(_ context.Context, sd ReadOnlySpan) b
437482
return true
438483
default:
439484
atomic.AddUint32(&bsp.dropped, 1)
485+
bsp.spansProcessedCounter.Add(ctx, 1, bsp.queueFullAttributes)
440486
}
441487
return false
442488
}

0 commit comments

Comments
 (0)