Skip to content
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
override OpenTelemetry.Trace.TracerProvider.Dispose(bool disposing) -> void
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
override OpenTelemetry.Trace.TracerProvider.Dispose(bool disposing) -> void
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
override OpenTelemetry.Trace.TracerProvider.Dispose(bool disposing) -> void
4 changes: 4 additions & 0 deletions src/OpenTelemetry.Api/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
trace was running (`Activity.Current != null`).
([#4890](https://github.com/open-telemetry/opentelemetry-dotnet/pull/4890))

* Added a `Tracer` cache inside of `TracerProvider` to prevent repeated calls to
`GetTracer` from leaking memory.
([#4906](https://github.com/open-telemetry/opentelemetry-dotnet/pull/4906))

## 1.6.0

Released 2023-Sep-05
Expand Down
10 changes: 6 additions & 4 deletions src/OpenTelemetry.Api/Trace/Tracer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ namespace OpenTelemetry.Trace;
/// <remarks>Tracer is a wrapper around <see cref="System.Diagnostics.ActivitySource"/> class.</remarks>
public class Tracer
{
internal readonly ActivitySource ActivitySource;
internal ActivitySource? ActivitySource;

internal Tracer(ActivitySource activitySource)
internal Tracer(ActivitySource? activitySource)
{
this.ActivitySource = activitySource;
}
Expand Down Expand Up @@ -213,7 +213,9 @@ private TelemetrySpan StartSpanHelper(
IEnumerable<Link>? links = null,
DateTimeOffset startTime = default)
{
if (!this.ActivitySource.HasListeners())
var activitySource = this.ActivitySource;

if (!(activitySource?.HasListeners() ?? false))
{
return TelemetrySpan.NoopInstance;
}
Expand All @@ -230,7 +232,7 @@ private TelemetrySpan StartSpanHelper(

try
{
var activity = this.ActivitySource.StartActivity(name, activityKind, parentContext.ActivityContext, initialAttributes?.Attributes ?? null, activityLinks, startTime);
var activity = activitySource.StartActivity(name, activityKind, parentContext.ActivityContext, initialAttributes?.Attributes ?? null, activityLinks, startTime);
return activity == null
? TelemetrySpan.NoopInstance
: new TelemetrySpan(activity);
Expand Down
88 changes: 85 additions & 3 deletions src/OpenTelemetry.Api/Trace/TracerProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

#nullable enable

using System.Diagnostics;
using System.Collections.Concurrent;
#if NET6_0_OR_GREATER
using System.Diagnostics.CodeAnalysis;
#endif

namespace OpenTelemetry.Trace;

Expand All @@ -25,6 +28,8 @@ namespace OpenTelemetry.Trace;
/// </summary>
public class TracerProvider : BaseProvider
{
private ConcurrentDictionary<TracerKey, Tracer>? tracers = new();

/// <summary>
/// Initializes a new instance of the <see cref="TracerProvider"/> class.
/// </summary>
Expand All @@ -43,6 +48,83 @@ protected TracerProvider()
/// <param name="name">Name identifying the instrumentation library.</param>
/// <param name="version">Version of the instrumentation library.</param>
/// <returns>Tracer instance.</returns>
public Tracer GetTracer(string name, string? version = null)
=> new(new ActivitySource(name ?? string.Empty, version));
public Tracer GetTracer(
#if NET6_0_OR_GREATER
[AllowNull]
#endif
string name,
string? version = null)
{
var tracers = this.tracers;
if (tracers == null)
{
// Note: Returns a no-op Tracer once dispose has been called.
return new(activitySource: null);
}

var key = new TracerKey(name, version);

Retry:
if (!tracers.TryGetValue(key, out var tracer))
{
lock (tracers)
{
if (this.tracers == null)
{
// Note: We check here for a race with Dispose and return a
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we need to set this.tracers = null inside the same lock. Else we could still run into a situation where some thread calling Dispose sets this.tracers to null after this if check and before the new entry is added to the dictionary. We would want to return a no-op tracer in that case, but we would end up returning a valid tracer.

Copy link
Member Author

@CodeBlanch CodeBlanch Oct 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just checked it a couple times. I think it is good! Could be I'm not seeing something though. Can you write out a flow for me that you think is flawed? Here are a couple flows I'm imagining.

Case where Dispose runs in the middle of the writer and gets the lock...

  • Writer thread reads the this.tracers on Line 58. It is valid so it begins its work.
  • Dispose thread sets this.tracers to null.
  • Dispose thread takes the lock.
  • Reader thread misses the cache and tries to take the lock. It has to wait.
  • Dispose thread finishes its clean up and releases the lock.
  • Writer thread gets the lock. Now it checks this.tracers == null. This will be true now and it will return a no-op instance.

Case where Dispose runs in the middle of the writer and waits on the lock...

  • Writer thread reads the this.tracers on Line 58. It is valid so it begins its work.
  • Reader thread misses the cache and takes the lock. Inside the lock it checks this.tracers == null which is false. It begins to do its work.
  • Dispose thread sets this.tracers to null.
  • Dispose thread tries to takes the lock. It has to wait.
  • Writer thread adds a new tracer to the cache and releases the lock. It doesn't care that this.tracers is now actually null because it is working on a local copy.
  • Dispose thread gets the lock and makes all the tracers in the cache no-ops including the one that was just added.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For case 2,

Writer thread adds a new tracer to the cache and releases the lock. It doesn't care that this.tracers is now actually null because it is working on a local copy.

I think this is more of design choice. Yes, it doesn't care that this.tracers is now actually null but it could care about it 😄.

I was thinking we could offer a stronger guarantee that we would never return a Tracer when TracerProvider is disposed or being disposed. We could avoid this limbo state where the Dispose method may or may not have marked the newly returned Tracer no-op when its being used.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I merged the PR because I think what's there will work well enough. I'll circle back to this comment when I have a sec to see if I can simplify it or clean it up in a way that doesn't introduce a bunch of contention.

// no-op Tracer in that case.
return new(activitySource: null);
}

tracer = new(new(key.Name, key.Version));
if (!tracers.TryAdd(key, tracer))
{
// Note: This should really never happen due to the
// outer lock.
tracer.ActivitySource!.Dispose();
goto Retry;
}
}
}

return tracer;
}

/// <inheritdoc/>
protected override void Dispose(bool disposing)
{
if (disposing)
{
var tracers = Interlocked.CompareExchange(ref this.tracers, null, this.tracers);
if (tracers != null)
{
lock (tracers)
{
foreach (var kvp in tracers)
{
var tracer = kvp.Value;
var activitySource = tracer.ActivitySource;
tracer.ActivitySource = null;
activitySource?.Dispose();
}

tracers.Clear();
}
}
}

base.Dispose(disposing);
}

private readonly record struct TracerKey
{
public readonly string Name;
public readonly string? Version;

public TracerKey(string? name, string? version)
{
this.Name = name ?? string.Empty;
this.Version = version;
}
}
}
26 changes: 26 additions & 0 deletions test/OpenTelemetry.Api.Tests/Trace/TracerTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,32 @@ public void CreateSpan_NotSampled()
Assert.False(span.IsRecording);
}

[Fact]
public void TracerBecomesNoopWhenParentProviderIsDisposedTest()
{
TracerProvider provider = null;
Tracer tracer = null;

using (var tracerProvider = Sdk.CreateTracerProviderBuilder()
.AddSource("mytracer")
.Build())
{
provider = tracerProvider;
tracer = tracerProvider.GetTracer("mytracer");

var span1 = tracer.StartSpan("foo");
Assert.True(span1.IsRecording);
}

var span2 = tracer.StartSpan("foo");
Assert.False(span2.IsRecording);

var tracer2 = provider.GetTracer("mytracer");

var span3 = tracer2.StartSpan("foo");
Assert.False(span3.IsRecording);
}

public void Dispose()
{
Activity.Current = null;
Expand Down