Skip to content

Commit 76737b4

Browse files
authored
Add OpenTelemetrySpeechToTextClient and friends (#6845)
This is basically the chat client OpenTelemetry client copy/pasted/tweaked to compile. The otel spec doesn't have anything specific to this modality yet, so this is making best guesses on what things should be and also being minimal in what's tracked.
1 parent c378af0 commit 76737b4

File tree

5 files changed

+562
-3
lines changed

5 files changed

+562
-3
lines changed

src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClientBuilderExtensions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ namespace Microsoft.Extensions.AI;
1414
[Experimental("MEAI001")]
1515
public static class LoggingSpeechToTextClientBuilderExtensions
1616
{
17-
/// <summary>Adds logging to the audio transcription client pipeline.</summary>
17+
/// <summary>Adds logging to the speech-to-text client pipeline.</summary>
1818
/// <param name="builder">The <see cref="SpeechToTextClientBuilder"/>.</param>
1919
/// <param name="loggerFactory">
2020
/// An optional <see cref="ILoggerFactory"/> used to create a logger with which logging should be performed.
Lines changed: 367 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,367 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Diagnostics;
7+
using System.Diagnostics.CodeAnalysis;
8+
using System.Diagnostics.Metrics;
9+
using System.IO;
10+
using System.Runtime.CompilerServices;
11+
using System.Threading;
12+
using System.Threading.Tasks;
13+
using Microsoft.Extensions.Logging;
14+
using Microsoft.Shared.Diagnostics;
15+
16+
#pragma warning disable S3358 // Ternary operators should not be nested
17+
#pragma warning disable SA1111 // Closing parenthesis should be on line of last parameter
18+
#pragma warning disable SA1113 // Comma should be on the same line as previous parameter
19+
20+
namespace Microsoft.Extensions.AI;
21+
22+
/// <summary>Represents a delegating speech-to-text client that implements the OpenTelemetry Semantic Conventions for Generative AI systems.</summary>
/// <remarks>
/// This class provides an implementation of the Semantic Conventions for Generative AI systems v1.37, defined at <see href="https://opentelemetry.io/docs/specs/semconv/gen-ai/" />.
/// The specification is still experimental and subject to change; as such, the telemetry output by this client is also subject to change.
/// </remarks>
[Experimental("MEAI001")]
public sealed class OpenTelemetrySpeechToTextClient : DelegatingSpeechToTextClient
{
    /// <summary>Source of the spans created for each speech-to-text request.</summary>
    private readonly ActivitySource _activitySource;

    /// <summary>Meter that owns the histograms below.</summary>
    private readonly Meter _meter;

    /// <summary>Records input and output token counts reported by responses.</summary>
    private readonly Histogram<int> _tokenUsageHistogram;

    /// <summary>Records the duration of each operation, in seconds.</summary>
    private readonly Histogram<double> _operationDurationHistogram;

    // Values captured once from the inner client's SpeechToTextClientMetadata (if it exposes any).
    private readonly string? _defaultModelId;
    private readonly string? _providerName;
    private readonly string? _serverAddress;
    private readonly int _serverPort;

    /// <summary>Initializes a new instance of the <see cref="OpenTelemetrySpeechToTextClient"/> class.</summary>
    /// <param name="innerClient">The underlying <see cref="ISpeechToTextClient"/>.</param>
    /// <param name="logger">The <see cref="ILogger"/> to use for emitting any logging data from the client.</param>
    /// <param name="sourceName">An optional source name that will be used on the telemetry data.</param>
#pragma warning disable IDE0060 // Remove unused parameter; it exists for consistency with IChatClient and future use
    public OpenTelemetrySpeechToTextClient(ISpeechToTextClient innerClient, ILogger? logger = null, string? sourceName = null)
#pragma warning restore IDE0060
        : base(innerClient)
    {
        Debug.Assert(innerClient is not null, "Should have been validated by the base ctor");

        if (innerClient!.GetService<SpeechToTextClientMetadata>() is SpeechToTextClientMetadata metadata)
        {
            _defaultModelId = metadata.DefaultModelId;
            _providerName = metadata.ProviderName;
            _serverAddress = metadata.ProviderUri?.Host;
            _serverPort = metadata.ProviderUri?.Port ?? 0;
        }

        // A null or empty source name falls back to the shared default name.
        string name = string.IsNullOrEmpty(sourceName) ? OpenTelemetryConsts.DefaultSourceName : sourceName!;
        _activitySource = new(name);
        _meter = new(name);

        _tokenUsageHistogram = _meter.CreateHistogram<int>(
            OpenTelemetryConsts.GenAI.Client.TokenUsage.Name,
            OpenTelemetryConsts.TokensUnit,
            OpenTelemetryConsts.GenAI.Client.TokenUsage.Description
#if NET9_0_OR_GREATER
            , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.TokenUsage.ExplicitBucketBoundaries }
#endif
            );

        _operationDurationHistogram = _meter.CreateHistogram<double>(
            OpenTelemetryConsts.GenAI.Client.OperationDuration.Name,
            OpenTelemetryConsts.SecondsUnit,
            OpenTelemetryConsts.GenAI.Client.OperationDuration.Description
#if NET9_0_OR_GREATER
            , advice: new() { HistogramBucketBoundaries = OpenTelemetryConsts.GenAI.Client.OperationDuration.ExplicitBucketBoundaries }
#endif
            );
    }

    /// <inheritdoc/>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            // This instance created the activity source and the meter, so it disposes them.
            _activitySource.Dispose();
            _meter.Dispose();
        }

        base.Dispose(disposing);
    }

    /// <summary>
    /// Gets or sets a value indicating whether potentially sensitive information should be included in telemetry.
    /// </summary>
    /// <value>
    /// <see langword="true"/> if potentially sensitive information should be included in telemetry;
    /// <see langword="false"/> if telemetry shouldn't include raw inputs and outputs.
    /// The default value is <see langword="false"/>, unless the <c>OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT</c>
    /// environment variable is set to "true" (case-insensitive).
    /// </value>
    /// <remarks>
    /// By default, telemetry includes metadata, such as token counts, but not raw inputs
    /// and outputs, such as message content, function call arguments, and function call results.
    /// The default value can be overridden by setting the <c>OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT</c>
    /// environment variable to "true". Explicitly setting this property will override the environment variable.
    /// </remarks>
    public bool EnableSensitiveData { get; set; } = TelemetryHelpers.EnableSensitiveDataDefault;

    /// <inheritdoc/>
    public override object? GetService(Type serviceType, object? serviceKey = null) =>
        serviceType == typeof(ActivitySource) ? _activitySource :
        base.GetService(serviceType, serviceKey);

    /// <inheritdoc/>
    public override async Task<SpeechToTextResponse> GetTextAsync(Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default)
    {
        _ = Throw.IfNull(audioSpeechStream);

        using Activity? activity = CreateAndConfigureActivity(options);

        // Only pay for a stopwatch when something is listening to the duration histogram.
        Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null;
        string? requestModelId = options?.ModelId ?? _defaultModelId;

        SpeechToTextResponse? response = null;
        Exception? error = null;
        try
        {
            response = await base.GetTextAsync(audioSpeechStream, options, cancellationToken);
            return response;
        }
        catch (Exception ex)
        {
            error = ex;
            throw;
        }
        finally
        {
            // Runs for both success and failure so the duration and any error are always recorded.
            TraceResponse(activity, requestModelId, response, error, stopwatch);
        }
    }

    /// <inheritdoc/>
    public override async IAsyncEnumerable<SpeechToTextResponseUpdate> GetStreamingTextAsync(
        Stream audioSpeechStream, SpeechToTextOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        _ = Throw.IfNull(audioSpeechStream);

        using Activity? activity = CreateAndConfigureActivity(options);
        Stopwatch? stopwatch = _operationDurationHistogram.Enabled ? Stopwatch.StartNew() : null;
        string? requestModelId = options?.ModelId ?? _defaultModelId;

        IAsyncEnumerable<SpeechToTextResponseUpdate> updates;
        try
        {
            updates = base.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken);
        }
        catch (Exception ex)
        {
            // The inner client failed before producing an enumerable; record the failure and rethrow.
            TraceResponse(activity, requestModelId, response: null, ex, stopwatch);
            throw;
        }

        var responseEnumerator = updates.GetAsyncEnumerator(cancellationToken);
        List<SpeechToTextResponseUpdate> trackedUpdates = [];
        Exception? error = null;
        try
        {
            while (true)
            {
                SpeechToTextResponseUpdate update;

                // A 'yield return' cannot appear inside a try block that has a catch clause, so only the
                // MoveNextAsync/Current access is guarded here and the yield happens after it.
                try
                {
                    if (!await responseEnumerator.MoveNextAsync())
                    {
                        break;
                    }

                    update = responseEnumerator.Current;
                }
                catch (Exception ex)
                {
                    error = ex;
                    throw;
                }

                trackedUpdates.Add(update);
                yield return update;
                Activity.Current = activity; // workaround for https://github.com/dotnet/runtime/issues/47802
            }
        }
        finally
        {
            // Dispose the inner enumerator even if recording telemetry (or combining the updates) throws.
            try
            {
                TraceResponse(activity, requestModelId, trackedUpdates.ToSpeechToTextResponse(), error, stopwatch);
            }
            finally
            {
                await responseEnumerator.DisposeAsync();
            }
        }
    }

    /// <summary>Converts a <see cref="long"/> to an <see cref="int"/>, saturating at the <see cref="int"/> bounds instead of wrapping.</summary>
    /// <param name="value">The value to convert.</param>
    /// <returns><paramref name="value"/> limited to the range [<see cref="int.MinValue"/>, <see cref="int.MaxValue"/>].</returns>
    private static int ToSaturatedInt32(long value) =>
        (int)Math.Max(int.MinValue, Math.Min(int.MaxValue, value));

    /// <summary>Creates an activity for a speech-to-text request, or returns <see langword="null"/> if not enabled.</summary>
    /// <param name="options">The request options, used for the model ID and (when sensitive data is enabled) additional properties.</param>
    /// <returns>The started activity, or <see langword="null"/> if there are no listeners or no activity was started.</returns>
    private Activity? CreateAndConfigureActivity(SpeechToTextOptions? options)
    {
        Activity? activity = null;
        if (_activitySource.HasListeners())
        {
            string? modelId = options?.ModelId ?? _defaultModelId;

            // The span name is the operation name, followed by the model ID when one is known.
            activity = _activitySource.StartActivity(
                string.IsNullOrWhiteSpace(modelId) ? OpenTelemetryConsts.GenAI.GenerateContentName : $"{OpenTelemetryConsts.GenAI.GenerateContentName} {modelId}",
                ActivityKind.Client);

            if (activity is { IsAllDataRequested: true })
            {
                _ = activity
                    .AddTag(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName)
                    .AddTag(OpenTelemetryConsts.GenAI.Request.Model, modelId)
                    .AddTag(OpenTelemetryConsts.GenAI.Provider.Name, _providerName)
                    .AddTag(OpenTelemetryConsts.GenAI.Output.Type, OpenTelemetryConsts.TypeText);

                if (_serverAddress is not null)
                {
                    _ = activity
                        .AddTag(OpenTelemetryConsts.Server.Address, _serverAddress)
                        .AddTag(OpenTelemetryConsts.Server.Port, _serverPort);
                }

                // Log all additional request options as raw values on the span.
                // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data.
                if (EnableSensitiveData && options?.AdditionalProperties is { } props)
                {
                    foreach (KeyValuePair<string, object?> prop in props)
                    {
                        _ = activity.AddTag(prop.Key, prop.Value);
                    }
                }
            }
        }

        return activity;
    }

    /// <summary>Records metrics for a completed operation and adds speech-to-text response information to the activity.</summary>
    /// <param name="activity">The activity for the operation, or <see langword="null"/> if none was created.</param>
    /// <param name="requestModelId">The model ID used for the request, if known.</param>
    /// <param name="response">The response, or <see langword="null"/> if the operation failed before producing one.</param>
    /// <param name="error">The exception that ended the operation, or <see langword="null"/> on success.</param>
    /// <param name="stopwatch">The stopwatch timing the operation, or <see langword="null"/> if duration isn't being tracked.</param>
    private void TraceResponse(
        Activity? activity,
        string? requestModelId,
        SpeechToTextResponse? response,
        Exception? error,
        Stopwatch? stopwatch)
    {
        if (_operationDurationHistogram.Enabled && stopwatch is not null)
        {
            TagList tags = default;

            AddMetricTags(ref tags, requestModelId, response);
            if (error is not null)
            {
                tags.Add(OpenTelemetryConsts.Error.Type, error.GetType().FullName);
            }

            _operationDurationHistogram.Record(stopwatch.Elapsed.TotalSeconds, tags);
        }

        if (_tokenUsageHistogram.Enabled && response?.Usage is { } usage)
        {
            if (usage.InputTokenCount is long inputTokens)
            {
                TagList tags = default;
                tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeInput);
                AddMetricTags(ref tags, requestModelId, response);

                // The histogram is int-based; saturate rather than let an oversized count wrap around.
                _tokenUsageHistogram.Record(ToSaturatedInt32(inputTokens), tags);
            }

            if (usage.OutputTokenCount is long outputTokens)
            {
                TagList tags = default;
                tags.Add(OpenTelemetryConsts.GenAI.Token.Type, OpenTelemetryConsts.TokenTypeOutput);
                AddMetricTags(ref tags, requestModelId, response);
                _tokenUsageHistogram.Record(ToSaturatedInt32(outputTokens), tags);
            }
        }

        if (error is not null)
        {
            _ = activity?
                .AddTag(OpenTelemetryConsts.Error.Type, error.GetType().FullName)
                .SetStatus(ActivityStatusCode.Error, error.Message);
        }

        if (response is not null)
        {
            AddOutputMessagesTags(response, activity);

            if (activity is not null)
            {
                if (!string.IsNullOrWhiteSpace(response.ResponseId))
                {
                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Id, response.ResponseId);
                }

                if (response.ModelId is not null)
                {
                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Response.Model, response.ModelId);
                }

                if (response.Usage?.InputTokenCount is long inputTokens)
                {
                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.InputTokens, ToSaturatedInt32(inputTokens));
                }

                if (response.Usage?.OutputTokenCount is long outputTokens)
                {
                    _ = activity.AddTag(OpenTelemetryConsts.GenAI.Usage.OutputTokens, ToSaturatedInt32(outputTokens));
                }

                // Log all additional response properties as raw values on the span.
                // Since AdditionalProperties has undefined meaning, we treat it as potentially sensitive data.
                if (EnableSensitiveData && response.AdditionalProperties is { } props)
                {
                    foreach (KeyValuePair<string, object?> prop in props)
                    {
                        _ = activity.AddTag(prop.Key, prop.Value);
                    }
                }
            }
        }

        // Adds the tags shared by every metric measurement recorded by this method.
        void AddMetricTags(ref TagList tags, string? requestModelId, SpeechToTextResponse? response)
        {
            tags.Add(OpenTelemetryConsts.GenAI.Operation.Name, OpenTelemetryConsts.GenAI.GenerateContentName);

            if (requestModelId is not null)
            {
                tags.Add(OpenTelemetryConsts.GenAI.Request.Model, requestModelId);
            }

            tags.Add(OpenTelemetryConsts.GenAI.Provider.Name, _providerName);

            if (_serverAddress is string endpointAddress)
            {
                tags.Add(OpenTelemetryConsts.Server.Address, endpointAddress);
                tags.Add(OpenTelemetryConsts.Server.Port, _serverPort);
            }

            if (response?.ModelId is string responseModel)
            {
                tags.Add(OpenTelemetryConsts.GenAI.Response.Model, responseModel);
            }
        }
    }

    /// <summary>Adds the response contents to the activity as an output-messages tag, when sensitive data is enabled.</summary>
    /// <param name="response">The response whose contents are serialized.</param>
    /// <param name="activity">The activity to tag; nothing is added unless it is non-null and requests all data.</param>
    private void AddOutputMessagesTags(SpeechToTextResponse response, Activity? activity)
    {
        if (EnableSensitiveData && activity is { IsAllDataRequested: true })
        {
            // The contents are wrapped in a single assistant-role message so the chat client's serializer can be reused.
            _ = activity.AddTag(
                OpenTelemetryConsts.GenAI.Output.Messages,
                OpenTelemetryChatClient.SerializeChatMessages([new(ChatRole.Assistant, response.Contents)]));
        }
    }
}

0 commit comments

Comments
 (0)