Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sdk/ai/Azure.AI.VoiceLive/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,15 @@
## 1.0.0-beta.2 (Unreleased)

### Features Added
Added overloads for MessageItem creation to accept a single content part.

### Breaking Changes
AudioFormat was split into InputAudioFormat and OutputAudioFormat.
Emotion classes and options were dropped.
Eou and TurnDetection classes were renamed.
API properties that were duration-based are now TimeSpan values.
Methods to configure the session were collapsed into ConfigureSession.
Renamed ToolChoiceFunctionObjectFunction to ToolChoiceFunctionObject.

### Bugs Fixed

Expand Down
28 changes: 13 additions & 15 deletions sdk/ai/Azure.AI.VoiceLive/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,22 +151,22 @@ VoiceLiveSessionOptions sessionOptions = new()
Model = model,
Instructions = "You are a helpful AI assistant. Respond naturally and conversationally.",
Voice = new AzureStandardVoice("en-US-AvaNeural"),
TurnDetection = new ServerVad()
TurnDetection = new AzureSemanticVadTurnDetection()
{
Threshold = 0.5f,
PrefixPaddingMs = 300,
SilenceDurationMs = 500
PrefixPadding = TimeSpan.FromMilliseconds(300),
SilenceDuration = TimeSpan.FromMilliseconds(500)
},
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16
};

// Ensure modalities include audio
sessionOptions.Modalities.Clear();
sessionOptions.Modalities.Add(InputModality.Text);
sessionOptions.Modalities.Add(InputModality.Audio);

await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);

// Process events from the session
await foreach (SessionUpdate serverEvent in session.GetUpdatesAsync().ConfigureAwait(false))
Expand Down Expand Up @@ -196,22 +196,20 @@ VoiceLiveSessionOptions sessionOptions = new()
{
Temperature = 0.8f
},
TurnDetection = new AzureSemanticVad()
TurnDetection = new AzureSemanticVadTurnDetection()
{
NegThreshold = 0.3f,
WindowSize = 300,
RemoveFillerWords = true
},
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16
};

// Ensure modalities include audio
sessionOptions.Modalities.Clear();
sessionOptions.Modalities.Add(InputModality.Text);
sessionOptions.Modalities.Add(InputModality.Audio);

await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
```

### Function calling example
Expand Down Expand Up @@ -240,8 +238,8 @@ VoiceLiveSessionOptions sessionOptions = new()
Model = model,
Instructions = "You are a weather assistant. Use the get_current_weather function to help users with weather information.",
Voice = new AzureStandardVoice("en-US-AvaNeural"),
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16
};

// Add the function tool
Expand All @@ -252,7 +250,7 @@ sessionOptions.Modalities.Clear();
sessionOptions.Modalities.Add(InputModality.Text);
sessionOptions.Modalities.Add(InputModality.Audio);

await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
```

## Troubleshooting
Expand Down
547 changes: 218 additions & 329 deletions sdk/ai/Azure.AI.VoiceLive/api/Azure.AI.VoiceLive.net8.0.cs

Large diffs are not rendered by default.

546 changes: 218 additions & 328 deletions sdk/ai/Azure.AI.VoiceLive/api/Azure.AI.VoiceLive.netstandard2.0.cs

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace Azure.AI.VoiceLive.Samples;
/// This sample now demonstrates some of the new convenience methods added to the VoiceLive SDK:
/// - ClearStreamingAudioAsync() - Clears all input audio currently being streamed
/// - CancelResponseAsync() - Cancels the current response generation (existing method)
/// - ConfigureConversationSessionAsync() - Configures session options (existing method)
/// - ConfigureSessionAsync() - Configures session options (existing method)
///
/// Additional convenience methods available but not shown in this sample:
/// - StartAudioTurnAsync() / EndAudioTurnAsync() / CancelAudioTurnAsync() - Audio turn management
Expand Down Expand Up @@ -123,11 +123,11 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
var azureVoice = new AzureStandardVoice(_voice);

// Create strongly typed turn detection configuration
var turnDetectionConfig = new ServerVad
var turnDetectionConfig = new ServerVadTurnDetection
{
Threshold = 0.5f,
PrefixPaddingMs = 300,
SilenceDurationMs = 500
PrefixPadding = TimeSpan.FromMilliseconds(300),
SilenceDuration = TimeSpan.FromMilliseconds(500)
};

// Create conversation session options
Expand All @@ -137,8 +137,8 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
Model = _model,
Instructions = _instructions,
Voice = azureVoice,
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16,
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16,
TurnDetection = turnDetectionConfig
};

Expand All @@ -147,7 +147,7 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
sessionOptions.Modalities.Add(InputModality.Text);
sessionOptions.Modalities.Add(InputModality.Audio);

await _session!.ConfigureConversationSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);
await _session!.ConfigureSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);

_logger.LogInformation("Session configuration sent");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ This sample now demonstrates some of the new convenience methods added to the Vo

**Used in this sample:**
- `ClearStreamingAudioAsync()` - Clears all input audio currently being streamed
- `ConfigureConversationSessionAsync()` - Configures conversation session options
- `ConfigureSessionAsync()` - Configures conversation session options
- `CancelResponseAsync()` - Cancels the current response generation
- `SendInputAudioAsync()` - Sends audio data to the service

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
var azureVoice = new AzureStandardVoice(_voice);

// Create strongly typed turn detection configuration
var turnDetectionConfig = new ServerVad
var turnDetectionConfig = new ServerVadTurnDetection
{
Threshold = 0.5f,
PrefixPaddingMs = 300,
SilenceDurationMs = 500
PrefixPadding = TimeSpan.FromMilliseconds(300),
SilenceDuration = TimeSpan.FromMilliseconds(500)
};

// Create conversation session options with function tools
Expand All @@ -139,8 +139,8 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
Model = _model,
Instructions = _instructions,
Voice = azureVoice,
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16,
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16,
TurnDetection = turnDetectionConfig
};

Expand All @@ -157,7 +157,7 @@ private async Task SetupSessionAsync(CancellationToken cancellationToken)
sessionOptions.Tools.Add(CreateUpdateShippingAddressTool());


await _session!.ConfigureConversationSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);
await _session!.ConfigureSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);

_logger.LogInformation("Session configuration sent with {ToolCount} customer service tools", sessionOptions.Tools.Count);
}
Expand Down
28 changes: 13 additions & 15 deletions sdk/ai/Azure.AI.VoiceLive/samples/snippets/BasicUsageSnippets.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,22 @@ public async Task BasicVoiceAssistantExample()
Model = model,
Instructions = "You are a helpful AI assistant. Respond naturally and conversationally.",
Voice = new AzureStandardVoice("en-US-AvaNeural"),
TurnDetection = new ServerVad()
TurnDetection = new AzureSemanticVadTurnDetection()
{
Threshold = 0.5f,
PrefixPaddingMs = 300,
SilenceDurationMs = 500
PrefixPadding = TimeSpan.FromMilliseconds(300),
SilenceDuration = TimeSpan.FromMilliseconds(500)
},
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16
};

// Ensure modalities include audio
sessionOptions.Modalities.Clear();
sessionOptions.Modalities.Add(InputModality.Text);
sessionOptions.Modalities.Add(InputModality.Audio);

await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);

// Process events from the session
await foreach (SessionUpdate serverEvent in session.GetUpdatesAsync().ConfigureAwait(false))
Expand Down Expand Up @@ -92,22 +92,20 @@ public async Task AdvancedVoiceConfiguration()
{
Temperature = 0.8f
},
TurnDetection = new AzureSemanticVad()
TurnDetection = new AzureSemanticVadTurnDetection()
{
NegThreshold = 0.3f,
WindowSize = 300,
RemoveFillerWords = true
},
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16
};

// Ensure modalities include audio
sessionOptions.Modalities.Clear();
sessionOptions.Modalities.Add(InputModality.Text);
sessionOptions.Modalities.Add(InputModality.Audio);

await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
#endregion
}

Expand Down Expand Up @@ -148,8 +146,8 @@ public async Task FunctionCallingExample()
Model = model,
Instructions = "You are a weather assistant. Use the get_current_weather function to help users with weather information.",
Voice = new AzureStandardVoice("en-US-AvaNeural"),
InputAudioFormat = AudioFormat.Pcm16,
OutputAudioFormat = AudioFormat.Pcm16
InputAudioFormat = InputAudioFormat.Pcm16,
OutputAudioFormat = OutputAudioFormat.Pcm16
};

// Add the function tool
Expand All @@ -160,7 +158,7 @@ public async Task FunctionCallingExample()
sessionOptions.Modalities.Add(InputModality.Text);
sessionOptions.Modalities.Add(InputModality.Audio);

await session.ConfigureConversationSessionAsync(sessionOptions).ConfigureAwait(false);
await session.ConfigureSessionAsync(sessionOptions).ConfigureAwait(false);
#endregion
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#nullable disable

using System;
using System.Collections.Generic;

namespace Azure.AI.VoiceLive
{
/// <summary> Configuration for animation outputs including blendshapes, visemes, and emotion metadata. </summary>
public partial class AnimationOptions
{
/// <summary> Interval for emotion detection in milliseconds. If not set, emotion detection is disabled. </summary>
public int? EmotionDetectionIntervalMs { get; set; }

/// <summary> Interval for emotion detection. If not set, emotion detection is disabled. </summary>
public TimeSpan? EmotionDetectionInterval
{
get => EmotionDetectionIntervalMs.HasValue ? TimeSpan.FromMilliseconds(EmotionDetectionIntervalMs.Value) : (TimeSpan?)null;
set => EmotionDetectionIntervalMs = value.HasValue ? (int?)value.Value.TotalMilliseconds : null;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#nullable disable

using System;
using System.Collections.Generic;
using System.Linq;

namespace Azure.AI.VoiceLive
{
/// <summary> The AssistantMessageItem. </summary>
public partial class AssistantMessageItem : MessageItem
{
/// <summary> Initializes a new instance of <see cref="AssistantMessageItem"/>. </summary>
/// <param name="content"></param>
/// <exception cref="ArgumentNullException"> <paramref name="content"/> is null. </exception>
public AssistantMessageItem(OutputTextContentPart content) : this(new[] { content }) { }
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#nullable disable

using System;

namespace Azure.AI.VoiceLive
{
/// <summary> Azure semantic end-of-utterance detection (default). </summary>
public partial class AzureSemanticEouDetection
{
/// <summary> Gets or sets the Timeout. </summary>
internal float? TimeoutMs { get; set; }

/// <summary> Gets or sets the Timeout. </summary>
public TimeSpan Timeout
{
get => TimeSpan.FromMilliseconds(TimeoutMs ?? 0);
set => TimeoutMs = (float)value.TotalMilliseconds;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#nullable disable

using System;

namespace Azure.AI.VoiceLive
{
/// <summary> Azure semantic end-of-utterance detection (default). </summary>
public partial class AzureSemanticEouDetectionEn
{
/// <summary> Gets or sets the Timeout. </summary>
internal float? TimeoutMs { get; set; }

/// <summary> Gets or sets the Timeout. </summary>
public TimeSpan Timeout
{
get => TimeSpan.FromMilliseconds(TimeoutMs ?? 0);
set => TimeoutMs = (float)value.TotalMilliseconds;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#nullable disable

using System;

namespace Azure.AI.VoiceLive
{
/// <summary> Azure semantic end-of-utterance detection (default). </summary>
public partial class AzureSemanticEouDetectionMultilingual
{
/// <summary> Gets or sets the Timeout. </summary>
internal float? TimeoutMs { get; set; }

/// <summary> Gets or sets the Timeout. </summary>
public TimeSpan Timeout
{
get => TimeSpan.FromMilliseconds(TimeoutMs ?? 0);
set => TimeoutMs = (float)value.TotalMilliseconds;
}
}
}
Loading