Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .vscode/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -874,10 +874,16 @@
"filename": "/sdk/ai/azure-ai-voicelive/**",
"words": [
"Dexec",
"filler",
"FILLER",
"foundry",
"FOUNDRY",
"viseme",
"VISEME",
"webrtc",
"WEBRTC"
"WEBRTC",
"xhigh",
"XHIGH"
]
},
{
Expand Down
24 changes: 24 additions & 0 deletions sdk/ai/azure-ai-voicelive/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,35 @@
- Added `startSession(String model, VoiceLiveRequestOptions requestOptions)` for model with custom options
- Added `startSession(VoiceLiveRequestOptions requestOptions)` for custom options without explicit model parameter
- Original `startSession(String model)` and `startSession()` methods preserved for backward compatibility
- Added Foundry Agent tool support:
- `FoundryAgentTool` for integrating Foundry agents as tools in VoiceLive sessions
- `FoundryAgentContextType` enum for configuring agent context (no_context, agent_context)
Comment thread
xitzhang marked this conversation as resolved.
- `ResponseFoundryAgentCallItem` for tracking Foundry agent call responses
- Foundry agent call lifecycle events: `ServerEventResponseFoundryAgentCallArgumentsDelta`, `ServerEventResponseFoundryAgentCallArgumentsDone`, `ServerEventResponseFoundryAgentCallInProgress`, `ServerEventResponseFoundryAgentCallCompleted`, `ServerEventResponseFoundryAgentCallFailed`
- `ItemType.FOUNDRY_AGENT_CALL` and `ToolType.FOUNDRY_AGENT` discriminator values
- Added filler response configuration for handling latency and tool calls:
  - `FillerResponseConfigBase` base class for filler response configurations
  - `BasicFillerResponseConfig` for static/random text filler responses
  - `LlmFillerResponseConfig` for LLM-generated context-aware filler responses
  - `FillerResponseConfigType` enum (static_filler, llm_filler)
  - `FillerTrigger` enum for trigger conditions (latency, tool)
  - Added `fillerResponse` property to `VoiceLiveSessionOptions` and `VoiceLiveSessionResponse`
- Added reasoning effort configuration for reasoning models:
  - `ReasoningEffort` enum with levels: none, minimal, low, medium, high, xhigh
  - Added `reasoningEffort` property to `VoiceLiveSessionOptions`, `VoiceLiveSessionResponse`, and `ResponseCreateParams`
- Added metadata support:
  - Added `metadata` property to `ResponseCreateParams` and `SessionResponse` for attaching key-value pairs
- Added custom text normalization URL support for Azure voices:
  - Added `customTextNormalizationUrl` property to `AzureCustomVoice`, `AzurePersonalVoice`, and `AzureStandardVoice`

### Breaking Changes

### Bugs Fixed

- Fixed `OutputAudioFormat` enum values from dash-separated to underscore-separated:
  - `pcm16-8000hz` → `pcm16_8000hz`
  - `pcm16-16000hz` → `pcm16_16000hz`

### Other Changes

## 1.0.0-beta.3 (2025-12-03)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
jsonWriter.writeStringField("type", this.type == null ? null : this.type.toString());
jsonWriter.writeNumberField("temperature", this.temperature);
jsonWriter.writeStringField("custom_lexicon_url", this.customLexiconUri);
jsonWriter.writeStringField("custom_text_normalization_url", this.customTextNormalizationUrl);
jsonWriter.writeArrayField("prefer_locales", this.preferLocales,
(writer, element) -> writer.writeString(element));
jsonWriter.writeStringField("locale", this.locale);
Expand Down Expand Up @@ -341,6 +342,7 @@ public static AzureCustomVoice fromJson(JsonReader jsonReader) throws IOExceptio
AzureVoiceType type = AzureVoiceType.AZURE_CUSTOM;
Double temperature = null;
String customLexiconUri = null;
String customTextNormalizationUrl = null;
List<String> preferLocales = null;
String locale = null;
String style = null;
Expand All @@ -360,6 +362,8 @@ public static AzureCustomVoice fromJson(JsonReader jsonReader) throws IOExceptio
temperature = reader.getNullable(JsonReader::getDouble);
} else if ("custom_lexicon_url".equals(fieldName)) {
customLexiconUri = reader.getString();
} else if ("custom_text_normalization_url".equals(fieldName)) {
customTextNormalizationUrl = reader.getString();
} else if ("prefer_locales".equals(fieldName)) {
preferLocales = reader.readArray(reader1 -> reader1.getString());
} else if ("locale".equals(fieldName)) {
Expand All @@ -380,6 +384,7 @@ public static AzureCustomVoice fromJson(JsonReader jsonReader) throws IOExceptio
deserializedAzureCustomVoice.type = type;
deserializedAzureCustomVoice.temperature = temperature;
deserializedAzureCustomVoice.customLexiconUri = customLexiconUri;
deserializedAzureCustomVoice.customTextNormalizationUrl = customTextNormalizationUrl;
deserializedAzureCustomVoice.preferLocales = preferLocales;
deserializedAzureCustomVoice.locale = locale;
deserializedAzureCustomVoice.style = style;
Expand All @@ -389,4 +394,32 @@ public static AzureCustomVoice fromJson(JsonReader jsonReader) throws IOExceptio
return deserializedAzureCustomVoice;
});
}

/*
* The custom text normalization URL for this voice. Serialized to and deserialized from the
* custom_text_normalization_url JSON property; null when not set.
*/
@Generated
private String customTextNormalizationUrl;

/**
* Gets the custom text normalization URL of this voice (JSON property {@code custom_text_normalization_url}).
*
* @return the customTextNormalizationUrl value, or {@code null} if none has been set.
*/
@Generated
public String getCustomTextNormalizationUrl() {
return customTextNormalizationUrl;
}

/**
* Sets the custom text normalization URL of this voice (JSON property {@code custom_text_normalization_url}).
* The value is stored as given; no validation is performed here.
*
* @param customTextNormalizationUrl the customTextNormalizationUrl value to set.
* @return the AzureCustomVoice object itself, so that calls can be chained.
*/
@Generated
public AzureCustomVoice setCustomTextNormalizationUrl(String customTextNormalizationUrl) {
this.customTextNormalizationUrl = customTextNormalizationUrl;
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
jsonWriter.writeStringField("type", this.type == null ? null : this.type.toString());
jsonWriter.writeNumberField("temperature", this.temperature);
jsonWriter.writeStringField("custom_lexicon_url", this.customLexiconUrl);
jsonWriter.writeStringField("custom_text_normalization_url", this.customTextNormalizationUrl);
jsonWriter.writeArrayField("prefer_locales", this.preferLocales,
(writer, element) -> writer.writeString(element));
jsonWriter.writeStringField("locale", this.locale);
Expand Down Expand Up @@ -145,6 +146,7 @@ public static AzurePersonalVoice fromJson(JsonReader jsonReader) throws IOExcept
AzureVoiceType type = AzureVoiceType.AZURE_PERSONAL;
Double temperature = null;
String customLexiconUrl = null;
String customTextNormalizationUrl = null;
List<String> preferLocales = null;
String locale = null;
String style = null;
Expand All @@ -164,6 +166,8 @@ public static AzurePersonalVoice fromJson(JsonReader jsonReader) throws IOExcept
temperature = reader.getNullable(JsonReader::getDouble);
} else if ("custom_lexicon_url".equals(fieldName)) {
customLexiconUrl = reader.getString();
} else if ("custom_text_normalization_url".equals(fieldName)) {
customTextNormalizationUrl = reader.getString();
} else if ("prefer_locales".equals(fieldName)) {
preferLocales = reader.readArray(reader1 -> reader1.getString());
} else if ("locale".equals(fieldName)) {
Expand All @@ -184,6 +188,7 @@ public static AzurePersonalVoice fromJson(JsonReader jsonReader) throws IOExcept
deserializedAzurePersonalVoice.type = type;
deserializedAzurePersonalVoice.temperature = temperature;
deserializedAzurePersonalVoice.customLexiconUrl = customLexiconUrl;
deserializedAzurePersonalVoice.customTextNormalizationUrl = customTextNormalizationUrl;
deserializedAzurePersonalVoice.preferLocales = preferLocales;
deserializedAzurePersonalVoice.locale = locale;
deserializedAzurePersonalVoice.style = style;
Expand Down Expand Up @@ -389,4 +394,32 @@ public AzurePersonalVoice setVolume(String volume) {
this.volume = volume;
return this;
}

/*
* The custom text normalization URL for this voice. Serialized to and deserialized from the
* custom_text_normalization_url JSON property; null when not set.
*/
@Generated
private String customTextNormalizationUrl;

/**
* Gets the custom text normalization URL of this voice (JSON property {@code custom_text_normalization_url}).
*
* @return the customTextNormalizationUrl value, or {@code null} if none has been set.
*/
@Generated
public String getCustomTextNormalizationUrl() {
return customTextNormalizationUrl;
}

/**
* Sets the custom text normalization URL of this voice (JSON property {@code custom_text_normalization_url}).
* The value is stored as given; no validation is performed here.
*
* @param customTextNormalizationUrl the customTextNormalizationUrl value to set.
* @return the AzurePersonalVoice object itself, so that calls can be chained.
*/
@Generated
public AzurePersonalVoice setCustomTextNormalizationUrl(String customTextNormalizationUrl) {
this.customTextNormalizationUrl = customTextNormalizationUrl;
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
jsonWriter.writeStringField("type", this.type == null ? null : this.type.toString());
jsonWriter.writeNumberField("temperature", this.temperature);
jsonWriter.writeStringField("custom_lexicon_url", this.customLexiconUrl);
jsonWriter.writeStringField("custom_text_normalization_url", this.customTextNormalizationUrl);
jsonWriter.writeArrayField("prefer_locales", this.preferLocales,
(writer, element) -> writer.writeString(element));
jsonWriter.writeStringField("locale", this.locale);
Expand All @@ -321,6 +322,7 @@ public static AzureStandardVoice fromJson(JsonReader jsonReader) throws IOExcept
AzureVoiceType type = AzureVoiceType.AZURE_STANDARD;
Double temperature = null;
String customLexiconUrl = null;
String customTextNormalizationUrl = null;
List<String> preferLocales = null;
String locale = null;
String style = null;
Expand All @@ -338,6 +340,8 @@ public static AzureStandardVoice fromJson(JsonReader jsonReader) throws IOExcept
temperature = reader.getNullable(JsonReader::getDouble);
} else if ("custom_lexicon_url".equals(fieldName)) {
customLexiconUrl = reader.getString();
} else if ("custom_text_normalization_url".equals(fieldName)) {
customTextNormalizationUrl = reader.getString();
} else if ("prefer_locales".equals(fieldName)) {
preferLocales = reader.readArray(reader1 -> reader1.getString());
} else if ("locale".equals(fieldName)) {
Expand All @@ -358,6 +362,7 @@ public static AzureStandardVoice fromJson(JsonReader jsonReader) throws IOExcept
deserializedAzureStandardVoice.type = type;
deserializedAzureStandardVoice.temperature = temperature;
deserializedAzureStandardVoice.customLexiconUrl = customLexiconUrl;
deserializedAzureStandardVoice.customTextNormalizationUrl = customTextNormalizationUrl;
deserializedAzureStandardVoice.preferLocales = preferLocales;
deserializedAzureStandardVoice.locale = locale;
deserializedAzureStandardVoice.style = style;
Expand All @@ -367,4 +372,32 @@ public static AzureStandardVoice fromJson(JsonReader jsonReader) throws IOExcept
return deserializedAzureStandardVoice;
});
}

/*
* The custom text normalization URL for this voice. Serialized to and deserialized from the
* custom_text_normalization_url JSON property; null when not set.
*/
@Generated
private String customTextNormalizationUrl;

/**
* Gets the custom text normalization URL of this voice (JSON property {@code custom_text_normalization_url}).
*
* @return the customTextNormalizationUrl value, or {@code null} if none has been set.
*/
@Generated
public String getCustomTextNormalizationUrl() {
return customTextNormalizationUrl;
}

/**
* Sets the custom text normalization URL of this voice (JSON property {@code custom_text_normalization_url}).
* The value is stored as given; no validation is performed here.
*
* @param customTextNormalizationUrl the customTextNormalizationUrl value to set.
* @return the AzureStandardVoice object itself, so that calls can be chained.
*/
@Generated
public AzureStandardVoice setCustomTextNormalizationUrl(String customTextNormalizationUrl) {
this.customTextNormalizationUrl = customTextNormalizationUrl;
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Code generated by Microsoft (R) TypeSpec Code Generator.
package com.azure.ai.voicelive.models;

import com.azure.core.annotation.Fluent;
import com.azure.core.annotation.Generated;
import com.azure.json.JsonReader;
import com.azure.json.JsonToken;
import com.azure.json.JsonWriter;
import java.io.IOException;
import java.util.List;

/**
 * Filler response configuration of the basic/static kind.
 * Carries a list of candidate texts, one of which is randomly selected when any trigger condition is met.
 */
@Fluent
public final class BasicFillerResponseConfig extends FillerResponseConfigBase {

    /*
     * Discriminator for this filler response configuration; initialized to STATIC_FILLER.
     */
    @Generated
    private FillerResponseConfigType type = FillerResponseConfigType.STATIC_FILLER;

    /*
     * Candidate filler texts to randomly select from.
     */
    @Generated
    private List<String> texts;

    /**
     * Creates an instance of BasicFillerResponseConfig class.
     */
    @Generated
    public BasicFillerResponseConfig() {
    }

    /**
     * Get the type property: the discriminator identifying the kind of filler response configuration.
     *
     * @return the type value.
     */
    @Generated
    @Override
    public FillerResponseConfigType getType() {
        return type;
    }

    /**
     * Get the texts property: the filler text options to randomly select from.
     *
     * @return the texts value.
     */
    @Generated
    public List<String> getTexts() {
        return texts;
    }

    /**
     * Set the texts property: the filler text options to randomly select from.
     * The supplied list reference is stored as-is.
     *
     * @param texts the texts value to set.
     * @return the BasicFillerResponseConfig object itself.
     */
    @Generated
    public BasicFillerResponseConfig setTexts(List<String> texts) {
        this.texts = texts;
        return this;
    }

    /**
     * {@inheritDoc}
     */
    @Generated
    @Override
    public BasicFillerResponseConfig setTriggers(List<FillerTrigger> triggers) {
        // Delegate to the base class, then narrow the fluent return type to this class.
        super.setTriggers(triggers);
        return this;
    }

    /**
     * {@inheritDoc}
     */
    @Generated
    @Override
    public BasicFillerResponseConfig setLatencyThresholdMs(Integer latencyThresholdMs) {
        // Delegate to the base class, then narrow the fluent return type to this class.
        super.setLatencyThresholdMs(latencyThresholdMs);
        return this;
    }

    /**
     * {@inheritDoc}
     */
    @Generated
    @Override
    public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
        jsonWriter.writeStartObject();

        // Inherited properties are emitted first, followed by the properties declared on this class.
        jsonWriter.writeArrayField("triggers", getTriggers(),
            (arrayWriter, trigger) -> arrayWriter.writeString(trigger != null ? trigger.toString() : null));
        jsonWriter.writeNumberField("latency_threshold_ms", getLatencyThresholdMs());

        String typeValue = null;
        if (this.type != null) {
            typeValue = this.type.toString();
        }
        jsonWriter.writeStringField("type", typeValue);

        jsonWriter.writeArrayField("texts", this.texts, (arrayWriter, text) -> arrayWriter.writeString(text));

        return jsonWriter.writeEndObject();
    }

    /**
     * Reads an instance of BasicFillerResponseConfig from the JsonReader.
     * Recognized fields are {@code triggers}, {@code latency_threshold_ms}, {@code type} and {@code texts};
     * any other field is skipped.
     *
     * @param jsonReader The JsonReader being read.
     * @return An instance of BasicFillerResponseConfig if the JsonReader was pointing to an instance of it, or null if
     * it was pointing to JSON null.
     * @throws IOException If an error occurs while reading the BasicFillerResponseConfig.
     */
    @Generated
    public static BasicFillerResponseConfig fromJson(JsonReader jsonReader) throws IOException {
        return jsonReader.readObject(reader -> {
            BasicFillerResponseConfig config = new BasicFillerResponseConfig();

            while (reader.nextToken() != JsonToken.END_OBJECT) {
                String fieldName = reader.getFieldName();
                // Advance from the field name to its value before dispatching on the name.
                reader.nextToken();

                if ("type".equals(fieldName)) {
                    config.type = FillerResponseConfigType.fromString(reader.getString());
                } else if ("texts".equals(fieldName)) {
                    List<String> parsedTexts = reader.readArray(JsonReader::getString);
                    config.texts = parsedTexts;
                } else if ("triggers".equals(fieldName)) {
                    List<FillerTrigger> parsedTriggers
                        = reader.readArray(itemReader -> FillerTrigger.fromString(itemReader.getString()));
                    config.setTriggers(parsedTriggers);
                } else if ("latency_threshold_ms".equals(fieldName)) {
                    config.setLatencyThresholdMs(reader.getNullable(JsonReader::getInt));
                } else {
                    // Unknown property: skip its value, including any nested structure.
                    reader.skipChildren();
                }
            }

            return config;
        });
    }
}
Loading
Loading