Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -2188,7 +2188,7 @@
},
{
"filename": "sdk/ai/azure-ai-voicelive/**",
"words": ["viseme","VISEME","ulaw","ULAW","logprobs","pyaudio","PyAudio","libasound","webrtc","WEBRTC"]
"words": ["viseme","VISEME","ulaw","ULAW","logprobs","pyaudio","PyAudio","libasound","webrtc","WEBRTC","xhigh", "XHIGH"]
}
],
"allowCompoundWords": true
Expand Down
20 changes: 19 additions & 1 deletion sdk/ai/azure-ai-voicelive/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,31 @@
### Features Added

- **Support for Explicit Null Values**: Enhanced `RequestSession` to properly serialize explicitly set `None` values (e.g., `turn_detection=None` now correctly sends `"turn_detection": null` in the WebSocket message)
- **Filler Response Configuration**: Added support for filler response generation during latency or tool calls:
- `BasicFillerResponseConfig` for static filler texts that are randomly selected
- `LlmFillerResponseConfig` for LLM-generated context-aware filler responses
- `FillerTrigger` enum with `latency` and `tool` triggers
- `filler_response` field in `RequestSession` and `ResponseSession`
- **Foundry Agent Integration**: Added support for Azure AI Foundry agents:
- `FoundryAgentTool` for defining Foundry agent configurations
- `ResponseFoundryAgentCallItem` for Foundry agent call responses
- `FoundryAgentContextType` enum for context management (`no_context`, `agent_context`)
- Server events for Foundry agent call lifecycle: `ServerEventResponseFoundryAgentCallArgumentsDelta`, `ServerEventResponseFoundryAgentCallArgumentsDone`, `ServerEventResponseFoundryAgentCallInProgress`, `ServerEventResponseFoundryAgentCallCompleted`, `ServerEventResponseFoundryAgentCallFailed`
- **Reasoning Effort Control**: Added `reasoning_effort` field to `RequestSession`, `ResponseSession`, and `ResponseCreateParams` for controlling reasoning models effort levels with `ReasoningEffort` enum (`none`, `minimal`, `low`, `medium`, `high`, `xhigh`)
- **Response Metadata**: Added `metadata` field to `Response` and `ResponseCreateParams` for attaching up to 16 key-value pairs (max 64 chars for keys, 512 chars for values)
- **Array Encoding Support**: Enhanced serialization to support pipe, space, comma, and newline-delimited array encoding formats
- **Custom Text Normalization**: Added `custom_text_normalization_url` field to `AzureStandardVoice`, `AzureCustomVoice`, and `AzurePersonalVoice` for custom text normalization configurations

### Other Changes

- **Dependency Update**: Updated minimum `azure-core` version from 1.35.0 to 1.36.0
- **Dependency Update**: Updated minimum `azure-core` version from 1.36.0 to 1.37.0
- **Security Enhancement**: Removed `eval()` usage in serialization utilities, replaced with explicit type checking for improved security
- **Serialization Improvements**: Enhanced model_base deserialization for mutable types and array-encoded strings

### Bug Fixes

- **Audio Format Values**: Fixed `OutputAudioFormat` enum values to use underscore format (`pcm16_8000hz`, `pcm16_16000hz`) instead of hyphenated format for consistency with wire protocol and backward compatibility

## 1.2.0b2 (2025-11-20)

### Features Added
Expand Down
16 changes: 15 additions & 1 deletion sdk/ai/azure-ai-voicelive/apiview-properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
"azure.ai.voicelive.models.AzureSemanticVadMultilingual": "VoiceLive.AzureSemanticVadMultilingual",
"azure.ai.voicelive.models.AzureStandardVoice": "VoiceLive.AzureStandardVoice",
"azure.ai.voicelive.models.Background": "VoiceLive.Background",
"azure.ai.voicelive.models.FillerResponseConfigBase": "VoiceLive.FillerResponseConfigBase",
"azure.ai.voicelive.models.BasicFillerResponseConfig": "VoiceLive.BasicFillerResponseConfig",
"azure.ai.voicelive.models.CachedTokenDetails": "VoiceLive.CachedTokenDetails",
"azure.ai.voicelive.models.ClientEvent": "VoiceLive.ClientEvent",
"azure.ai.voicelive.models.ClientEventConversationItemCreate": "VoiceLive.ClientEventConversationItemCreate",
Expand All @@ -44,15 +46,17 @@
"azure.ai.voicelive.models.ContentPart": "VoiceLive.ContentPart",
"azure.ai.voicelive.models.ConversationItemBase": "VoiceLive.ConversationItemBase",
"azure.ai.voicelive.models.ErrorResponse": "VoiceLive.ErrorResponse",
"azure.ai.voicelive.models.Tool": "VoiceLive.Tool",
"azure.ai.voicelive.models.FoundryAgentTool": "VoiceLive.FoundryAgentTool",
"azure.ai.voicelive.models.FunctionCallItem": "VoiceLive.FunctionCallItem",
"azure.ai.voicelive.models.FunctionCallOutputItem": "VoiceLive.FunctionCallOutputItem",
"azure.ai.voicelive.models.Tool": "VoiceLive.Tool",
"azure.ai.voicelive.models.FunctionTool": "VoiceLive.FunctionTool",
"azure.ai.voicelive.models.IceServer": "VoiceLive.IceServer",
"azure.ai.voicelive.models.MessageContentPart": "VoiceLive.MessageContentPart",
"azure.ai.voicelive.models.InputAudioContentPart": "VoiceLive.InputAudioContentPart",
"azure.ai.voicelive.models.InputTextContentPart": "VoiceLive.InputTextContentPart",
"azure.ai.voicelive.models.InputTokenDetails": "VoiceLive.InputTokenDetails",
"azure.ai.voicelive.models.LlmFillerResponseConfig": "VoiceLive.LlmFillerResponseConfig",
"azure.ai.voicelive.models.LogProbProperties": "VoiceLive.LogProbProperties",
"azure.ai.voicelive.models.MCPApprovalResponseRequestItem": "VoiceLive.MCPApprovalResponseRequestItem",
"azure.ai.voicelive.models.MCPServer": "VoiceLive.MCPServer",
Expand All @@ -71,6 +75,7 @@
"azure.ai.voicelive.models.ResponseCreateParams": "VoiceLive.ResponseCreateParams",
"azure.ai.voicelive.models.ResponseFailedDetails": "VoiceLive.ResponseFailedDetails",
"azure.ai.voicelive.models.ResponseItem": "VoiceLive.ResponseItem",
"azure.ai.voicelive.models.ResponseFoundryAgentCallItem": "VoiceLive.ResponseFoundryAgentCallItem",
"azure.ai.voicelive.models.ResponseFunctionCallItem": "VoiceLive.ResponseFunctionCallItem",
"azure.ai.voicelive.models.ResponseFunctionCallOutputItem": "VoiceLive.ResponseFunctionCallOutputItem",
"azure.ai.voicelive.models.ResponseIncompleteDetails": "VoiceLive.ResponseIncompleteDetails",
Expand Down Expand Up @@ -112,6 +117,11 @@
"azure.ai.voicelive.models.ServerEventResponseContentPartDone": "VoiceLive.ServerEventResponseContentPartDone",
"azure.ai.voicelive.models.ServerEventResponseCreated": "VoiceLive.ServerEventResponseCreated",
"azure.ai.voicelive.models.ServerEventResponseDone": "VoiceLive.ServerEventResponseDone",
"azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallArgumentsDelta": "VoiceLive.ServerEventResponseFoundryAgentCallArgumentsDelta",
"azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallArgumentsDone": "VoiceLive.ServerEventResponseFoundryAgentCallArgumentsDone",
"azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallCompleted": "VoiceLive.ServerEventResponseFoundryAgentCallCompleted",
"azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallFailed": "VoiceLive.ServerEventResponseFoundryAgentCallFailed",
"azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallInProgress": "VoiceLive.ServerEventResponseFoundryAgentCallInProgress",
"azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta",
"azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDone": "VoiceLive.ServerEventResponseFunctionCallArgumentsDone",
"azure.ai.voicelive.models.ServerEventResponseMcpCallArgumentsDelta": "VoiceLive.ServerEventResponseMcpCallArgumentsDelta",
Expand Down Expand Up @@ -148,7 +158,9 @@
"azure.ai.voicelive.models.PersonalVoiceModels": "VoiceLive.PersonalVoiceModels",
"azure.ai.voicelive.models.OutputAudioFormat": "VoiceLive.OutputAudioFormat",
"azure.ai.voicelive.models.ToolType": "VoiceLive.ToolType",
"azure.ai.voicelive.models.FoundryAgentContextType": "VoiceLive.FoundryAgentContextType",
"azure.ai.voicelive.models.MCPApprovalType": "VoiceLive.MCPApprovalType",
"azure.ai.voicelive.models.ReasoningEffort": "VoiceLive.ReasoningEffort",
"azure.ai.voicelive.models.AnimationOutputType": "VoiceLive.AnimationOutputType",
"azure.ai.voicelive.models.InputAudioFormat": "VoiceLive.InputAudioFormat",
"azure.ai.voicelive.models.TurnDetectionType": "VoiceLive.TurnDetectionType",
Expand All @@ -158,6 +170,8 @@
"azure.ai.voicelive.models.AvatarOutputProtocol": "VoiceLive.AvatarOutputProtocol",
"azure.ai.voicelive.models.AudioTimestampType": "VoiceLive.AudioTimestampType",
"azure.ai.voicelive.models.ToolChoiceLiteral": "VoiceLive.ToolChoiceLiteral",
"azure.ai.voicelive.models.FillerResponseConfigType": "VoiceLive.FillerResponseConfigType",
"azure.ai.voicelive.models.FillerTrigger": "VoiceLive.FillerTrigger",
"azure.ai.voicelive.models.ResponseStatus": "VoiceLive.ResponseStatus",
"azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus",
"azure.ai.voicelive.models.RequestImageContentPartDetail": "VoiceLive.RequestImageContentPartDetail",
Expand Down
1 change: 1 addition & 0 deletions sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
from . import models as _models
Voice = Union[str, "_models.OpenAIVoiceName", "_models.OpenAIVoice", "_models.AzureVoice"]
ToolChoice = Union[str, "_models.ToolChoiceLiteral", "_models.ToolChoiceSelection"]
FillerResponseConfig = Union["_models.BasicFillerResponseConfig", "_models.LlmFillerResponseConfig"]
110 changes: 100 additions & 10 deletions sdk/ai/azure-ai-voicelive/azure/ai/voicelive/_utils/model_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

TZ_UTC = timezone.utc
_T = typing.TypeVar("_T")
_NONE_TYPE = type(None)


def _timedelta_as_isostr(td: timedelta) -> str:
Expand Down Expand Up @@ -171,6 +172,21 @@ def default(self, o): # pylint: disable=too-many-return-statements
r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{4}\s\d{2}:\d{2}:\d{2}\sGMT"
)

_ARRAY_ENCODE_MAPPING = {
"pipeDelimited": "|",
"spaceDelimited": " ",
"commaDelimited": ",",
"newlineDelimited": "\n",
}


def _deserialize_array_encoded(delimit: str, attr):
if isinstance(attr, str):
if attr == "":
return []
return attr.split(delimit)
return attr


def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime:
"""Deserialize ISO-8601 formatted string into Datetime object.
Expand Down Expand Up @@ -202,7 +218,7 @@ def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime:
test_utc = date_obj.utctimetuple()
if test_utc.tm_year > 9999 or test_utc.tm_year < 1:
raise OverflowError("Hit max or min date")
return date_obj
return date_obj # type: ignore[no-any-return]


def _deserialize_datetime_rfc7231(attr: typing.Union[str, datetime]) -> datetime:
Expand Down Expand Up @@ -256,7 +272,7 @@ def _deserialize_time(attr: typing.Union[str, time]) -> time:
"""
if isinstance(attr, time):
return attr
return isodate.parse_time(attr)
return isodate.parse_time(attr) # type: ignore[no-any-return]


def _deserialize_bytes(attr):
Expand Down Expand Up @@ -315,6 +331,8 @@ def _deserialize_int_as_str(attr):
def get_deserializer(annotation: typing.Any, rf: typing.Optional["_RestField"] = None):
if annotation is int and rf and rf._format == "str":
return _deserialize_int_as_str
if annotation is str and rf and rf._format in _ARRAY_ENCODE_MAPPING:
return functools.partial(_deserialize_array_encoded, _ARRAY_ENCODE_MAPPING[rf._format])
if rf and rf._format:
return _DESERIALIZE_MAPPING_WITHFORMAT.get(rf._format)
return _DESERIALIZE_MAPPING.get(annotation) # pyright: ignore
Expand Down Expand Up @@ -353,9 +371,39 @@ def __contains__(self, key: typing.Any) -> bool:
return key in self._data

def __getitem__(self, key: str) -> typing.Any:
# If this key has been deserialized (for mutable types), we need to handle serialization
if hasattr(self, "_attr_to_rest_field"):
cache_attr = f"_deserialized_{key}"
if hasattr(self, cache_attr):
rf = _get_rest_field(getattr(self, "_attr_to_rest_field"), key)
if rf:
value = self._data.get(key)
if isinstance(value, (dict, list, set)):
# For mutable types, serialize and return
# But also update _data with serialized form and clear flag
# so mutations via this returned value affect _data
serialized = _serialize(value, rf._format)
# If serialized form is same type (no transformation needed),
# return _data directly so mutations work
if isinstance(serialized, type(value)) and serialized == value:
return self._data.get(key)
# Otherwise return serialized copy and clear flag
try:
object.__delattr__(self, cache_attr)
except AttributeError:
pass
# Store serialized form back
self._data[key] = serialized
return serialized
return self._data.__getitem__(key)

def __setitem__(self, key: str, value: typing.Any) -> None:
# Clear any cached deserialized value when setting through dictionary access
cache_attr = f"_deserialized_{key}"
try:
object.__delattr__(self, cache_attr)
except AttributeError:
pass
self._data.__setitem__(key, value)

def __delitem__(self, key: str) -> None:
Expand Down Expand Up @@ -483,6 +531,8 @@ def _is_model(obj: typing.Any) -> bool:

def _serialize(o, format: typing.Optional[str] = None): # pylint: disable=too-many-return-statements
if isinstance(o, list):
if format in _ARRAY_ENCODE_MAPPING and all(isinstance(x, str) for x in o):
return _ARRAY_ENCODE_MAPPING[format].join(o)
return [_serialize(x, format) for x in o]
if isinstance(o, dict):
return {k: _serialize(v, format) for k, v in o.items()}
Expand Down Expand Up @@ -767,6 +817,17 @@ def _deserialize_sequence(
return obj
if isinstance(obj, ET.Element):
obj = list(obj)
try:
if (
isinstance(obj, str)
and isinstance(deserializer, functools.partial)
and isinstance(deserializer.args[0], functools.partial)
and deserializer.args[0].func == _deserialize_array_encoded # pylint: disable=comparison-with-callable
):
# encoded string may be deserialized to sequence
return deserializer(obj)
except: # pylint: disable=bare-except
Comment thread
xitzhang marked this conversation as resolved.
pass
return type(obj)(_deserialize(deserializer, entry, module) for entry in obj)


Expand Down Expand Up @@ -817,16 +878,16 @@ def _get_deserialize_callable_from_annotation( # pylint: disable=too-many-retur

# is it optional?
try:
if any(a for a in annotation.__args__ if a == type(None)): # pyright: ignore
if any(a is _NONE_TYPE for a in annotation.__args__): # pyright: ignore
if len(annotation.__args__) <= 2: # pyright: ignore
if_obj_deserializer = _get_deserialize_callable_from_annotation(
next(a for a in annotation.__args__ if a != type(None)), module, rf # pyright: ignore
next(a for a in annotation.__args__ if a is not _NONE_TYPE), module, rf # pyright: ignore
)

return functools.partial(_deserialize_with_optional, if_obj_deserializer)
# the type is Optional[Union[...]], we need to remove the None type from the Union
annotation_copy = copy.copy(annotation)
annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a != type(None)] # pyright: ignore
annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a is not _NONE_TYPE] # pyright: ignore
return _get_deserialize_callable_from_annotation(annotation_copy, module, rf)
except AttributeError:
pass
Expand Down Expand Up @@ -998,7 +1059,11 @@ def __init__(

@property
def _class_type(self) -> typing.Any:
return getattr(self._type, "args", [None])[0]
result = getattr(self._type, "args", [None])[0]
# type may be wrapped by nested functools.partial so we need to check for that
if isinstance(result, functools.partial):
return getattr(result, "args", [None])[0]
return result

@property
def _rest_name(self) -> str:
Expand All @@ -1009,14 +1074,37 @@ def _rest_name(self) -> str:
def __get__(self, obj: Model, type=None): # pylint: disable=redefined-builtin
# by this point, type and rest_name will have a value bc we default
# them in __new__ of the Model class
item = obj.get(self._rest_name)
# Use _data.get() directly to avoid triggering __getitem__ which clears the cache
item = obj._data.get(self._rest_name)
if item is None:
return item
if self._is_model:
return item
return _deserialize(self._type, _serialize(item, self._format), rf=self)

# For mutable types, we want mutations to directly affect _data
# Check if we've already deserialized this value
cache_attr = f"_deserialized_{self._rest_name}"
if hasattr(obj, cache_attr):
# Return the value from _data directly (it's been deserialized in place)
return obj._data.get(self._rest_name)

deserialized = _deserialize(self._type, _serialize(item, self._format), rf=self)

# For mutable types, store the deserialized value back in _data
# so mutations directly affect _data
if isinstance(deserialized, (dict, list, set)):
obj._data[self._rest_name] = deserialized
object.__setattr__(obj, cache_attr, True) # Mark as deserialized
return deserialized

return deserialized

def __set__(self, obj: Model, value) -> None:
# Clear the cached deserialized object when setting a new value
cache_attr = f"_deserialized_{self._rest_name}"
if hasattr(obj, cache_attr):
object.__delattr__(obj, cache_attr)

if value is None:
# we want to wipe out entries if users set attr to None
try:
Expand Down Expand Up @@ -1184,7 +1272,7 @@ def _get_wrapped_element(
_get_element(v, exclude_readonly, meta, wrapped_element)
else:
wrapped_element.text = _get_primitive_type_value(v)
return wrapped_element
return wrapped_element # type: ignore[no-any-return]


def _get_primitive_type_value(v) -> str:
Expand All @@ -1197,7 +1285,9 @@ def _get_primitive_type_value(v) -> str:
return str(v)


def _create_xml_element(tag, prefix=None, ns=None):
def _create_xml_element(
tag: typing.Any, prefix: typing.Optional[str] = None, ns: typing.Optional[str] = None
) -> ET.Element:
if prefix and ns:
ET.register_namespace(prefix, ns)
if ns:
Expand Down
Loading