Azure · xitzhang · Jan 30, 2026 · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026
diff --git a/.vscode/cspell.json b/.vscode/cspell.json
@@ -2188,7 +2188,7 @@
     },
     {
       "filename": "sdk/ai/azure-ai-voicelive/**",
-      "words": ["viseme","VISEME","ulaw","ULAW","logprobs","pyaudio","PyAudio","libasound","webrtc","WEBRTC"]
+      "words": ["viseme","VISEME","ulaw","ULAW","logprobs","pyaudio","PyAudio","libasound","webrtc","WEBRTC","xhigh", "XHIGH"]
     }
   ],
   "allowCompoundWords": true

@@ -5,13 +5,31 @@
 ### Features Added
 
 - **Support for Explicit Null Values**: Enhanced `RequestSession` to properly serialize explicitly set `None` values (e.g., `turn_detection=None` now correctly sends `"turn_detection": null` in the WebSocket message)
+- **Filler Response Configuration**: Added support for filler response generation during latency or tool calls:
+  - `BasicFillerResponseConfig` for static filler texts that are randomly selected
+  - `LlmFillerResponseConfig` for LLM-generated context-aware filler responses
+  - `FillerTrigger` enum with `latency` and `tool` triggers
+  - `filler_response` field in `RequestSession` and `ResponseSession`
+- **Foundry Agent Integration**: Added support for Azure AI Foundry agents:
+  - `FoundryAgentTool` for defining Foundry agent configurations
+  - `ResponseFoundryAgentCallItem` for Foundry agent call responses
+  - `FoundryAgentContextType` enum for context management (`no_context`, `agent_context`)
+  - Server events for Foundry agent call lifecycle: `ServerEventResponseFoundryAgentCallArgumentsDelta`, `ServerEventResponseFoundryAgentCallArgumentsDone`, `ServerEventResponseFoundryAgentCallInProgress`, `ServerEventResponseFoundryAgentCallCompleted`, `ServerEventResponseFoundryAgentCallFailed`
+- **Reasoning Effort Control**: Added `reasoning_effort` field to `RequestSession`, `ResponseSession`, and `ResponseCreateParams` for controlling reasoning models effort levels with `ReasoningEffort` enum (`none`, `minimal`, `low`, `medium`, `high`, `xhigh`)
+- **Response Metadata**: Added `metadata` field to `Response` and `ResponseCreateParams` for attaching up to 16 key-value pairs (max 64 chars for keys, 512 chars for values)
+- **Array Encoding Support**: Enhanced serialization to support pipe, space, comma, and newline-delimited array encoding formats
+- **Custom Text Normalization**: Added `custom_text_normalization_url` field to `AzureStandardVoice`, `AzureCustomVoice`, and `AzurePersonalVoice` for custom text normalization configurations
 
 ### Other Changes
 
-- **Dependency Update**: Updated minimum `azure-core` version from 1.35.0 to 1.36.0
+- **Dependency Update**: Updated minimum `azure-core` version from 1.36.0 to 1.37.0
+- **Security Enhancement**: Removed `eval()` usage in serialization utilities, replaced with explicit type checking for improved security
+- **Serialization Improvements**: Enhanced model_base deserialization for mutable types and array-encoded strings
 
 ### Bug Fixes
 
+- **Audio Format Values**: Fixed `OutputAudioFormat` enum values to use underscore format (`pcm16_8000hz`, `pcm16_16000hz`) instead of hyphenated format for consistency with wire protocol and backward compatibility
+
 ## 1.2.0b2 (2025-11-20)
 
 ### Features Added

@@ -23,6 +23,8 @@
         "azure.ai.voicelive.models.AzureSemanticVadMultilingual": "VoiceLive.AzureSemanticVadMultilingual",
         "azure.ai.voicelive.models.AzureStandardVoice": "VoiceLive.AzureStandardVoice",
         "azure.ai.voicelive.models.Background": "VoiceLive.Background",
+        "azure.ai.voicelive.models.FillerResponseConfigBase": "VoiceLive.FillerResponseConfigBase",
+        "azure.ai.voicelive.models.BasicFillerResponseConfig": "VoiceLive.BasicFillerResponseConfig",
         "azure.ai.voicelive.models.CachedTokenDetails": "VoiceLive.CachedTokenDetails",
         "azure.ai.voicelive.models.ClientEvent": "VoiceLive.ClientEvent",
         "azure.ai.voicelive.models.ClientEventConversationItemCreate": "VoiceLive.ClientEventConversationItemCreate",
@@ -44,15 +46,17 @@
         "azure.ai.voicelive.models.ContentPart": "VoiceLive.ContentPart",
         "azure.ai.voicelive.models.ConversationItemBase": "VoiceLive.ConversationItemBase",
         "azure.ai.voicelive.models.ErrorResponse": "VoiceLive.ErrorResponse",
+        "azure.ai.voicelive.models.Tool": "VoiceLive.Tool",
+        "azure.ai.voicelive.models.FoundryAgentTool": "VoiceLive.FoundryAgentTool",
         "azure.ai.voicelive.models.FunctionCallItem": "VoiceLive.FunctionCallItem",
         "azure.ai.voicelive.models.FunctionCallOutputItem": "VoiceLive.FunctionCallOutputItem",
-        "azure.ai.voicelive.models.Tool": "VoiceLive.Tool",
         "azure.ai.voicelive.models.FunctionTool": "VoiceLive.FunctionTool",
         "azure.ai.voicelive.models.IceServer": "VoiceLive.IceServer",
         "azure.ai.voicelive.models.MessageContentPart": "VoiceLive.MessageContentPart",
         "azure.ai.voicelive.models.InputAudioContentPart": "VoiceLive.InputAudioContentPart",
         "azure.ai.voicelive.models.InputTextContentPart": "VoiceLive.InputTextContentPart",
         "azure.ai.voicelive.models.InputTokenDetails": "VoiceLive.InputTokenDetails",
+        "azure.ai.voicelive.models.LlmFillerResponseConfig": "VoiceLive.LlmFillerResponseConfig",
         "azure.ai.voicelive.models.LogProbProperties": "VoiceLive.LogProbProperties",
         "azure.ai.voicelive.models.MCPApprovalResponseRequestItem": "VoiceLive.MCPApprovalResponseRequestItem",
         "azure.ai.voicelive.models.MCPServer": "VoiceLive.MCPServer",
@@ -71,6 +75,7 @@
         "azure.ai.voicelive.models.ResponseCreateParams": "VoiceLive.ResponseCreateParams",
         "azure.ai.voicelive.models.ResponseFailedDetails": "VoiceLive.ResponseFailedDetails",
         "azure.ai.voicelive.models.ResponseItem": "VoiceLive.ResponseItem",
+        "azure.ai.voicelive.models.ResponseFoundryAgentCallItem": "VoiceLive.ResponseFoundryAgentCallItem",
         "azure.ai.voicelive.models.ResponseFunctionCallItem": "VoiceLive.ResponseFunctionCallItem",
         "azure.ai.voicelive.models.ResponseFunctionCallOutputItem": "VoiceLive.ResponseFunctionCallOutputItem",
         "azure.ai.voicelive.models.ResponseIncompleteDetails": "VoiceLive.ResponseIncompleteDetails",
@@ -112,6 +117,11 @@
         "azure.ai.voicelive.models.ServerEventResponseContentPartDone": "VoiceLive.ServerEventResponseContentPartDone",
         "azure.ai.voicelive.models.ServerEventResponseCreated": "VoiceLive.ServerEventResponseCreated",
         "azure.ai.voicelive.models.ServerEventResponseDone": "VoiceLive.ServerEventResponseDone",
+        "azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallArgumentsDelta": "VoiceLive.ServerEventResponseFoundryAgentCallArgumentsDelta",
+        "azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallArgumentsDone": "VoiceLive.ServerEventResponseFoundryAgentCallArgumentsDone",
+        "azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallCompleted": "VoiceLive.ServerEventResponseFoundryAgentCallCompleted",
+        "azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallFailed": "VoiceLive.ServerEventResponseFoundryAgentCallFailed",
+        "azure.ai.voicelive.models.ServerEventResponseFoundryAgentCallInProgress": "VoiceLive.ServerEventResponseFoundryAgentCallInProgress",
         "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDelta": "VoiceLive.ServerEventResponseFunctionCallArgumentsDelta",
         "azure.ai.voicelive.models.ServerEventResponseFunctionCallArgumentsDone": "VoiceLive.ServerEventResponseFunctionCallArgumentsDone",
         "azure.ai.voicelive.models.ServerEventResponseMcpCallArgumentsDelta": "VoiceLive.ServerEventResponseMcpCallArgumentsDelta",
@@ -148,7 +158,9 @@
         "azure.ai.voicelive.models.PersonalVoiceModels": "VoiceLive.PersonalVoiceModels",
         "azure.ai.voicelive.models.OutputAudioFormat": "VoiceLive.OutputAudioFormat",
         "azure.ai.voicelive.models.ToolType": "VoiceLive.ToolType",
+        "azure.ai.voicelive.models.FoundryAgentContextType": "VoiceLive.FoundryAgentContextType",
         "azure.ai.voicelive.models.MCPApprovalType": "VoiceLive.MCPApprovalType",
+        "azure.ai.voicelive.models.ReasoningEffort": "VoiceLive.ReasoningEffort",
         "azure.ai.voicelive.models.AnimationOutputType": "VoiceLive.AnimationOutputType",
         "azure.ai.voicelive.models.InputAudioFormat": "VoiceLive.InputAudioFormat",
         "azure.ai.voicelive.models.TurnDetectionType": "VoiceLive.TurnDetectionType",
@@ -158,6 +170,8 @@
         "azure.ai.voicelive.models.AvatarOutputProtocol": "VoiceLive.AvatarOutputProtocol",
         "azure.ai.voicelive.models.AudioTimestampType": "VoiceLive.AudioTimestampType",
         "azure.ai.voicelive.models.ToolChoiceLiteral": "VoiceLive.ToolChoiceLiteral",
+        "azure.ai.voicelive.models.FillerResponseConfigType": "VoiceLive.FillerResponseConfigType",
+        "azure.ai.voicelive.models.FillerTrigger": "VoiceLive.FillerTrigger",
         "azure.ai.voicelive.models.ResponseStatus": "VoiceLive.ResponseStatus",
         "azure.ai.voicelive.models.ResponseItemStatus": "VoiceLive.ResponseItemStatus",
         "azure.ai.voicelive.models.RequestImageContentPartDetail": "VoiceLive.RequestImageContentPartDetail",

@@ -12,3 +12,4 @@
     from . import models as _models
 Voice = Union[str, "_models.OpenAIVoiceName", "_models.OpenAIVoice", "_models.AzureVoice"]
 ToolChoice = Union[str, "_models.ToolChoiceLiteral", "_models.ToolChoiceSelection"]
+FillerResponseConfig = Union["_models.BasicFillerResponseConfig", "_models.LlmFillerResponseConfig"]
@@ -37,6 +37,7 @@
 
 TZ_UTC = timezone.utc
 _T = typing.TypeVar("_T")
+_NONE_TYPE = type(None)
 
 
 def _timedelta_as_isostr(td: timedelta) -> str:
@@ -171,6 +172,21 @@ def default(self, o):  # pylint: disable=too-many-return-statements
     r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{4}\s\d{2}:\d{2}:\d{2}\sGMT"
 )
 
+_ARRAY_ENCODE_MAPPING = {
+    "pipeDelimited": "|",
+    "spaceDelimited": " ",
+    "commaDelimited": ",",
+    "newlineDelimited": "\n",
+}
+
+
+def _deserialize_array_encoded(delimit: str, attr):
+    if isinstance(attr, str):
+        if attr == "":
+            return []
+        return attr.split(delimit)
+    return attr
+
 
 def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime:
     """Deserialize ISO-8601 formatted string into Datetime object.
@@ -202,7 +218,7 @@ def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime:
     test_utc = date_obj.utctimetuple()
     if test_utc.tm_year > 9999 or test_utc.tm_year < 1:
         raise OverflowError("Hit max or min date")
-    return date_obj
+    return date_obj  # type: ignore[no-any-return]
 
 
 def _deserialize_datetime_rfc7231(attr: typing.Union[str, datetime]) -> datetime:
@@ -256,7 +272,7 @@ def _deserialize_time(attr: typing.Union[str, time]) -> time:
     """
     if isinstance(attr, time):
         return attr
-    return isodate.parse_time(attr)
+    return isodate.parse_time(attr)  # type: ignore[no-any-return]
 
 
 def _deserialize_bytes(attr):
@@ -315,6 +331,8 @@ def _deserialize_int_as_str(attr):
 def get_deserializer(annotation: typing.Any, rf: typing.Optional["_RestField"] = None):
     if annotation is int and rf and rf._format == "str":
         return _deserialize_int_as_str
+    if annotation is str and rf and rf._format in _ARRAY_ENCODE_MAPPING:
+        return functools.partial(_deserialize_array_encoded, _ARRAY_ENCODE_MAPPING[rf._format])
     if rf and rf._format:
         return _DESERIALIZE_MAPPING_WITHFORMAT.get(rf._format)
     return _DESERIALIZE_MAPPING.get(annotation)  # pyright: ignore
@@ -353,9 +371,39 @@ def __contains__(self, key: typing.Any) -> bool:
         return key in self._data
 
     def __getitem__(self, key: str) -> typing.Any:
+        # If this key has been deserialized (for mutable types), we need to handle serialization
+        if hasattr(self, "_attr_to_rest_field"):
+            cache_attr = f"_deserialized_{key}"
+            if hasattr(self, cache_attr):
+                rf = _get_rest_field(getattr(self, "_attr_to_rest_field"), key)
+                if rf:
+                    value = self._data.get(key)
+                    if isinstance(value, (dict, list, set)):
+                        # For mutable types, serialize and return
+                        # But also update _data with serialized form and clear flag
+                        # so mutations via this returned value affect _data
+                        serialized = _serialize(value, rf._format)
+                        # If serialized form is same type (no transformation needed),
+                        # return _data directly so mutations work
+                        if isinstance(serialized, type(value)) and serialized == value:
+                            return self._data.get(key)
+                        # Otherwise return serialized copy and clear flag
+                        try:
+                            object.__delattr__(self, cache_attr)
+                        except AttributeError:
+                            pass
+                        # Store serialized form back
+                        self._data[key] = serialized
+                        return serialized
         return self._data.__getitem__(key)
 
     def __setitem__(self, key: str, value: typing.Any) -> None:
+        # Clear any cached deserialized value when setting through dictionary access
+        cache_attr = f"_deserialized_{key}"
+        try:
+            object.__delattr__(self, cache_attr)
+        except AttributeError:
+            pass
         self._data.__setitem__(key, value)
 
     def __delitem__(self, key: str) -> None:
@@ -483,6 +531,8 @@ def _is_model(obj: typing.Any) -> bool:
 
 def _serialize(o, format: typing.Optional[str] = None):  # pylint: disable=too-many-return-statements
     if isinstance(o, list):
+        if format in _ARRAY_ENCODE_MAPPING and all(isinstance(x, str) for x in o):
+            return _ARRAY_ENCODE_MAPPING[format].join(o)
         return [_serialize(x, format) for x in o]
     if isinstance(o, dict):
         return {k: _serialize(v, format) for k, v in o.items()}
@@ -767,6 +817,17 @@ def _deserialize_sequence(
         return obj
     if isinstance(obj, ET.Element):
         obj = list(obj)
+    try:
+        if (
+            isinstance(obj, str)
+            and isinstance(deserializer, functools.partial)
+            and isinstance(deserializer.args[0], functools.partial)
+            and deserializer.args[0].func == _deserialize_array_encoded  # pylint: disable=comparison-with-callable
+        ):
+            # encoded string may be deserialized to sequence
+            return deserializer(obj)
+    except:  # pylint: disable=bare-except
+        pass
     return type(obj)(_deserialize(deserializer, entry, module) for entry in obj)
 
 
@@ -817,16 +878,16 @@ def _get_deserialize_callable_from_annotation(  # pylint: disable=too-many-retur
 
     # is it optional?
     try:
-        if any(a for a in annotation.__args__ if a == type(None)):  # pyright: ignore
+        if any(a is _NONE_TYPE for a in annotation.__args__):  # pyright: ignore
             if len(annotation.__args__) <= 2:  # pyright: ignore
                 if_obj_deserializer = _get_deserialize_callable_from_annotation(
-                    next(a for a in annotation.__args__ if a != type(None)), module, rf  # pyright: ignore
+                    next(a for a in annotation.__args__ if a is not _NONE_TYPE), module, rf  # pyright: ignore
                 )
 
                 return functools.partial(_deserialize_with_optional, if_obj_deserializer)
             # the type is Optional[Union[...]], we need to remove the None type from the Union
             annotation_copy = copy.copy(annotation)
-            annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a != type(None)]  # pyright: ignore
+            annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a is not _NONE_TYPE]  # pyright: ignore
             return _get_deserialize_callable_from_annotation(annotation_copy, module, rf)
     except AttributeError:
         pass
@@ -998,7 +1059,11 @@ def __init__(
 
     @property
     def _class_type(self) -> typing.Any:
-        return getattr(self._type, "args", [None])[0]
+        result = getattr(self._type, "args", [None])[0]
+        # type may be wrapped by nested functools.partial so we need to check for that
+        if isinstance(result, functools.partial):
+            return getattr(result, "args", [None])[0]
+        return result
 
     @property
     def _rest_name(self) -> str:
@@ -1009,14 +1074,37 @@ def _rest_name(self) -> str:
     def __get__(self, obj: Model, type=None):  # pylint: disable=redefined-builtin
         # by this point, type and rest_name will have a value bc we default
         # them in __new__ of the Model class
-        item = obj.get(self._rest_name)
+        # Use _data.get() directly to avoid triggering __getitem__ which clears the cache
+        item = obj._data.get(self._rest_name)
         if item is None:
             return item
         if self._is_model:
             return item
-        return _deserialize(self._type, _serialize(item, self._format), rf=self)
+
+        # For mutable types, we want mutations to directly affect _data
+        # Check if we've already deserialized this value
+        cache_attr = f"_deserialized_{self._rest_name}"
+        if hasattr(obj, cache_attr):
+            # Return the value from _data directly (it's been deserialized in place)
+            return obj._data.get(self._rest_name)
+
+        deserialized = _deserialize(self._type, _serialize(item, self._format), rf=self)
+
+        # For mutable types, store the deserialized value back in _data
+        # so mutations directly affect _data
+        if isinstance(deserialized, (dict, list, set)):
+            obj._data[self._rest_name] = deserialized
+            object.__setattr__(obj, cache_attr, True)  # Mark as deserialized
+            return deserialized
+
+        return deserialized
 
     def __set__(self, obj: Model, value) -> None:
+        # Clear the cached deserialized object when setting a new value
+        cache_attr = f"_deserialized_{self._rest_name}"
+        if hasattr(obj, cache_attr):
+            object.__delattr__(obj, cache_attr)
+
         if value is None:
             # we want to wipe out entries if users set attr to None
             try:
@@ -1184,7 +1272,7 @@ def _get_wrapped_element(
         _get_element(v, exclude_readonly, meta, wrapped_element)
     else:
         wrapped_element.text = _get_primitive_type_value(v)
-    return wrapped_element
+    return wrapped_element  # type: ignore[no-any-return]
 
 
 def _get_primitive_type_value(v) -> str:
@@ -1197,7 +1285,9 @@ def _get_primitive_type_value(v) -> str:
     return str(v)
 
 
-def _create_xml_element(tag, prefix=None, ns=None):
+def _create_xml_element(
+    tag: typing.Any, prefix: typing.Optional[str] = None, ns: typing.Optional[str] = None
+) -> ET.Element:
     if prefix and ns:
         ET.register_namespace(prefix, ns)
     if ns: