Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions sdk/cognitiveservices/azure-ai-transcription/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Release History

## 1.0.0b3 (2026-02-03)

### Features Added

- `EnhancedModeProperties` now automatically sets `enabled=True` when any of `task`, `target_language`, or `prompt` is specified

### Bugs Fixed

- Fixed Enhanced Mode not being activated when using `EnhancedModeProperties` without explicitly setting `enabled=True`

## 1.0.0b2 (2025-12-19)

### Bugs Fixed
Expand Down
25 changes: 4 additions & 21 deletions sdk/cognitiveservices/azure-ai-transcription/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,25 +244,15 @@ api_key = os.environ["AZURE_SPEECH_API_KEY"]
client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))

# Path to your audio file
import pathlib

audio_file_path = pathlib.Path(__file__).parent / "assets" / "audio.wav"

# Open and read the audio file
with open(audio_file_path, "rb") as audio_file:
# Create enhanced mode properties
# Enable enhanced mode for advanced processing capabilities
enhanced_mode = EnhancedModeProperties(
task="translation", # Specify the task type (e.g., "translation", "summarization")
target_language="es-ES", # Target language for translation
prompt=[
"Translate the following audio to Spanish",
"Focus on technical terminology",
], # Optional prompts to guide the enhanced mode
)
# Enhanced mode is automatically enabled when task is specified
enhanced_mode = EnhancedModeProperties(task="transcribe")

# Create transcription options with enhanced mode
options = TranscriptionOptions(locales=["en-US"], enhanced_mode=enhanced_mode)
options = TranscriptionOptions(enhanced_mode=enhanced_mode)

# Create the request content
request_content = TranscriptionContent(definition=options, audio=audio_file)
Expand All @@ -271,14 +261,7 @@ with open(audio_file_path, "rb") as audio_file:
result = client.transcribe(request_content)

# Print the transcription result
print("Transcription with enhanced mode:")
print(f"{result.combined_phrases[0].text}")

# Print individual phrases if available
if result.phrases:
print("\nDetailed phrases:")
for phrase in result.phrases:
print(f" [{phrase.offset_milliseconds}ms]: {phrase.text}")
print(result.combined_phrases[0].text)
```

<!-- END SNIPPET -->
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{
Comment thread
amber-yujueWang marked this conversation as resolved.
"apiVersion": "2025-10-15",
"service_name": "Cognitive Services",
"msDocService": "cognitive-services"
"apiVersion": "2025-10-15"
}
2 changes: 1 addition & 1 deletion sdk/cognitiveservices/azure-ai-transcription/assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "python",
"TagPrefix": "python/cognitiveservices/azure-ai-transcription",
"Tag": "python/cognitiveservices/azure-ai-transcription_5f9f60e291"
"Tag": "python/cognitiveservices/azure-ai-transcription_807296d8e0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class TranscriptionClient(_TranscriptionClientOperationsMixin):
"""TranscriptionClient.

:param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for example:
`https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_.
`https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_).
Required.
:type endpoint: str
:param credential: Credential used to authenticate requests to the service. Is either a key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class TranscriptionClientConfiguration: # pylint: disable=too-many-instance-att
attributes.

:param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for example:
`https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_.
`https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_).
Required.
:type endpoint: str
:param credential: Credential used to authenticate requests to the service. Is either a key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,11 @@ def transcribe(self, body: Union[_models.TranscriptionContent, JSON], **kwargs:
_body = body.as_dict() if isinstance(body, _Model) else body
_file_fields: list[str] = ["audio"]
_data_fields: list[str] = ["definition"]
_files, _data = prepare_multipart_form_data(_body, _file_fields, _data_fields)
_files = prepare_multipart_form_data(_body, _file_fields, _data_fields)

_request = build_transcription_transcribe_request(
api_version=self._config.api_version,
files=_files,
data=_data,
headers=_headers,
params=_params,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@
from typing import Any, Optional
import json
from azure.core.tracing.decorator import distributed_trace
from azure.core.exceptions import map_error, HttpResponseError, ClientAuthenticationError, ResourceNotFoundError, ResourceExistsError, ResourceNotModifiedError
from azure.core.exceptions import (
map_error,
HttpResponseError,
ClientAuthenticationError,
ResourceNotFoundError,
ResourceExistsError,
ResourceNotModifiedError,
)

from .. import models as _models
from .._utils.model_base import _deserialize, SdkJSONEncoder
Expand Down Expand Up @@ -93,7 +100,9 @@ def transcribe_from_url(
}
_request.url = self._client.format_url(_request.url, **path_format_arguments)

pipeline_response = self._client._pipeline.run(_request, stream=False, **kwargs) # pylint: disable=protected-access
pipeline_response = self._client._pipeline.run( # pylint: disable=protected-access
_request, stream=False, **kwargs
)
response = pipeline_response.http_response

if response.status_code not in [200]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

TZ_UTC = timezone.utc
_T = typing.TypeVar("_T")
_NONE_TYPE = type(None)


def _timedelta_as_isostr(td: timedelta) -> str:
Expand Down Expand Up @@ -171,6 +172,21 @@ def default(self, o): # pylint: disable=too-many-return-statements
r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s\d{4}\s\d{2}:\d{2}:\d{2}\sGMT"
)

# Delimiter for each array-encoding format name. `_serialize` joins a list of
# strings with the matching delimiter, and `get_deserializer` binds the same
# delimiter to `_deserialize_array_encoded` for `str` fields with that format.
_ARRAY_ENCODE_MAPPING = {
    "pipeDelimited": "|",
    "spaceDelimited": " ",
    "commaDelimited": ",",
    "newlineDelimited": "\n",
}


def _deserialize_array_encoded(delimit: str, attr):
    """Split a delimiter-encoded string into a list of its items.

    :param str delimit: Separator placed between items in the encoded string.
    :param attr: Value to decode. Anything that is not a ``str`` is handed
     back untouched.
    :return: ``[]`` for the empty string, the split items for any other
     string, otherwise ``attr`` itself.
    """
    if not isinstance(attr, str):
        return attr
    # "".split(delimit) would give [""]; an empty payload means "no items".
    return [] if attr == "" else attr.split(delimit)


def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime:
"""Deserialize ISO-8601 formatted string into Datetime object.
Expand Down Expand Up @@ -202,7 +218,7 @@ def _deserialize_datetime(attr: typing.Union[str, datetime]) -> datetime:
test_utc = date_obj.utctimetuple()
if test_utc.tm_year > 9999 or test_utc.tm_year < 1:
raise OverflowError("Hit max or min date")
return date_obj
return date_obj # type: ignore[no-any-return]


def _deserialize_datetime_rfc7231(attr: typing.Union[str, datetime]) -> datetime:
Expand Down Expand Up @@ -256,7 +272,7 @@ def _deserialize_time(attr: typing.Union[str, time]) -> time:
"""
if isinstance(attr, time):
return attr
return isodate.parse_time(attr)
return isodate.parse_time(attr) # type: ignore[no-any-return]


def _deserialize_bytes(attr):
Expand Down Expand Up @@ -315,6 +331,8 @@ def _deserialize_int_as_str(attr):
def get_deserializer(annotation: typing.Any, rf: typing.Optional["_RestField"] = None):
    """Look up the deserializer callable for an annotation and optional rest field.

    Resolution order:

    1. ``int`` annotated field whose format is ``"str"``.
    2. ``str`` annotated field whose format is one of the array encodings.
    3. Any other non-empty format, looked up in the format table.
    4. Fallback: lookup by the annotation itself.

    :param annotation: The type annotation of the field.
    :param rf: The rest field carrying an optional ``_format``; may be None.
    :return: A deserializer callable, or None when a table lookup has no entry.
    """
    field_format = rf._format if rf else None

    if annotation is int and field_format == "str":
        return _deserialize_int_as_str

    if annotation is str and field_format in _ARRAY_ENCODE_MAPPING:
        delimiter = _ARRAY_ENCODE_MAPPING[field_format]
        return functools.partial(_deserialize_array_encoded, delimiter)

    if field_format:
        return _DESERIALIZE_MAPPING_WITHFORMAT.get(field_format)

    return _DESERIALIZE_MAPPING.get(annotation)  # pyright: ignore
Expand Down Expand Up @@ -353,9 +371,39 @@ def __contains__(self, key: typing.Any) -> bool:
return key in self._data

def __getitem__(self, key: str) -> typing.Any:
    """Return the stored value for ``key``, re-serializing cached mutable values.

    When the attribute descriptor has deserialized a dict/list/set in place
    (signalled by a ``_deserialized_<key>`` flag on the instance), dictionary
    access must hand back the serialized form again.

    :param str key: The rest name to look up.
    :return: The value held in ``_data`` for ``key``.
    :raises KeyError: If ``key`` is absent and no cached value applies.
    """
    # Only instances carrying rest-field metadata and a "deserialized" flag
    # for this key need special handling.
    if not (hasattr(self, "_attr_to_rest_field") and hasattr(self, f"_deserialized_{key}")):
        return self._data.__getitem__(key)

    rest_field = _get_rest_field(getattr(self, "_attr_to_rest_field"), key)
    if not rest_field:
        return self._data.__getitem__(key)

    current = self._data.get(key)
    if not isinstance(current, (dict, list, set)):
        return self._data.__getitem__(key)

    as_wire = _serialize(current, rest_field._format)

    # Serialization changed nothing: hand out the live object so that
    # mutations made through the returned value still land in _data.
    if isinstance(as_wire, type(current)) and as_wire == current:
        return self._data.get(key)

    # Serialization produced a different representation: drop the flag,
    # persist the serialized form and return it.
    try:
        object.__delattr__(self, f"_deserialized_{key}")
    except AttributeError:
        pass
    self._data[key] = as_wire
    return as_wire

def __setitem__(self, key: str, value: typing.Any) -> None:
    """Store ``value`` under ``key`` and invalidate its deserialized-cache flag.

    :param str key: The rest name being assigned.
    :param value: The new value to place in ``_data``.
    """
    flag_name = f"_deserialized_{key}"
    try:
        # Writing through dictionary access replaces whatever the attribute
        # descriptor cached, so the marker has to go.
        object.__delattr__(self, flag_name)
    except AttributeError:
        # No marker was set for this key; nothing to invalidate.
        pass
    self._data[key] = value

def __delitem__(self, key: str) -> None:
Expand Down Expand Up @@ -483,6 +531,8 @@ def _is_model(obj: typing.Any) -> bool:

def _serialize(o, format: typing.Optional[str] = None): # pylint: disable=too-many-return-statements
if isinstance(o, list):
if format in _ARRAY_ENCODE_MAPPING and all(isinstance(x, str) for x in o):
return _ARRAY_ENCODE_MAPPING[format].join(o)
return [_serialize(x, format) for x in o]
if isinstance(o, dict):
return {k: _serialize(v, format) for k, v in o.items()}
Expand Down Expand Up @@ -758,6 +808,14 @@ def _deserialize_multiple_sequence(
return type(obj)(_deserialize(deserializer, entry, module) for entry, deserializer in zip(obj, entry_deserializers))


def _is_array_encoded_deserializer(deserializer: functools.partial) -> bool:
    """Tell whether ``deserializer`` wraps the array-encoded string deserializer.

    Matches a ``functools.partial`` whose first positional argument is itself
    a ``functools.partial`` of ``_deserialize_array_encoded``.

    :param deserializer: Candidate callable. Anything that is not a
     ``functools.partial`` yields False.
    :return: True only for the nested-partial shape described above.
    :rtype: bool
    """
    if not isinstance(deserializer, functools.partial):
        return False
    # A partial built with keyword arguments only has an empty ``args`` tuple;
    # indexing it unguarded would raise IndexError instead of answering False.
    if not deserializer.args:
        return False
    inner = deserializer.args[0]
    # Function objects compare by identity, so ``is`` states the intent directly.
    return isinstance(inner, functools.partial) and inner.func is _deserialize_array_encoded


def _deserialize_sequence(
deserializer: typing.Optional[typing.Callable],
module: typing.Optional[str],
Expand All @@ -767,6 +825,19 @@ def _deserialize_sequence(
return obj
if isinstance(obj, ET.Element):
obj = list(obj)

# encoded string may be deserialized to sequence
if isinstance(obj, str) and isinstance(deserializer, functools.partial):
# for list[str]
if _is_array_encoded_deserializer(deserializer):
return deserializer(obj)

# for list[Union[...]]
if isinstance(deserializer.args[0], list):
for sub_deserializer in deserializer.args[0]:
if _is_array_encoded_deserializer(sub_deserializer):
return sub_deserializer(obj)

return type(obj)(_deserialize(deserializer, entry, module) for entry in obj)


Expand Down Expand Up @@ -817,16 +888,16 @@ def _get_deserialize_callable_from_annotation( # pylint: disable=too-many-retur

# is it optional?
try:
if any(a for a in annotation.__args__ if a == type(None)): # pyright: ignore
if any(a is _NONE_TYPE for a in annotation.__args__): # pyright: ignore
if len(annotation.__args__) <= 2: # pyright: ignore
if_obj_deserializer = _get_deserialize_callable_from_annotation(
next(a for a in annotation.__args__ if a != type(None)), module, rf # pyright: ignore
next(a for a in annotation.__args__ if a is not _NONE_TYPE), module, rf # pyright: ignore
)

return functools.partial(_deserialize_with_optional, if_obj_deserializer)
# the type is Optional[Union[...]], we need to remove the None type from the Union
annotation_copy = copy.copy(annotation)
annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a != type(None)] # pyright: ignore
annotation_copy.__args__ = [a for a in annotation_copy.__args__ if a is not _NONE_TYPE] # pyright: ignore
return _get_deserialize_callable_from_annotation(annotation_copy, module, rf)
except AttributeError:
pass
Expand Down Expand Up @@ -952,7 +1023,7 @@ def _failsafe_deserialize(
) -> typing.Any:
try:
return _deserialize(deserializer, response.json(), module, rf, format)
except DeserializationError:
except Exception: # pylint: disable=broad-except
_LOGGER.warning(
"Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True
)
Expand All @@ -965,7 +1036,7 @@ def _failsafe_deserialize_xml(
) -> typing.Any:
try:
return _deserialize_xml(deserializer, response.text())
except DeserializationError:
except Exception: # pylint: disable=broad-except
_LOGGER.warning(
"Ran into a deserialization error. Ignoring since this is failsafe deserialization", exc_info=True
)
Expand Down Expand Up @@ -998,7 +1069,11 @@ def __init__(

@property
def _class_type(self) -> typing.Any:
return getattr(self._type, "args", [None])[0]
result = getattr(self._type, "args", [None])[0]
# type may be wrapped by nested functools.partial so we need to check for that
if isinstance(result, functools.partial):
return getattr(result, "args", [None])[0]
return result

@property
def _rest_name(self) -> str:
Expand All @@ -1009,14 +1084,37 @@ def _rest_name(self) -> str:
def __get__(self, obj: Model, type=None): # pylint: disable=redefined-builtin
# by this point, type and rest_name will have a value bc we default
# them in __new__ of the Model class
item = obj.get(self._rest_name)
# Use _data.get() directly to avoid triggering __getitem__ which clears the cache
item = obj._data.get(self._rest_name)
if item is None:
return item
if self._is_model:
return item
return _deserialize(self._type, _serialize(item, self._format), rf=self)

# For mutable types, we want mutations to directly affect _data
# Check if we've already deserialized this value
cache_attr = f"_deserialized_{self._rest_name}"
if hasattr(obj, cache_attr):
# Return the value from _data directly (it's been deserialized in place)
return obj._data.get(self._rest_name)

deserialized = _deserialize(self._type, _serialize(item, self._format), rf=self)

# For mutable types, store the deserialized value back in _data
# so mutations directly affect _data
if isinstance(deserialized, (dict, list, set)):
obj._data[self._rest_name] = deserialized
object.__setattr__(obj, cache_attr, True) # Mark as deserialized
return deserialized

return deserialized

def __set__(self, obj: Model, value) -> None:
# Clear the cached deserialized object when setting a new value
cache_attr = f"_deserialized_{self._rest_name}"
if hasattr(obj, cache_attr):
object.__delattr__(obj, cache_attr)

if value is None:
# we want to wipe out entries if users set attr to None
try:
Expand Down Expand Up @@ -1184,7 +1282,7 @@ def _get_wrapped_element(
_get_element(v, exclude_readonly, meta, wrapped_element)
else:
wrapped_element.text = _get_primitive_type_value(v)
return wrapped_element
return wrapped_element # type: ignore[no-any-return]


def _get_primitive_type_value(v) -> str:
Expand All @@ -1197,7 +1295,9 @@ def _get_primitive_type_value(v) -> str:
return str(v)


def _create_xml_element(tag, prefix=None, ns=None):
def _create_xml_element(
tag: typing.Any, prefix: typing.Optional[str] = None, ns: typing.Optional[str] = None
) -> ET.Element:
if prefix and ns:
ET.register_namespace(prefix, ns)
if ns:
Expand Down
Loading