10 changes: 10 additions & 0 deletions sdk/cognitiveservices/azure-ai-transcription/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Release History
 
+## 1.0.0b3 (2026-02-04)
+
+### Features Added
+
+- Enhanced Mode now automatically sets `enabled=True` when `task`, `target_language`, or `prompt` are specified
+
+### Bugs Fixed
+
+- Fixed Enhanced Mode not being activated when using `EnhancedModeProperties` without explicitly setting `enabled=True`
+
 ## 1.0.0b2 (2025-12-19)
 
 ### Bugs Fixed
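In practice, the new behavior means specifying any enhanced-mode field is enough to turn the feature on. A minimal sketch based on the changelog entry (the task and language values are illustrative):

```python
from azure.ai.transcription.models import EnhancedModeProperties

# Passing task/target_language/prompt now implies enabled=True,
# so there is no need to set it explicitly.
enhanced_mode = EnhancedModeProperties(task="translation", target_language="es-ES")
```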
73 changes: 44 additions & 29 deletions sdk/cognitiveservices/azure-ai-transcription/README.md
@@ -149,10 +149,18 @@ from azure.ai.transcription.models import TranscriptionContent, TranscriptionOptions
 
 # Get configuration from environment variables
 endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
-api_key = os.environ["AZURE_SPEECH_API_KEY"]
+
+# We recommend using role-based access control (RBAC) for production scenarios
+api_key = os.environ.get("AZURE_SPEECH_API_KEY")
+if api_key:
+    credential = AzureKeyCredential(api_key)
+else:
+    from azure.identity import DefaultAzureCredential
+
+    credential = DefaultAzureCredential()
 
 # Create the transcription client
-client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
+client = TranscriptionClient(endpoint=endpoint, credential=credential)
 
 # Path to your audio file
 import pathlib
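The key-or-RBAC fallback added here is repeated verbatim in every sample. In your own code it can be factored into a small helper; a sketch assuming the same environment variables (the helper name is ours, not part of the SDK):

```python
import os

from azure.core.credentials import AzureKeyCredential


def get_speech_credential():
    """Prefer an API key when set; otherwise fall back to Microsoft Entra ID via RBAC."""
    api_key = os.environ.get("AZURE_SPEECH_API_KEY")
    if api_key:
        return AzureKeyCredential(api_key)
    from azure.identity import DefaultAzureCredential

    return DefaultAzureCredential()
```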
@@ -197,10 +205,18 @@ from azure.ai.transcription.models import TranscriptionOptions
 
 # Get configuration from environment variables
 endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
-api_key = os.environ["AZURE_SPEECH_API_KEY"]
+
+# We recommend using role-based access control (RBAC) for production scenarios
+api_key = os.environ.get("AZURE_SPEECH_API_KEY")
+if api_key:
+    credential = AzureKeyCredential(api_key)
+else:
+    from azure.identity import DefaultAzureCredential
+
+    credential = DefaultAzureCredential()
 
 # Create the transcription client
-client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
+client = TranscriptionClient(endpoint=endpoint, credential=credential)
 
 # URL to your audio file (must be publicly accessible)
 audio_url = "https://example.com/path/to/audio.wav"
@@ -238,31 +254,29 @@ from azure.ai.transcription.models import (
 
 # Get configuration from environment variables
 endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
-api_key = os.environ["AZURE_SPEECH_API_KEY"]
+
+# We recommend using role-based access control (RBAC) for production scenarios
+api_key = os.environ.get("AZURE_SPEECH_API_KEY")
+if api_key:
+    credential = AzureKeyCredential(api_key)
+else:
+    from azure.identity import DefaultAzureCredential
+
+    credential = DefaultAzureCredential()
 
 # Create the transcription client
-client = TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
+client = TranscriptionClient(endpoint=endpoint, credential=credential)
 
 # Path to your audio file
 import pathlib
 
 audio_file_path = pathlib.Path(__file__).parent / "assets" / "audio.wav"
 
 # Open and read the audio file
 with open(audio_file_path, "rb") as audio_file:
-    # Create enhanced mode properties
-    # Enable enhanced mode for advanced processing capabilities
-    enhanced_mode = EnhancedModeProperties(
-        task="translation",  # Specify the task type (e.g., "translation", "summarization")
-        target_language="es-ES",  # Target language for translation
-        prompt=[
-            "Translate the following audio to Spanish",
-            "Focus on technical terminology",
-        ],  # Optional prompts to guide the enhanced mode
-    )
+    # Enhanced mode is automatically enabled when task is specified
+    enhanced_mode = EnhancedModeProperties(task="transcribe")
 
     # Create transcription options with enhanced mode
-    options = TranscriptionOptions(locales=["en-US"], enhanced_mode=enhanced_mode)
+    options = TranscriptionOptions(enhanced_mode=enhanced_mode)
 
     # Create the request content
     request_content = TranscriptionContent(definition=options, audio=audio_file)
@@ -271,14 +285,7 @@ with open(audio_file_path, "rb") as audio_file:
     result = client.transcribe(request_content)
 
     # Print the transcription result
-    print("Transcription with enhanced mode:")
-    print(f"{result.combined_phrases[0].text}")
-
-    # Print individual phrases if available
-    if result.phrases:
-        print("\nDetailed phrases:")
-        for phrase in result.phrases:
-            print(f"  [{phrase.offset_milliseconds}ms]: {phrase.text}")
+    print(result.combined_phrases[0].text)
 ```
 
 <!-- END SNIPPET -->
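For context on how the fix above can work, the auto-enable rule from the changelog can be expressed as a simple default in the model constructor. This is an illustrative stand-in, not the generated SDK model:

```python
class EnhancedModeProperties:
    """Illustrative sketch of the auto-enable rule; the real SDK model is generated code."""

    def __init__(self, *, enabled=None, task=None, target_language=None, prompt=None):
        # Per the changelog: specifying task, target_language, or prompt
        # implies enabled=True unless the caller set it explicitly.
        if enabled is None and (task or target_language or prompt):
            enabled = True
        self.enabled = enabled
        self.task = task
        self.target_language = target_language
        self.prompt = prompt
```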
@@ -296,10 +303,18 @@ from azure.ai.transcription.models import TranscriptionContent, TranscriptionOptions
 
 # Get configuration from environment variables
 endpoint = os.environ["AZURE_SPEECH_ENDPOINT"]
-api_key = os.environ["AZURE_SPEECH_API_KEY"]
+
+# We recommend using role-based access control (RBAC) for production scenarios
+api_key = os.environ.get("AZURE_SPEECH_API_KEY")
+if api_key:
+    credential = AzureKeyCredential(api_key)
+else:
+    from azure.identity.aio import DefaultAzureCredential
+
+    credential = DefaultAzureCredential()
 
 # Create the transcription client
-async with TranscriptionClient(endpoint=endpoint, credential=AzureKeyCredential(api_key)) as client:
+async with TranscriptionClient(endpoint=endpoint, credential=credential) as client:
     # Path to your audio file
     import pathlib
 
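One note on the async variant above: the `DefaultAzureCredential` imported from `azure.identity.aio` holds network sessions, so it should be closed when you are done. A sketch of the pattern (structure only; the transcription call itself is elided):

```python
from azure.identity.aio import DefaultAzureCredential


async def main() -> None:
    # The async credential is an async context manager; exiting the
    # block closes its underlying HTTP sessions.
    async with DefaultAzureCredential() as credential:
        ...  # create the async TranscriptionClient with this credential and transcribe
```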
@@ -1,5 +1,3 @@
 {
-  "apiVersion": "2025-10-15",
-  "service_name": "Cognitive Services",
-  "msDocService": "cognitive-services"
+  "apiVersion": "2025-10-15"
 }
2 changes: 1 addition & 1 deletion sdk/cognitiveservices/azure-ai-transcription/assets.json
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/cognitiveservices/azure-ai-transcription",
-  "Tag": "python/cognitiveservices/azure-ai-transcription_5f9f60e291"
+  "Tag": "python/cognitiveservices/azure-ai-transcription_807296d8e0"
 }
@@ -27,7 +27,7 @@ class TranscriptionClient(_TranscriptionClientOperationsMixin):
     """TranscriptionClient.
 
     :param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for example:
-     `https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_.
+     `https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_).
     Required.
     :type endpoint: str
     :param credential: Credential used to authenticate requests to the service. Is either a key
@@ -24,7 +24,7 @@ class TranscriptionClientConfiguration:  # pylint: disable=too-many-instance-attributes
     attributes.
 
     :param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for example:
-     `https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_.
+     `https://westus.api.cognitive.microsoft.com <https://westus.api.cognitive.microsoft.com>`_).
     Required.
     :type endpoint: str
     :param credential: Credential used to authenticate requests to the service. Is either a key
@@ -110,12 +110,11 @@ def transcribe(self, body: Union[_models.TranscriptionContent, JSON], **kwargs:
         _body = body.as_dict() if isinstance(body, _Model) else body
         _file_fields: list[str] = ["audio"]
         _data_fields: list[str] = ["definition"]
-        _files, _data = prepare_multipart_form_data(_body, _file_fields, _data_fields)
+        _files = prepare_multipart_form_data(_body, _file_fields, _data_fields)
 
         _request = build_transcription_transcribe_request(
             api_version=self._config.api_version,
             files=_files,
-            data=_data,
             headers=_headers,
             params=_params,
         )
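This change suggests the regenerated `prepare_multipart_form_data` now returns a single sequence of parts, with the JSON `definition` serialized alongside the `audio` file part rather than travelling in a separate `data` mapping. A rough illustration of that idea (our own sketch, not the SDK's internal helper):

```python
import json


def fold_multipart_parts(body, file_fields, data_fields):
    # Hypothetical helper: emit one list of (field, payload) parts so the
    # request builder no longer needs a separate `data=` argument.
    parts = []
    for name in file_fields:
        if name in body:
            parts.append((name, body[name]))
    for name in data_fields:
        if name in body:
            parts.append((name, json.dumps(body[name])))
    return parts
```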
@@ -12,7 +12,14 @@
 from typing import Any, Optional
 import json
 from azure.core.tracing.decorator import distributed_trace
-from azure.core.exceptions import map_error, HttpResponseError, ClientAuthenticationError, ResourceNotFoundError, ResourceExistsError, ResourceNotModifiedError
+from azure.core.exceptions import (
+    map_error,
+    HttpResponseError,
+    ClientAuthenticationError,
+    ResourceNotFoundError,
+    ResourceExistsError,
+    ResourceNotModifiedError,
+)
 
 from .. import models as _models
 from .._utils.model_base import _deserialize, SdkJSONEncoder
@@ -93,7 +100,9 @@ def transcribe_from_url(
         }
         _request.url = self._client.format_url(_request.url, **path_format_arguments)
 
-        pipeline_response = self._client._pipeline.run(_request, stream=False, **kwargs)  # pylint: disable=protected-access
+        pipeline_response = self._client._pipeline.run(  # pylint: disable=protected-access
+            _request, stream=False, **kwargs
+        )
         response = pipeline_response.http_response
 
         if response.status_code not in [200]: