Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
# Release History

## 3.2.0b3 (Unreleased)
## 3.2.0b3 (2022-02-10)

### Features Added
- Added new `CurrencyValue` model to represent the amount and currency symbol values found in documents.
- Added `DocumentBuildMode` enum with values `template` and `neural`. These enum values can be passed in for the `build_mode` parameter in `begin_build_model()`.
- Added `api_version` and `tags` properties on `ModelOperation`, `ModelOperationInfo`, `DocumentModel`, `DocumentModelInfo`.
- Added `build_mode` property on `DocTypeInfo`.
- Added a `tags` keyword argument to `begin_build_model()`, `begin_create_composed_model()`, and `get_copy_authorization()`.
- Added `languages` property on `AnalyzeResult`.
- Added model `DocumentLanguage` that includes information about the detected languages found in a document.
- Added `sample_analyze_read.py` and `sample_analyze_read_async.py` under the `v3.2-beta` samples directory. These samples use the new `prebuilt-read` model added by the service.
- Added `sample_analyze_tax_us_w2.py` and `sample_analyze_tax_us_w2_async.py` under the `v3.2-beta` samples directory. These samples use the new `prebuilt-tax.us.w2` model added by the service.

### Breaking Changes

### Bugs Fixed
- Default the `percent_completed` property to 0 when not returned with model operation information.
- Added new required parameter `build_mode` to `begin_build_model()`.
- Some models that previously returned a float for currency-related fields may now return a `CurrencyValue`. TIP: Use `get_model()` to see updated prebuilt model schemas.

### Other Changes
- Python 2.7 is no longer supported in this release. Please use Python 3.6 or later.
- Bumped `azure-core` minimum dependency version from `1.13.0` to `1.20.1`.
- Updated samples that call `begin_build_model()` to send the `build_mode` parameter.

## 3.2.0b2 (2021-11-09)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@
AnalyzeResult,
AnalyzedDocument,
BoundingRegion,
CurrencyValue,
DocumentBuildMode,
DocumentContentElement,
DocumentEntity,
DocumentField,
DocumentKeyValuePair,
DocumentKeyValueElement,
DocumentLanguage,
DocumentLine,
DocumentPage,
DocumentSelectionMark,
Expand Down Expand Up @@ -100,11 +103,14 @@
"AnalyzeResult",
"AnalyzedDocument",
"BoundingRegion",
"CurrencyValue",
"DocumentBuildMode",
"DocumentContentElement",
"DocumentEntity",
"DocumentField",
"DocumentKeyValueElement",
"DocumentKeyValuePair",
"DocumentLanguage",
"DocumentLine",
"DocumentPage",
"DocumentSelectionMark",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class DocumentAnalysisApiVersion(str, Enum):
"""Form Recognizer API versions supported by DocumentAnalysisClient and DocumentModelAdministrationClient."""

#: This is the default version
V2021_09_30_PREVIEW = "2021-09-30-preview"
V2022_01_30_PREVIEW = "2022-01-30-preview"


class FormRecognizerApiVersion(str, Enum):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class DocumentAnalysisClient(FormRecognizerClientBase):
def __init__(self, endpoint, credential, **kwargs):
# type: (str, Union[AzureKeyCredential, TokenCredential], Any) -> None
api_version = kwargs.pop(
"api_version", DocumentAnalysisApiVersion.V2021_09_30_PREVIEW
"api_version", DocumentAnalysisApiVersion.V2022_01_30_PREVIEW
)
super(DocumentAnalysisClient, self).__init__(
endpoint=endpoint,
Expand Down Expand Up @@ -125,7 +125,7 @@ def begin_analyze_document(self, model, document, **kwargs):
cls = kwargs.pop("cls", self._analyze_document_callback)
continuation_token = kwargs.pop("continuation_token", None)

return self._client.begin_analyze_document(
return self._client.begin_analyze_document( # type: ignore
model_id=model,
analyze_request=document,
content_type="application/octet-stream",
Expand Down Expand Up @@ -174,7 +174,7 @@ def begin_analyze_document_from_url(self, model, document_url, **kwargs):
cls = kwargs.pop("cls", self._analyze_document_callback)
continuation_token = kwargs.pop("continuation_token", None)

return self._client.begin_analyze_document(
return self._client.begin_analyze_document( # type: ignore
model_id=model,
analyze_request={"url_source": document_url},
string_index_type="unicodeCodePoint",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from ._form_base_client import FormRecognizerClientBase
from ._document_analysis_client import DocumentAnalysisClient
from ._models import (
DocumentBuildMode,
DocumentModel,
DocumentModelInfo,
ModelOperation,
Expand Down Expand Up @@ -85,7 +86,7 @@ class DocumentModelAdministrationClient(FormRecognizerClientBase):
def __init__(self, endpoint, credential, **kwargs):
# type: (str, Union[AzureKeyCredential, TokenCredential], Any) -> None
api_version = kwargs.pop(
"api_version", DocumentAnalysisApiVersion.V2021_09_30_PREVIEW
"api_version", DocumentAnalysisApiVersion.V2022_01_30_PREVIEW
)
super(DocumentModelAdministrationClient, self).__init__(
endpoint=endpoint,
Expand All @@ -96,8 +97,8 @@ def __init__(self, endpoint, credential, **kwargs):
)

@distributed_trace
def begin_build_model(self, source, **kwargs):
# type: (str, Any) -> DocumentModelAdministrationLROPoller[DocumentModel]
def begin_build_model(self, source, build_mode, **kwargs):
# type: (str, Union[str, DocumentBuildMode], Any) -> DocumentModelAdministrationLROPoller[DocumentModel]
"""Build a custom model.

The request must include a `source` parameter that is an
Expand All @@ -108,17 +109,24 @@ def begin_build_model(self, source, **kwargs):
:param str source: An Azure Storage blob container's SAS URI. A container URI (without SAS)
can be used if the container is public. For more information on setting up a training data set, see:
https://aka.ms/azsdk/formrecognizer/buildtrainingset
:param build_mode: The custom model build mode. Possible values include: "template", "neural".
:type build_mode: str or :class:`~azure.ai.formrecognizer.DocumentBuildMode`
:keyword str model_id: A unique ID for your model. If not specified, a model ID will be created for you.
:keyword str description: An optional description to add to the model.
:keyword str prefix: A case-sensitive prefix string to filter documents in the source path.
For example, when using an Azure storage blob URI, use the prefix to restrict sub folders.
`prefix` should end in '/' to avoid cases where filenames share the same prefix.
:keyword tags: User-defined key-value tag attributes associated with the model.
:paramtype tags: dict[str, str]
:keyword str continuation_token: A continuation token to restart a poller from a saved state.
:return: An instance of a DocumentModelAdministrationLROPoller. Call `result()` on the poller
object to return a :class:`~azure.ai.formrecognizer.DocumentModel`.
:rtype: ~azure.ai.formrecognizer.DocumentModelAdministrationLROPoller[DocumentModel]
:raises ~azure.core.exceptions.HttpResponseError:

.. versionadded:: v2022-01-30-preview
The *tags* keyword argument

.. admonition:: Example:

.. literalinclude:: ../samples/v3.2-beta/sample_build_model.py
Expand All @@ -140,6 +148,7 @@ def callback(raw_response, _, headers): # pylint: disable=unused-argument

description = kwargs.pop("description", None)
model_id = kwargs.pop("model_id", None)
tags = kwargs.pop("tags", None)
cls = kwargs.pop("cls", callback)
continuation_token = kwargs.pop("continuation_token", None)
polling_interval = kwargs.pop(
Expand All @@ -152,7 +161,9 @@ def callback(raw_response, _, headers): # pylint: disable=unused-argument
return self._client.begin_build_document_model( # type: ignore
build_request=self._generated_models.BuildDocumentModelRequest(
model_id=model_id,
build_mode=build_mode,
description=description,
tags=tags,
azure_blob_source=self._generated_models.AzureBlobContentSource(
container_url=source,
prefix=kwargs.pop("prefix", None),
Expand Down Expand Up @@ -181,12 +192,17 @@ def begin_create_composed_model(self, model_ids, **kwargs):
:keyword str model_id: A unique ID for your composed model.
If not specified, a model ID will be created for you.
:keyword str description: An optional description to add to the model.
:keyword tags: User-defined key-value tag attributes associated with the model.
:paramtype tags: dict[str, str]
:keyword str continuation_token: A continuation token to restart a poller from a saved state.
:return: An instance of a DocumentModelAdministrationLROPoller. Call `result()` on the poller
object to return a :class:`~azure.ai.formrecognizer.DocumentModel`.
:rtype: ~azure.ai.formrecognizer.DocumentModelAdministrationLROPoller[DocumentModel]
:raises ~azure.core.exceptions.HttpResponseError:

.. versionadded:: v2022-01-30-preview
The *tags* keyword argument

.. admonition:: Example:

.. literalinclude:: ../samples/v3.2-beta/sample_create_composed_model.py
Expand All @@ -210,6 +226,7 @@ def _compose_callback(

model_id = kwargs.pop("model_id", None)
description = kwargs.pop("description", None)
tags = kwargs.pop("tags", None)
continuation_token = kwargs.pop("continuation_token", None)
polling_interval = kwargs.pop(
"polling_interval", self._client._config.polling_interval
Expand All @@ -218,10 +235,11 @@ def _compose_callback(
if model_id is None:
model_id = str(uuid.uuid4())

return self._client.begin_compose_document_model(
return self._client.begin_compose_document_model( # type: ignore
compose_request=self._generated_models.ComposeDocumentModelRequest(
model_id=model_id,
description=description,
tags=tags,
component_models=[
self._generated_models.ComponentModelInfo(model_id=model_id)
for model_id in model_ids
Expand Down Expand Up @@ -250,20 +268,26 @@ def get_copy_authorization(self, **kwargs):
:keyword str model_id: A unique ID for your copied model.
If not specified, a model ID will be created for you.
:keyword str description: An optional description to add to the model.
:keyword tags: User-defined key-value tag attributes associated with the model.
:paramtype tags: dict[str, str]
:return: A dictionary with values necessary for the copy authorization.
:rtype: Dict[str, str]
:raises ~azure.core.exceptions.HttpResponseError:

.. versionadded:: v2022-01-30-preview
The *tags* keyword argument
"""

model_id = kwargs.pop("model_id", None)
description = kwargs.pop("description", None)
tags = kwargs.pop("tags", None)

if model_id is None:
model_id = str(uuid.uuid4())

response = self._client.authorize_copy_document_model(
authorize_copy_request=self._generated_models.AuthorizeCopyRequest(
model_id=model_id, description=description
model_id=model_id, description=description, tags=tags
),
**kwargs
)
Expand Down Expand Up @@ -322,7 +346,7 @@ def _copy_callback(raw_response, _, headers): # pylint: disable=unused-argument
)
continuation_token = kwargs.pop("continuation_token", None)

return self._client.begin_copy_document_model_to(
return self._client.begin_copy_document_model_to( # type: ignore
model_id=model_id,
copy_to_request=self._generated_models.CopyAuthorization(
target_resource_id=target["targetResourceId"],
Expand Down Expand Up @@ -389,7 +413,7 @@ def list_models(self, **kwargs):
:caption: List all models that were built successfully under the Form Recognizer resource.
"""

return self._client.get_models(
return self._client.get_models( # type: ignore
cls=kwargs.pop(
"cls",
lambda objs: [DocumentModelInfo._from_generated(x) for x in objs],
Expand Down Expand Up @@ -468,7 +492,7 @@ def list_operations(self, **kwargs):
:caption: List all document model operations in the past 24 hours.
"""

return self._client.get_operations(
return self._client.get_operations( # type: ignore
cls=kwargs.pop(
"cls",
lambda objs: [ModelOperationInfo._from_generated(x) for x in objs],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def callback_v2_1(raw_response, _, headers): # pylint: disable=unused-argument
deserialization_callback=deserialization_callback,
)

response = self._client.train_custom_model_async(
response = self._client.train_custom_model_async( # type: ignore
train_request=self._generated_models.TrainRequest(
source=training_files_url,
use_label_file=use_training_labels,
Expand Down Expand Up @@ -486,7 +486,7 @@ def _compose_callback(
)
continuation_token = kwargs.pop("continuation_token", None)
try:
return self._client.begin_compose_custom_models_async(
return self._client.begin_compose_custom_models_async( # type: ignore
{"model_ids": model_ids, "model_name": model_name},
cls=kwargs.pop("cls", _compose_callback),
polling=LROBasePolling(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ class FormRecognizerClientConfiguration(Configuration):

:param credential: Credential needed for the client to connect to Azure.
:type credential: ~azure.core.credentials.TokenCredential
:param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for
example: https://westus2.api.cognitive.microsoft.com).
:param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for example: https://westus2.api.cognitive.microsoft.com).
:type endpoint: str
"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from typing import Any, Optional

from azure.core.credentials import TokenCredential
from azure.core.pipeline.transport import HttpRequest, HttpResponse

class _SDKClient(object):
def __init__(self, *args, **kwargs):
Expand All @@ -34,7 +33,7 @@ def __init__(self, *args, **kwargs):
pass

class FormRecognizerClient(FormRecognizerClientOperationsMixin, MultiApiClientMixin, _SDKClient):
"""Extracts content, layout, and structured data from documents.
"""Extracts information from forms and images into structured data.

This client contains multiple API versions, to help you deal with all of the Azure clouds
(Azure Stack, Azure Government, Azure China, etc.).
Expand All @@ -46,8 +45,7 @@ class FormRecognizerClient(FormRecognizerClientOperationsMixin, MultiApiClientMi

:param credential: Credential needed for the client to connect to Azure.
:type credential: ~azure.core.credentials.TokenCredential
:param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for
example: https://westus2.api.cognitive.microsoft.com).
:param endpoint: Supported Cognitive Services endpoints (protocol and hostname, for example: https://westus2.api.cognitive.microsoft.com).
:type endpoint: str
:param api_version: API version to use if no profile is provided, or if missing in profile.
:type api_version: str
Expand All @@ -56,11 +54,24 @@ class FormRecognizerClient(FormRecognizerClientOperationsMixin, MultiApiClientMi
:keyword int polling_interval: Default waiting time between two polls for LRO operations if no Retry-After header is present.
"""

DEFAULT_API_VERSION = '2021-09-30-preview'
DEFAULT_API_VERSION = '2.1'
_PROFILE_TAG = "azure.ai.formrecognizer.FormRecognizerClient"
LATEST_PROFILE = ProfileDefinition({
_PROFILE_TAG: {
None: DEFAULT_API_VERSION,
'authorize_copy_document_model': '2022-01-30-preview',
'begin_analyze_document': '2022-01-30-preview',
'begin_build_document_model': '2022-01-30-preview',
'begin_compose_document_model': '2022-01-30-preview',
'begin_copy_document_model_to': '2022-01-30-preview',
'delete_model': '2022-01-30-preview',
'get_analyze_document_result': '2022-01-30-preview',
'get_info': '2022-01-30-preview',
'get_model': '2022-01-30-preview',
'get_models': '2022-01-30-preview',
'get_operation': '2022-01-30-preview',
'get_operations': '2022-01-30-preview',
'train_custom_model_async': '2.0',
}},
_PROFILE_TAG + " latest"
)
Expand All @@ -73,7 +84,7 @@ def __init__(
profile=KnownProfiles.default, # type: KnownProfiles
**kwargs # type: Any
):
if api_version == '2021-09-30-preview':
if api_version == '2022-01-30-preview':
base_url = '{endpoint}/formrecognizer'
elif api_version == '2.0':
base_url = '{endpoint}/formrecognizer/v2.0'
Expand All @@ -96,12 +107,12 @@ def _models_dict(cls, api_version):
def models(cls, api_version=DEFAULT_API_VERSION):
"""Module depends on the API version:

* 2021-09-30-preview: :mod:`v2021_09_30_preview.models<azure.ai.formrecognizer.v2021_09_30_preview.models>`
* 2022-01-30-preview: :mod:`v2022_01_30_preview.models<azure.ai.formrecognizer.v2022_01_30_preview.models>`
* 2.0: :mod:`v2_0.models<azure.ai.formrecognizer.v2_0.models>`
* 2.1: :mod:`v2_1.models<azure.ai.formrecognizer.v2_1.models>`
"""
if api_version == '2021-09-30-preview':
from .v2021_09_30_preview import models
if api_version == '2022-01-30-preview':
from .v2022_01_30_preview import models
return models
elif api_version == '2.0':
from .v2_0 import models
Expand Down
Loading