diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md b/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md index 468cc1380b17..d18e83951cf8 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md +++ b/sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md @@ -1,6 +1,6 @@ # Change Log azure-ai-formrecognizer -## 1.0.0b1 (Unreleased) +## 1.0.0b1 (2020-04-23) Version (1.0.0b1) is the first preview of our efforts to create a user-friendly and Pythonic client library for Azure Form Recognizer. This library replaces the package found here: https://pypi.org/project/azure-cognitiveservices-formrecognizer/ diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py index 32e5a29cd038..109f5456d179 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py @@ -79,7 +79,8 @@ def begin_recognize_receipts(self, stream, **kwargs): :param stream: .pdf, .jpg, .png or .tiff type file stream. Currently only supports US sales receipts. :type stream: stream - :keyword bool include_text_content: Include text lines and text content references in the result. + :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword str content_type: Media type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see :class:`~azure.ai.formrecognizer.FormContentType`. @@ -128,7 +129,8 @@ def begin_recognize_receipts_from_url(self, url, **kwargs): :param url: The url of the receipt. Currently only supports US sales receipts. 
:type url: str - :keyword bool include_text_content: Include text lines and text content references in the result. + :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword int polling_interval: Waiting time between two polls for LRO operations if no Retry-After header is present. Defaults to 5 seconds. :return: An instance of an LROPoller. Call `result()` on the poller @@ -245,7 +247,8 @@ def begin_recognize_custom_forms(self, model_id, stream, **kwargs): :param str model_id: Custom model identifier. :param stream: .pdf, .jpg, .png or .tiff type file stream. :type stream: stream - :keyword bool include_text_content: Include text lines and element references in the result. + :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword str content_type: Media type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see :class:`~azure.ai.formrecognizer.FormContentType`. @@ -302,7 +305,8 @@ def begin_recognize_custom_forms_from_url(self, model_id, url, **kwargs): :param str model_id: Custom model identifier. :param url: The url of the document. :type url: str - :keyword bool include_text_content: Include text lines and element references in the result. + :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword int polling_interval: Waiting time between two polls for LRO operations if no Retry-After header is present. Defaults to 5 seconds. :return: An instance of an LROPoller. 
Call `result()` on the poller diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py index 5fb81c28b3f3..b1b967b2eb05 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py @@ -127,10 +127,12 @@ class FormContent(object): :ivar str text: The text content of the line. :ivar list[~azure.ai.formrecognizer.Point] bounding_box: - The quadrangle bounding box that outlines the text. + A list of 4 points representing the quadrilateral bounding box + that outlines the text. The points are listed in clockwise + order: top-left, top-right, bottom-right, bottom-left. Units are in pixels for images and inches for PDF. :ivar int page_number: - The 1-based page number in the input document. + The 1-based number of the page in which this content is present. """ def __init__(self, **kwargs): self.bounding_box = kwargs.get("bounding_box", None) @@ -139,18 +141,21 @@ def __init__(self, **kwargs): class RecognizedForm(object): - """Represents a recognized form. + """Represents a form that has been recognized by a trained model. - :ivar str form_type: The type of form. + :ivar str form_type: + The type of form the model identified the submitted form to be. :ivar fields: - A dictionary of the fields found on the form. + A dictionary of the fields found on the form. The fields dictionary + keys are the `name` of the field. For models trained with labels, + this is the training-time label of the field. For models trained + without labels, a unique name is generated for each field. :vartype fields: dict[str, ~azure.ai.formrecognizer.FormField] :ivar ~azure.ai.formrecognizer.PageRange page_range: The first and last page of the input form. 
:ivar list[~azure.ai.formrecognizer.FormPage] pages: - Contains page metadata such as page width, length, text angle, unit. - If `include_text_content=True` is passed, contains a list - of extracted text lines for each page in the input document. + A list of pages recognized from the input document. Contains lines, + words, tables and page metadata. """ def __init__(self, **kwargs): self.fields = kwargs.get("fields", None) @@ -160,7 +165,8 @@ def __init__(self, **kwargs): class USReceipt(object): # pylint: disable=too-many-instance-attributes - """Extracted fields and values found on the input US sales receipt. + """Extracted fields found on the US sales receipt. Provides + attributes for accessing common fields present in US sales receipts. :ivar ~azure.ai.formrecognizer.FormField merchant_address: The address of the merchant. @@ -217,7 +223,7 @@ def __init__(self, **kwargs): class FormField(object): - """Represents a recognized field and its value. + """Represents a field recognized in an input form. :ivar ~azure.ai.formrecognizer.FieldText label_data: Contains the text, bounding box, and text content of the field label. @@ -229,9 +235,10 @@ class FormField(object): 'date', 'time', 'phoneNumber', 'number', 'integer', 'object', or 'array'. :vartype value: str, int, float, :class:`~datetime.date`, :class:`~datetime.time`, :class:`~azure.ai.formrecognizer.FormField`, or list[:class:`~azure.ai.formrecognizer.FormField`] - :ivar float confidence: Confidence score. + :ivar float confidence: + Measures the degree of certainty of the recognition result. Value is between [0.0, 1.0]. :ivar int page_number: - The 1-based page number in the input document. + The 1-based number of the page in which this content is present. """ def __init__(self, **kwargs): @@ -266,16 +273,20 @@ def _from_generated_unlabeled(cls, field, idx, page, read_result): class FieldText(FormContent): - """Represents the properties of a field or value. 
+ """Represents the text that is part of a form field. This includes + the location of the text in the form and a collection of the + elements that make up the text. :ivar int page_number: - The 1-based page number in the input document. + The 1-based number of the page in which this content is present. :ivar str text: The string representation of the field or value. :ivar list[~azure.ai.formrecognizer.Point] bounding_box: - The quadrangle bounding box that outlines the text. + A list of 4 points representing the quadrilateral bounding box + that outlines the text. The points are listed in clockwise + order: top-left, top-right, bottom-right, bottom-left. Units are in pixels for images and inches for PDF. :ivar text_content: - When `include_text_content` is set to true, a list of references to the text + When `include_text_content` is set to true, a list of text elements constituting this field or value is returned. :vartype text_content: list[~azure.ai.formrecognizer.FormWord, ~azure.ai.formrecognizer.FormLine] """ @@ -316,10 +327,11 @@ def _from_generated_unlabeled(cls, field, page, read_result): class FormPage(object): - """Page metadata, text lines, and tables extracted from a page in the input document. + """Represents a page recognized from the input document. Contains lines, + words, tables and page metadata. :ivar int page_number: - The 1-based page number in the input document. + The 1-based number of the page in which this content is present. :ivar float text_angle: The general orientation of the text in clockwise direction, measured in degrees between (-180, 180]. @@ -364,16 +376,18 @@ def _from_generated(cls, read_result): class FormLine(FormContent): - """An object representing an extracted text line. + """An object representing an extracted line of text. :ivar str text: The text content of the line. :ivar list[~azure.ai.formrecognizer.Point] bounding_box: - The quadrangle bounding box that outlines the text. 
+ A list of 4 points representing the quadrilateral bounding box + that outlines the text. The points are listed in clockwise + order: top-left, top-right, bottom-right, bottom-left. Units are in pixels for images and inches for PDF. :ivar list[~azure.ai.formrecognizer.FormWord] words: - List of words in the text line. + A list of the words that make up the line. :ivar int page_number: - The 1-based page number in the input document. + The 1-based number of the page in which this content is present. """ def __init__(self, **kwargs): @@ -396,15 +410,18 @@ def _from_generated(cls, line, page): class FormWord(FormContent): - """An object representing a word on the page. + """Represents a word recognized from the input document. :ivar str text: The text content of the word. :ivar list[~azure.ai.formrecognizer.Point] bounding_box: - The quadrangle bounding box that outlines the text. + A list of 4 points representing the quadrilateral bounding box + that outlines the text. The points are listed in clockwise + order: top-left, top-right, bottom-right, bottom-left. Units are in pixels for images and inches for PDF. - :ivar float confidence: Confidence value. + :ivar float confidence: + Measures the degree of certainty of the recognition result. Value is between [0.0, 1.0]. :ivar int page_number: - The 1-based page number in the input document. + The 1-based number of the page in which this content is present. """ def __init__(self, **kwargs): @@ -432,7 +449,8 @@ class USReceiptType(object): :ivar str type: The type of the receipt. For example, "Itemized", "CreditCard", "Gas", "Parking", "Other". - :ivar float confidence: The confidence score of the receipt type. + :ivar float confidence: + Measures the degree of certainty of the recognition result. Value is between [0.0, 1.0]. """ def __init__(self, **kwargs): @@ -453,11 +471,11 @@ class USReceiptItem(object): :ivar ~azure.ai.formrecognizer.FormField name: The name of the item.
:ivar ~azure.ai.formrecognizer.FormField quantity: - The quantity purchased of the item. + The quantity associated with this item. :ivar ~azure.ai.formrecognizer.FormField price: - The individual price of the item. + The price of a single unit of this item. :ivar ~azure.ai.formrecognizer.FormField total_price: - The total price of the item(s). + The total price of this item, taking the quantity into account. """ def __init__(self, **kwargs): @@ -498,7 +516,7 @@ def __init__(self, **kwargs): class FormTableCell(FormContent): - """Information about the extracted cell in a table. + """Represents a cell contained in a table recognized from the input document. :ivar str text: Text content of the cell. :ivar int row_index: Row index of the cell. @@ -506,16 +524,20 @@ class FormTableCell(FormContent): :ivar int row_span: Number of rows spanned by this cell. :ivar int column_span: Number of columns spanned by this cell. :ivar list[~azure.ai.formrecognizer.Point] bounding_box: - The quadrangle bounding box that outlines the cell text. + A list of 4 points representing the quadrilateral bounding box + that outlines the text. The points are listed in clockwise + order: top-left, top-right, bottom-right, bottom-left. Units are in pixels for images and inches for PDF. - :ivar float confidence: Confidence value. + :ivar float confidence: + Measures the degree of certainty of the recognition result. Value is between [0.0, 1.0]. :ivar bool is_header: Whether the current cell is a header cell. :ivar bool is_footer: Whether the current cell is a footer cell. - :ivar int page_number: The 1-based page number in the input document. + :ivar int page_number: + The 1-based number of the page in which this content is present. :ivar text_content: - When `include_text_content` is set to true, a list of references to the text - elements constituting this cell is returned. For calls to recognize content, - this list is always populated. 
+ When `include_text_content` is set to true, a list of text + elements constituting this cell is returned. + For calls to recognize content, this list is always populated. :vartype text_content: list[~azure.ai.formrecognizer.FormWord, ~azure.ai.formrecognizer.FormLine] """ @@ -555,18 +577,18 @@ def _from_generated(cls, cell, page, read_result): class CustomFormModel(object): """Represents a model trained from custom forms. - :ivar str model_id: Model identifier. + :ivar str model_id: The unique identifier of this model. :ivar str status: Status indicating the model's readiness for use, :class:`~azure.ai.formrecognizer.CustomFormModelStatus`. Possible values include: 'creating', 'ready', 'invalid'. :ivar ~datetime.datetime created_on: - Date and time (UTC) when the model was created. + The date and time (UTC) when model training was started. :ivar ~datetime.datetime last_modified: Date and time (UTC) when model training completed. :ivar list[~azure.ai.formrecognizer.CustomFormSubModel] models: - A list of submodels, each of which extract fields from - a different type of form. + A list of submodels that are part of this model, each of + which can recognize and extract fields from a different type of form. :ivar list[~azure.ai.formrecognizer.FormRecognizerError] errors: List of any training errors. :ivar ~azure.ai.formrecognizer.TrainingDocumentInfo training_documents: @@ -601,8 +623,11 @@ class CustomFormSubModel(object): """Represents a submodel that extracts fields from a specific type of form. :ivar float accuracy: The mean of the model's field accuracies. - :ivar fields: Form fields that this submodel will extract when analyzing - this form type. + :ivar fields: A dictionary of the fields that this submodel will recognize + from the input document. The fields dictionary keys are the `name` of + the field. For models trained with labels, this is the training-time + label of the field. 
For models trained without labels, a unique name + is generated for each field. :vartype fields: dict[str, ~azure.ai.formrecognizer.CustomFormModelField] :ivar str form_type: Type of form this submodel recognizes. """ @@ -633,8 +658,8 @@ class CustomFormModelField(object): """A field that the model will extract from forms it analyzes. :ivar str label: The form field's label on the form. - :ivar str name: Unique name of the field. - :ivar float accuracy: Estimated extraction accuracy for this field. + :ivar str name: Canonical name; uniquely identifies a field within the form. + :ivar float accuracy: The estimated recognition accuracy for this field. """ def __init__(self, **kwargs): self.label = kwargs.get("label", None) @@ -709,12 +734,12 @@ def _from_generated(cls, err): class CustomFormModelInfo(object): """Custom model information. - :ivar str model_id: Model identifier. + :ivar str model_id: The unique identifier of the model. :ivar str status: The status of the model, :class:`~azure.ai.formrecognizer.CustomFormModelStatus`. Possible values include: 'creating', 'ready', 'invalid'. :ivar ~datetime.datetime created_on: - Date and time (UTC) when the model was created. + Date and time (UTC) when model training was started. :ivar ~datetime.datetime last_modified: Date and time (UTC) when model training completed. """ diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py index a5f837dd47de..f8c17f3a6c63 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py @@ -91,7 +91,8 @@ async def recognize_receipts( :param stream: .pdf, .jpg, .png or .tiff type file stream. Currently only supports US sales receipts.
:type stream: stream - :keyword bool include_text_content: Include text lines and text content references in the result. + :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword str content_type: Media type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see :class:`~azure.ai.formrecognizer.FormContentType`. @@ -142,7 +143,8 @@ async def recognize_receipts_from_url( :param url: The url of the receipt. Currently only supports US sales receipts. :type url: str - :keyword bool include_text_content: Include text lines and text content references in the result. + :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword int polling_interval: Waiting time between two polls for LRO operations if no Retry-After header is present. Defaults to 5 seconds. :return: A list of USReceipt. @@ -257,7 +259,8 @@ async def recognize_custom_forms( :param str model_id: Custom model identifier. :param stream: .pdf, .jpg, .png or .tiff type file stream. :type stream: stream - :keyword bool include_text_content: Include text lines and element references in the result. + :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword str content_type: Media type of the body sent to the API. Content-type is auto-detected, but can be overridden by passing this keyword argument. For options, see :class:`~azure.ai.formrecognizer.FormContentType`. @@ -318,7 +321,8 @@ async def recognize_custom_forms_from_url( :param str model_id: Custom model identifier. :param url: The url of the document. :type url: str - :keyword bool include_text_content: Include text lines and element references in the result. 
+ :keyword bool include_text_content: + Whether or not to include text elements such as lines and words in addition to form fields. :keyword int polling_interval: Waiting time between two polls for LRO operations if no Retry-After header is present. Defaults to 5 seconds. :return: A list of RecognizedForm.