diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py index c9a06a3800d4..b1eab0c1e297 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py @@ -144,6 +144,9 @@ class Point(namedtuple("Point", "x y")): def __new__(cls, x, y): return super(Point, cls).__new__(cls, x, y) + def to_dict(self): + return {"x": self.x, "y": self.y} + class FormPageRange(namedtuple("FormPageRange", "first_page_number last_page_number")): """The 1-based page range of the form. @@ -159,6 +162,12 @@ def __new__(cls, first_page_number, last_page_number): cls, first_page_number, last_page_number ) + def to_dict(self): + return { + "first_page_number": self.first_page_number, + "last_page_number": self.last_page_number, + } + class FormElement(object): """Base type which includes properties for a form element. @@ -183,6 +192,14 @@ def __init__(self, **kwargs): self.text = kwargs.get("text", None) self.kind = kwargs.get("kind", None) + def to_dict(self): + return { + "text": self.text, + "bounding_box": [f.to_dict() for f in self.bounding_box] if self.bounding_box else [], + "page_number": self.page_number, + "kind": self.kind, + } + class RecognizedForm(object): """Represents a form that has been recognized by a trained or prebuilt model. @@ -231,6 +248,16 @@ def __repr__(self): )[:1024] ) + def to_dict(self): + return { + "fields": {k: v.to_dict() for k, v in self.fields.items()} if self.fields else {}, + "form_type": self.form_type, + "pages": [v.to_dict() for v in self.pages] if self.pages else [], + "model_id": self.model_id, + "form_type_confidence": self.form_type_confidence, + "page_range": self.page_range.to_dict() if self.page_range else None + } + class FormField(object): """Represents a field recognized in an input form. @@ -302,6 +329,20 @@ def __repr__(self): :1024 ] + def to_dict(self): + value = self.value + if isinstance(self.value, dict): + value = {k: v.to_dict() for k, v in self.value.items()} + elif isinstance(self.value, list): + value = [v.to_dict() for v in self.value] + return { + "value_type": self.value_type, + "name": self.name, + "value": value, + "confidence": self.confidence, + "label_data": self.label_data.to_dict() if self.label_data else None, + "value_data": self.value_data.to_dict() if self.value_data else None, + } class FieldData(object): """Contains the data for the form field. This includes the text, @@ -371,6 +412,14 @@ def __repr__(self): :1024 ] + def to_dict(self): + return { + "text": self.text, + "bounding_box": [f.to_dict() for f in self.bounding_box] if self.bounding_box else [], + "page_number": self.page_number, + "field_elements": [f.to_dict() for f in self.field_elements] if self.field_elements else [] + } + class FormPage(object): """Represents a page recognized from the input document. Contains lines, @@ -430,6 +479,17 @@ def __repr__(self): )[:1024] ) + def to_dict(self): + return { + "page_number": self.page_number, + "text_angle": self.text_angle, + "width": self.width, + "height": self.height, + "unit": self.unit, + "tables": [table.to_dict() for table in self.tables] if self.tables else [], + "lines": [line.to_dict() for line in self.lines] if self.lines else [], + "selection_marks": [mark.to_dict() for mark in self.selection_marks] if self.selection_marks else [] + } class FormLine(FormElement): """An object representing an extracted line of text. @@ -486,6 +546,16 @@ def __repr__(self): :1024 ] + def to_dict(self): + return { + "text": self.text, + "bounding_box": [f.to_dict() for f in self.bounding_box] if self.bounding_box else [], + "words": [f.to_dict() for f in self.words] if self.words else [], + "page_number": self.page_number, + "kind": self.kind, + "appearance": self.appearance.to_dict() if self.appearance else None + } + class FormWord(FormElement): """Represents a word recognized from the input document. @@ -523,6 +593,15 @@ def __repr__(self): :1024 ] + def to_dict(self): + return { + "text": self.text, + "bounding_box": [f.to_dict() for f in self.bounding_box] if self.bounding_box else [], + "confidence": self.confidence, + "page_number": self.page_number, + "kind": self.kind, + } + class FormSelectionMark(FormElement): """Information about the extracted selection mark. @@ -557,12 +636,22 @@ def _from_generated(cls, mark, page): ) def __repr__(self): - return "FormSelectionMark(text={}, bounding_box={}, confidence={}, page_number={}, state={})".format( - self.text, self.bounding_box, self.confidence, self.page_number, self.state + return "FormSelectionMark(text={}, bounding_box={}, confidence={}, page_number={}, state={}, kind={})".format( + self.text, self.bounding_box, self.confidence, self.page_number, self.state, self.kind )[ :1024 ] + def to_dict(self): + return { + "text": self.text, + "bounding_box": [f.to_dict() for f in self.bounding_box] if self.bounding_box else [], + "confidence": self.confidence, + "state": self.state, + "page_number": self.page_number, + "kind": self.kind, + } + class FormTable(object): """Information about the extracted table contained on a page. @@ -603,6 +692,15 @@ def __repr__(self): :1024 ] + def to_dict(self): + return { + "page_number": self.page_number, + "row_count": self.row_count, + "column_count": self.column_count, + "cells": [cell.to_dict() for cell in self.cells], + "bounding_box": [box.to_dict() for box in self.bounding_box] if self.bounding_box else [] + } + class FormTableCell(object): # pylint:disable=too-many-instance-attributes """Represents a cell contained in a table recognized from the input document. @@ -688,6 +786,22 @@ def __repr__(self): ] ) + def to_dict(self): + return { + "text": self.text, + "row_index": self.row_index, + "column_index": self.column_index, + "row_span": self.row_span, + "column_span": self.column_span, + "confidence": self.confidence, + "is_header": self.is_header, + "is_footer": self.is_footer, + "page_number": self.page_number, + "bounding_box": [box.to_dict() for box in self.bounding_box] if self.bounding_box else [], + "field_elements": [element.to_dict() for element in self.field_elements] + if self.field_elements else None + } + class CustomFormModel(object): """Represents a model trained from custom forms. @@ -790,6 +904,18 @@ def __repr__(self): ] ) + def to_dict(self): + return { + "model_id": self.model_id, + "status": self.status, + "training_started_on": self.training_started_on, + "training_completed_on": self.training_completed_on, + "submodels": [submodel.to_dict() for submodel in self.submodels] if self.submodels else [], + "errors": [err.to_dict() for err in self.errors] if self.errors else [], + "training_documents": [doc.to_dict() for doc in self.training_documents] if self.training_documents else [], + "model_name": self.model_name, + "properties": self.properties.to_dict() if self.properties else None + } class CustomFormSubmodel(object): """Represents a submodel that extracts fields from a specific type of form. @@ -884,6 +1010,14 @@ def __repr__(self): :1024 ] + def to_dict(self): + return { + "model_id": self.model_id, + "accuracy": self.accuracy, + "fields": {k: v.to_dict() for k, v in self.fields.items()} if self.fields else {}, + "form_type": self.form_type + } + class CustomFormModelField(object): """A field that the model will extract from forms it analyzes. @@ -917,6 +1051,13 @@ def __repr__(self): self.label, self.name, self.accuracy )[:1024] + def to_dict(self): + return { + "label": self.label, + "accuracy": self.accuracy, + "name": self.name + } + class TrainingDocumentInfo(object): """Report for an individual document used for training @@ -988,6 +1129,15 @@ def __repr__(self): :1024 ] + def to_dict(self): + return { + "name": self.name, + "status": self.status, + "page_count": self.page_count, + "errors": [err.to_dict() for err in self.errors], + "model_id": self.model_id + } + class FormRecognizerError(object): """Represents an error that occurred while training. @@ -1013,6 +1163,12 @@ def __repr__(self): self.code, self.message )[:1024] + def to_dict(self): + return { + "code": self.code, + "message": self.message + } + class CustomFormModelInfo(object): """Custom model information. @@ -1078,6 +1234,16 @@ def __repr__(self): )[:1024] ) + def to_dict(self): + return { + "model_id": self.model_id, + "status": self.status, + "training_started_on": self.training_started_on, + "training_completed_on": self.training_completed_on, + "model_name": self.model_name, + "properties": self.properties.to_dict() if self.properties else None + } + class AccountProperties(object): """Summary of all the custom models on the account. @@ -1102,6 +1268,12 @@ def __repr__(self): self.custom_model_count, self.custom_model_limit )[:1024] + def to_dict(self): + return { + "custom_model_count": self.custom_model_count, + "custom_model_limit": self.custom_model_limit + } + class CustomFormModelProperties(object): """Optional model properties. @@ -1123,6 +1295,11 @@ def __repr__(self): self.is_composed_model ) + def to_dict(self): + return { + "is_composed_model": self.is_composed_model + } + class TextAppearance(object): """An object representing the appearance of the text line. @@ -1147,6 +1324,11 @@ def _from_generated(cls, appearance): def __repr__(self): return "TextAppearance(style={})".format(repr(self.style)) + def to_dict(self): + return { + "style": self.style.to_dict() if self.style else None + } + class TextStyle(object): """An object representing the style of the text line. @@ -1164,3 +1346,6 @@ def __init__(self, **kwargs): def __repr__(self): return "TextStyle(name={}, confidence={})".format(self.name, self.confidence) + + def to_dict(self): + return {"name": self.name, "confidence": self.confidence} diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model.py index 0972c6558b61..d2c21c0fc25d 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model.py @@ -33,6 +33,10 @@ def test_compose_model_with_model_name(self, client, formrecognizer_storage_cont self.assertEqual(composed_model.model_name, "my composed model") self.assertComposedModelHasValues(composed_model, model_1, model_2) + composed_model_dict = composed_model.to_dict() + self.assertEqual(composed_model_dict.get("model_name"), "my composed model") + self.assertIsNotNone(composed_model_dict.get("model_id")) + @FormRecognizerPreparer() @GlobalClientPreparer() def test_compose_model_no_model_name(self, client, formrecognizer_storage_container_sas_url): diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model_async.py index 053bc0e66890..9cd1b66cba14 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_compose_model_async.py @@ -33,6 +33,10 @@ async def test_compose_model_with_model_name(self, client, formrecognizer_storag self.assertEqual(composed_model.model_name, "my composed model") self.assertComposedModelHasValues(composed_model, model_1, model_2) + composed_model_dict = composed_model.to_dict() + self.assertEqual(composed_model_dict.get("model_name"), "my composed model") + self.assertIsNotNone(composed_model_dict.get("model_id")) + @FormRecognizerPreparer() @GlobalClientPreparer() async def test_compose_model_no_model_name(self, client, formrecognizer_storage_container_sas_url): diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms.py index 20562e2837a8..02c9d870df42 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms.py @@ -205,6 +205,11 @@ def callback(raw_response, _, headers): self.assertIsNotNone(recognized_form[0].model_id) self.assertUnlabeledFormFieldDictTransformCorrect(recognized_form[0].fields, actual_fields, read_results) + recognized_form_dict = [v.to_dict() for v in recognized_form] + self.assertIsNone(recognized_form_dict[0].get("form_type_confidence")) + self.assertIsNotNone(recognized_form_dict[0].get("model_id")) + self.assertEqual(recognized_form_dict[0].get("form_type"), "form-0") + @FormRecognizerPreparer() @GlobalClientPreparer() def test_custom_form_multipage_unlabeled_transform(self, client, formrecognizer_multipage_storage_container_sas_url): diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms_async.py index 30389aaf31c1..342d1ec62e28 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_custom_forms_async.py @@ -224,6 +224,11 @@ def callback(raw_response, _, headers): self.assertIsNotNone(recognized_form[0].model_id) self.assertUnlabeledFormFieldDictTransformCorrect(recognized_form[0].fields, actual_fields, read_results) + recognized_form_dict = [v.to_dict() for v in recognized_form] + self.assertIsNone(recognized_form_dict[0].get("form_type_confidence")) + self.assertIsNotNone(recognized_form_dict[0].get("model_id")) + self.assertEqual(recognized_form_dict[0].get("form_type"), "form-0") + @FormRecognizerPreparer() @GlobalClientPreparer() async def test_custom_forms_multipage_unlabeled_transform(self, client, formrecognizer_multipage_storage_container_sas_url): diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_to_dict.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_to_dict.py new file mode 100644 index 000000000000..bc3c968cbc30 --- /dev/null +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_to_dict.py @@ -0,0 +1,1076 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +import pytest +import functools +from datetime import datetime +from azure.ai.formrecognizer import _models +from azure.ai.formrecognizer import FormRecognizerClient, FormContentType, FormTrainingClient +from testcase import FormRecognizerTest +from preparers import GlobalClientPreparer as _GlobalClientPreparer +from preparers import FormRecognizerPreparer + +GlobalClientPreparer = functools.partial(_GlobalClientPreparer, FormTrainingClient) + +class TestToDict(FormRecognizerTest): + def test_point_to_dict(self): + model = [_models.Point(1, 2), _models.Point(3, 4)] + d = [p.to_dict() for p in model] + final = [ + {"x": 1, "y": 2}, + { + "x": 3, + "y": 4, + }, + ] + assert d == final + + def test_form_word_to_dict(self): + form_word = _models.FormWord( + text="word", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ) + + d = form_word.to_dict() + final = { + "text": "word", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + } + assert d == final + + def test_form_line_to_dict(self): + form_line = _models.FormLine( + text="sample line", + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + words=[ + _models.FormWord( + text="sample", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + _models.FormWord( + text="line", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ], + page_number=2, + appearance=_models.TextAppearance( + style=_models.TextStyle(name="other", confidence=0.90) + ), + ) + + d = form_line.to_dict() + final = { + "text": "sample line", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "words": [ + { + "text": "sample", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + }, + { + "text": "line", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + }, + ], + "page_number": 2, + "kind": "line", + "appearance": {"style": {"name": "other", "confidence": 0.90}}, + } + assert d == final + + def test_form_selection_mark_to_dict(self): + form_selection_mark = _models.FormSelectionMark( + text="checkbox", + state="selected", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ) + + d = form_selection_mark.to_dict() + final = { + "text": "checkbox", + "state": "selected", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "selectionMark", + } + assert d == final + + def test_form_element_to_dict(self): + form_element = _models.FormElement( + kind="selectionMark", + text="element", + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ) + + d = form_element.to_dict() + final = { + "text": "element", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "page_number": 1, + "kind": "selectionMark", + } + assert d == final + + def test_text_appearance_to_dict(self): + model = _models.TextAppearance( + style=_models.TextStyle(name="other", confidence=0.98) + ) + + d = model.to_dict() + final = {"style": {"name": "other", "confidence": 0.98}} + assert d == final + + def test_text_style_to_dict(self): + model = _models.TextStyle(name="other", confidence=0.98) + + d = model.to_dict() + final = {"name": "other", "confidence": 0.98} + assert d == final + + def test_field_data_to_dict(self): + model = _models.FieldData( + text="element", + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + field_elements=[ + _models.FormWord( + text="word", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ], + ) + + d = model.to_dict() + final = { + "text": "element", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "page_number": 1, + "field_elements": [ + { + "text": "word", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + } + ], + } + assert d == final + + def test_form_field_to_dict(self): + form_field = _models.FormField( + value_type="phoneNumber", + label_data=_models.FieldData( + text="phone", + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + value_data=_models.FieldData( + text="55554444", + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + name="phone", + value="55554444", + confidence=0.99, + ) + + d = form_field.to_dict() + final = { + "value_type": "phoneNumber", + "label_data": { + "text": "phone", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "page_number": 1, + "field_elements": [] + }, + "value_data": { + "text": "55554444", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "page_number": 1, + "field_elements": [] + }, + "name": "phone", + "value": "55554444", + "confidence": 0.99, + } + assert d == final + + def test_recognized_form_to_dict(self): + form = _models.RecognizedForm( + form_type="test_form", + form_type_confidence="0.84", + model_id="examplemodel123", + page_range=_models.FormPageRange(1, 1), + fields={ + "example": _models.FormField( + value_type="phoneNumber", + label_data=_models.FieldData( + text="phone", + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + value_data=_models.FieldData( + text="55554444", + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + name="phone", + value="55554444", + confidence=0.99, + ) + }, + pages=[_models.FormPage( + page_number=1, + text_angle=180.0, + width=5.5, + height=8.0, + unit="pixel", + lines=[_models.FormLine( + text="sample line", + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + words=[ + _models.FormWord( + text="sample", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + _models.FormWord( + text="line", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ], + page_number=2, + appearance=_models.TextAppearance( + style=_models.TextStyle(name="other", confidence=0.90) + ), + )], + ) + ] + ) + + d = form.to_dict() + final = { + "form_type": "test_form", + "form_type_confidence": "0.84", + "model_id": "examplemodel123", + "page_range": {"first_page_number": 1, "last_page_number": 1}, + "fields": { + "example": { + "value_type": "phoneNumber", + "label_data": { + "text": "phone", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "page_number": 1, + "field_elements": [] + }, + "value_data": { + "text": "55554444", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "page_number": 1, + "field_elements": [] + }, + "name": "phone", + "value": "55554444", + "confidence": 0.99, + } + }, + "pages": [{ + "page_number": 1, + "text_angle": 180.0, + "width": 5.5, + "height": 8.0, + "unit": "pixel", + "lines": [{ + "text": "sample line", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "words": [ + { + "text": "sample", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + }, + { + "text": "line", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + }, + ], + "page_number": 2, + "kind": "line", + "appearance": {"style": {"name": "other", "confidence": 0.90}}, + }], + "selection_marks": [], + "tables": [], + }], + } + assert d == final + + def test_form_page_to_dict(self): + form_page = _models.FormPage( + page_number=1, + text_angle=180.0, + width=5.5, + height=8.0, + unit="pixel", + tables= [ + _models.FormTable( + page_number=2, + cells=[ + _models.FormTableCell( + text="info", + row_index=1, + column_index=3, + row_span=1, + column_span=2, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + confidence=0.87, + is_header=False, + is_footer=True, + page_number=1, + field_elements=[ + _models.FormWord( + text="word", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ] + ) + ], + row_count=10, + column_count=5, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ], + lines=[_models.FormLine( + text="sample line", + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + words=[ + _models.FormWord( + text="sample", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + _models.FormWord( + text="line", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ], + page_number=2, + appearance=_models.TextAppearance( + style=_models.TextStyle(name="other", confidence=0.90) + ), + ), + ], + selection_marks=[_models.FormSelectionMark( + text="checkbox", + state="selected", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ], + ) + d = form_page.to_dict() + final = { + "page_number": 1, + "text_angle": 180.0, + "width": 5.5, + "height": 8.0, + "unit": "pixel", + "tables": [ + {"cells": [ + { + "text": "info", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "row_index": 1, + "column_index": 3, + "row_span": 1, + "column_span": 2, + "confidence": 0.87, + "is_header": False, + "is_footer": True, + "page_number": 1, + "field_elements": [ + { + "text": "word", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + } + ], + }, + ], + "page_number": 2, + "row_count": 10, + "column_count": 5, + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + }, + ], + "lines": [{ + "text": "sample line", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "words": [ + { + "text": "sample", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + }, + { + "text": "line", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + }, + ], + "page_number": 2, + "kind": "line", + "appearance": {"style": {"name": "other", "confidence": 0.90}}, + }], + "selection_marks": [{ + "text": "checkbox", + "state": "selected", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "selectionMark", + }], + } + assert d == final + + def test_form_table_cell_to_dict(self): + table_cell = _models.FormTableCell( + text="info", + row_index=1, + column_index=3, + row_span=1, + column_span=2, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + confidence=0.87, + is_header=False, + is_footer=True, + page_number=1, + field_elements=[ + _models.FormWord( + text="word", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ] + ) + + d = table_cell.to_dict() + final = { + "text": "info", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "row_index": 1, + "column_index": 3, + "row_span": 1, + "column_span": 2, + "confidence": 0.87, + "is_header": False, + "is_footer": True, + "page_number": 1, + "field_elements": [ + { + "text": "word", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + } + ], + } + assert d == final + + def test_form_table_to_dict(self): + table = _models.FormTable( + page_number=2, + cells=[ + _models.FormTableCell( + text="info", + row_index=1, + column_index=3, + row_span=1, + column_span=2, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + confidence=0.87, + is_header=False, + is_footer=True, + page_number=1, + field_elements=[ + _models.FormWord( + text="word", + confidence=0.92, + page_number=1, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ), + ] + ) + ], + row_count=10, + column_count=5, + bounding_box=[ + _models.Point(1427.0, 1669.0), + _models.Point(1527.0, 1669.0), + _models.Point(1527.0, 1698.0), + _models.Point(1427.0, 1698.0), + ], + ) + + d = table.to_dict() + final = { + "cells": [ + { + "text": "info", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "row_index": 1, + "column_index": 3, + "row_span": 1, + "column_span": 2, + "confidence": 0.87, + "is_header": False, + "is_footer": True, + "page_number": 1, + "field_elements": [ + { + "text": "word", + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + "confidence": 0.92, + "page_number": 1, + "kind": "word", + } + ], + }, + ], + "page_number": 2, + "row_count": 10, + "column_count": 5, + "bounding_box": [ + {"x": 1427.0, "y": 1669.0}, + {"x": 1527.0, "y": 1669.0}, + {"x": 1527.0, "y": 1698.0}, + {"x": 1427.0, "y": 1698.0}, + ], + } + assert d == final + + def test_custom_form_model_properties_to_dict(self): + model = _models.CustomFormModelProperties( + is_composed_model=True, + ) + d = model.to_dict() + final = { + "is_composed_model": True, + } + assert d == final + + def test_account_properties_to_dict(self): + model = _models.AccountProperties( + custom_model_count=5, + custom_model_limit=10, + ) + d = model.to_dict() + final = { + "custom_model_count": 5, + "custom_model_limit": 10, + } + assert d == final + + def test_custom_form_model_info_to_dict(self): + model = _models.CustomFormModelInfo( + model_id="1234", + status="creating", + training_started_on=datetime(2021, 1, 10, 23, 55, 59, 342380), + training_completed_on=datetime(2021, 1, 10, 23, 55, 59, 342380), + model_name="sample_model", + properties=_models.CustomFormModelProperties( + is_composed_model=False, + ) + ) + d = model.to_dict() + final = { + "model_id": "1234", + "status": "creating", + "training_started_on": datetime(2021, 1, 10, 23, 55, 59, 342380), + "training_completed_on": datetime(2021, 1, 10, 23, 55, 59, 342380), + "model_name": "sample_model", + "properties": { + "is_composed_model": False, + } + } + assert d == final + + def test_form_recognizer_error_to_dict(self): + model = _models.FormRecognizerError( + code=404, + message="error not found", + ) + d = model.to_dict() + final = { + "code": 404, + "message": "error not found", + } + assert d == final + + def test_training_document_info_to_dict(self): + model = _models.TrainingDocumentInfo( + name="sample doc", + status="succeeded", + page_count=3, + errors=[ + _models.FormRecognizerError( + code=404, + message="error not found", + ) + ], + model_id="1234", + ) + d = model.to_dict() + final = { + "name": "sample doc", + "status": "succeeded", + "page_count": 3, + "errors": [ + { + "code": 404, + "message": "error not found", + } + ], + "model_id": "1234", + } + assert d == final + + def test_custom_form_model_field_to_dict(self): + model = _models.CustomFormModelField( + label="field_label", + name="field", + accuracy=0.98, + ) + d = model.to_dict() + final = { + "label": "field_label", + "name": "field", + "accuracy": 0.98, + } + assert d == final + + def test_custom_form_submodel_to_dict(self): + model = _models.CustomFormSubmodel( + model_id="1234", + form_type="submodel", + accuracy=0.98, + fields={ + "example": _models.CustomFormModelField( + label="field_label", + name="field", + accuracy=0.98, + ) + } + ) + d = model.to_dict() + final = { + "model_id": "1234", + "form_type": "submodel", + "accuracy": 0.98, + "fields": { + "example": { + "label": "field_label", + "name": "field", + "accuracy": 0.98, + } + } + } + assert d == final + + def test_custom_form_model_to_dict(self): + model = _models.CustomFormModel( + model_id="1234", + status="ready", + training_started_on=datetime(2021, 1, 10, 23, 55, 59, 342380), + training_completed_on=datetime(2021, 1, 10, 23, 55, 59, 342380), + submodels=[ + _models.CustomFormSubmodel( + model_id="1234", + form_type="submodel", + accuracy=0.98, + fields={ + "example": _models.CustomFormModelField( + label="field_label", + name="field", + accuracy=0.98, + ) + } + ) + ], + errors=[ + _models.FormRecognizerError( + code=404, + message="error not found", + ) + ], + training_documents=[ + _models.TrainingDocumentInfo( + name="sample doc", + status="succeeded", + page_count=3, + errors=[ + _models.FormRecognizerError( + code=404, + message="error not found", + ) + ], + model_id="1234", + ) + ], + model_name="sample model", + properties=_models.CustomFormModelProperties( + is_composed_model=True, + ) + ) + d = model.to_dict() + final = { + "model_id": "1234", + "status": "ready", + "training_started_on": datetime(2021, 1, 10, 23, 55, 59, 342380), + "training_completed_on": datetime(2021, 1, 10, 23, 55, 59, 342380), + "submodels": [{ + "model_id": "1234", + "form_type": "submodel", + "accuracy": 0.98, + "fields": { + "example": + { + "label": "field_label", + "name": "field", + "accuracy": 0.98, + } + } + }], + "errors": [ + { + "code": 404, + "message": "error not found", + } + ], + "training_documents": [ + { + "name": "sample doc", + "status": "succeeded", + "page_count": 3, + "errors": [ + { + "code": 404, + "message": "error not found", + } + ], + "model_id": "1234", + } + ], + "model_name": "sample model", + "properties": { + "is_composed_model": True, + } + } + assert d == final