Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ class Point(namedtuple("Point", "x y")):
def __new__(cls, x, y):
    """Build the tuple instance from the ``x`` and ``y`` values.

    :param x: Value stored in the ``x`` field.
    :param y: Value stored in the ``y`` field.
    :return: The newly created instance.
    """
    instance = super(Point, cls).__new__(cls, x, y)
    return instance

def to_dict(self):
    """Return the ``x`` and ``y`` values as a plain dictionary.

    :return: Mapping with the keys ``x`` and ``y``.
    :rtype: dict
    """
    return dict(x=self.x, y=self.y)


class FormPageRange(namedtuple("FormPageRange", "first_page_number last_page_number")):
"""The 1-based page range of the form.
Expand All @@ -159,6 +162,12 @@ def __new__(cls, first_page_number, last_page_number):
cls, first_page_number, last_page_number
)

def to_dict(self):
    """Return the first and last page numbers as a plain dictionary.

    :return: Mapping with the keys ``first_page_number`` and
        ``last_page_number``.
    :rtype: dict
    """
    field_names = ("first_page_number", "last_page_number")
    return {name: getattr(self, name) for name in field_names}


class FormElement(object):
"""Base type which includes properties for a form element.
Expand All @@ -183,6 +192,14 @@ def __init__(self, **kwargs):
self.text = kwargs.get("text", None)
self.kind = kwargs.get("kind", None)

def to_dict(self):
    """Return a dictionary representation of this form element.

    Each entry of ``bounding_box`` is converted through its own
    ``to_dict()``; a missing or empty bounding box becomes ``[]``.

    :return: Mapping with the keys ``text``, ``bounding_box``,
        ``page_number`` and ``kind``.
    :rtype: dict
    """
    result = {"text": self.text}
    if self.bounding_box:
        result["bounding_box"] = [corner.to_dict() for corner in self.bounding_box]
    else:
        result["bounding_box"] = []
    result["page_number"] = self.page_number
    result["kind"] = self.kind
    return result


class RecognizedForm(object):
"""Represents a form that has been recognized by a trained or prebuilt model.
Expand Down Expand Up @@ -231,6 +248,16 @@ def __repr__(self):
)[:1024]
)

def to_dict(self):
    """Return a dictionary representation of this recognized form.

    Nested objects (the values of ``fields``, the entries of ``pages`` and
    ``page_range``) are converted through their own ``to_dict()``. Falsy
    ``fields`` become ``{}``, falsy ``pages`` become ``[]`` and a falsy
    ``page_range`` becomes ``None``.

    :return: Mapping with the keys ``fields``, ``form_type``, ``pages``,
        ``model_id``, ``form_type_confidence`` and ``page_range``.
    :rtype: dict
    """
    result = {}

    field_map = {}
    if self.fields:
        for field_name, field in self.fields.items():
            field_map[field_name] = field.to_dict()
    result["fields"] = field_map

    result["form_type"] = self.form_type

    page_list = []
    if self.pages:
        page_list = [page.to_dict() for page in self.pages]
    result["pages"] = page_list

    result["model_id"] = self.model_id
    result["form_type_confidence"] = self.form_type_confidence

    page_range = None
    if self.page_range:
        page_range = self.page_range.to_dict()
    result["page_range"] = page_range
    return result


class FormField(object):
"""Represents a field recognized in an input form.
Expand Down Expand Up @@ -302,6 +329,20 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dictionary representation of this form field.

    A ``dict`` value has each of its values converted with ``to_dict()``,
    a ``list`` value has each of its items converted with ``to_dict()``,
    and any other value is passed through unchanged. ``label_data`` and
    ``value_data`` are converted with ``to_dict()`` when truthy and are
    ``None`` otherwise.

    :return: Mapping with the keys ``value_type``, ``name``, ``value``,
        ``confidence``, ``label_data`` and ``value_data``.
    :rtype: dict
    """
    raw_value = self.value
    if isinstance(raw_value, dict):
        converted = {key: item.to_dict() for key, item in raw_value.items()}
    elif isinstance(raw_value, list):
        converted = [item.to_dict() for item in raw_value]
    else:
        # Every other value is emitted as-is.
        converted = raw_value

    result = dict(
        value_type=self.value_type,
        name=self.name,
        value=converted,
        confidence=self.confidence,
    )
    result["label_data"] = self.label_data.to_dict() if self.label_data else None
    result["value_data"] = self.value_data.to_dict() if self.value_data else None
    return result

class FieldData(object):
"""Contains the data for the form field. This includes the text,
Expand Down Expand Up @@ -371,6 +412,14 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dictionary representation of this field data.

    Entries of ``bounding_box`` and ``field_elements`` are converted with
    their own ``to_dict()``; a falsy list becomes ``[]``.

    :return: Mapping with the keys ``text``, ``bounding_box``,
        ``page_number`` and ``field_elements``.
    :rtype: dict
    """
    result = {"text": self.text}

    if self.bounding_box:
        result["bounding_box"] = [corner.to_dict() for corner in self.bounding_box]
    else:
        result["bounding_box"] = []

    result["page_number"] = self.page_number

    if self.field_elements:
        result["field_elements"] = [element.to_dict() for element in self.field_elements]
    else:
        result["field_elements"] = []
    return result


class FormPage(object):
"""Represents a page recognized from the input document. Contains lines,
Expand Down Expand Up @@ -430,6 +479,17 @@ def __repr__(self):
)[:1024]
)

def to_dict(self):
    """Return a dictionary representation of this page.

    Entries of ``tables``, ``lines`` and ``selection_marks`` are converted
    with their own ``to_dict()``; a falsy collection becomes ``[]``.

    :return: Mapping with the keys ``page_number``, ``text_angle``,
        ``width``, ``height``, ``unit``, ``tables``, ``lines`` and
        ``selection_marks``.
    :rtype: dict
    """

    def _as_dicts(items):
        # Falsy collections (None or empty) are emitted as an empty list.
        if not items:
            return []
        return [item.to_dict() for item in items]

    result = {
        name: getattr(self, name)
        for name in ("page_number", "text_angle", "width", "height", "unit")
    }
    result["tables"] = _as_dicts(self.tables)
    result["lines"] = _as_dicts(self.lines)
    result["selection_marks"] = _as_dicts(self.selection_marks)
    return result

class FormLine(FormElement):
"""An object representing an extracted line of text.
Expand Down Expand Up @@ -486,6 +546,16 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dictionary representation of this line.

    Entries of ``bounding_box`` and ``words`` are converted with their own
    ``to_dict()`` (falsy lists become ``[]``). ``appearance`` is converted
    with ``to_dict()`` when truthy and is ``None`` otherwise.

    :return: Mapping with the keys ``text``, ``bounding_box``, ``words``,
        ``page_number``, ``kind`` and ``appearance``.
    :rtype: dict
    """
    result = {"text": self.text}

    corners = []
    if self.bounding_box:
        corners = [corner.to_dict() for corner in self.bounding_box]
    result["bounding_box"] = corners

    word_dicts = []
    if self.words:
        word_dicts = [word.to_dict() for word in self.words]
    result["words"] = word_dicts

    result["page_number"] = self.page_number
    result["kind"] = self.kind

    if self.appearance:
        result["appearance"] = self.appearance.to_dict()
    else:
        result["appearance"] = None
    return result


class FormWord(FormElement):
"""Represents a word recognized from the input document.
Expand Down Expand Up @@ -523,6 +593,15 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dictionary representation of this word.

    Entries of ``bounding_box`` are converted with their own ``to_dict()``;
    a falsy bounding box becomes ``[]``.

    :return: Mapping with the keys ``text``, ``bounding_box``,
        ``confidence``, ``page_number`` and ``kind``.
    :rtype: dict
    """
    result = {"text": self.text}
    box = self.bounding_box
    result["bounding_box"] = [corner.to_dict() for corner in box] if box else []
    for name in ("confidence", "page_number", "kind"):
        result[name] = getattr(self, name)
    return result


class FormSelectionMark(FormElement):
"""Information about the extracted selection mark.
Expand Down Expand Up @@ -557,12 +636,22 @@ def _from_generated(cls, mark, page):
)

def __repr__(self):
return "FormSelectionMark(text={}, bounding_box={}, confidence={}, page_number={}, state={})".format(
self.text, self.bounding_box, self.confidence, self.page_number, self.state
return "FormSelectionMark(text={}, bounding_box={}, confidence={}, page_number={}, state={}, kind={})".format(
self.text, self.bounding_box, self.confidence, self.page_number, self.state, self.kind
)[
:1024
]

def to_dict(self):
    """Return a dictionary representation of this selection mark.

    Entries of ``bounding_box`` are converted with their own ``to_dict()``;
    a falsy bounding box becomes ``[]``.

    :return: Mapping with the keys ``text``, ``bounding_box``,
        ``confidence``, ``state``, ``page_number`` and ``kind``.
    :rtype: dict
    """
    return dict(
        text=self.text,
        bounding_box=(
            [corner.to_dict() for corner in self.bounding_box]
            if self.bounding_box
            else []
        ),
        confidence=self.confidence,
        state=self.state,
        page_number=self.page_number,
        kind=self.kind,
    )


class FormTable(object):
"""Information about the extracted table contained on a page.
Expand Down Expand Up @@ -603,6 +692,15 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dictionary representation of this table.

    Entries of ``cells`` and ``bounding_box`` are converted with their own
    ``to_dict()``. A falsy collection (``None`` or empty) becomes ``[]``;
    ``cells`` is guarded the same way as ``bounding_box`` so a table with
    no cells does not raise.

    :return: Mapping with the keys ``page_number``, ``row_count``,
        ``column_count``, ``cells`` and ``bounding_box``.
    :rtype: dict
    """
    return {
        "page_number": self.page_number,
        "row_count": self.row_count,
        "column_count": self.column_count,
        # Guard against ``cells`` being None, matching the other list fields.
        "cells": [cell.to_dict() for cell in self.cells] if self.cells else [],
        "bounding_box": [box.to_dict() for box in self.bounding_box] if self.bounding_box else [],
    }


class FormTableCell(object): # pylint:disable=too-many-instance-attributes
"""Represents a cell contained in a table recognized from the input document.
Expand Down Expand Up @@ -688,6 +786,22 @@ def __repr__(self):
]
)

def to_dict(self):
    """Return a dictionary representation of this table cell.

    Entries of ``bounding_box`` are converted with their own ``to_dict()``
    and a falsy bounding box becomes ``[]``. Entries of ``field_elements``
    are converted the same way, but a falsy ``field_elements`` is emitted
    as ``None`` rather than an empty list.

    :return: Mapping with the keys ``text``, ``row_index``,
        ``column_index``, ``row_span``, ``column_span``, ``confidence``,
        ``is_header``, ``is_footer``, ``page_number``, ``bounding_box`` and
        ``field_elements``.
    :rtype: dict
    """
    scalar_names = (
        "text",
        "row_index",
        "column_index",
        "row_span",
        "column_span",
        "confidence",
        "is_header",
        "is_footer",
        "page_number",
    )
    result = {name: getattr(self, name) for name in scalar_names}

    if self.bounding_box:
        result["bounding_box"] = [box.to_dict() for box in self.bounding_box]
    else:
        result["bounding_box"] = []

    if self.field_elements:
        result["field_elements"] = [element.to_dict() for element in self.field_elements]
    else:
        # Unlike bounding_box, absent field elements are reported as None.
        result["field_elements"] = None
    return result


class CustomFormModel(object):
"""Represents a model trained from custom forms.
Expand Down Expand Up @@ -790,6 +904,18 @@ def __repr__(self):
]
)

def to_dict(self):
    """Return a dictionary representation of this custom model.

    Entries of ``submodels``, ``errors`` and ``training_documents`` are
    converted with their own ``to_dict()`` (falsy collections become
    ``[]``). ``properties`` is converted with ``to_dict()`` when truthy and
    is ``None`` otherwise. All other attributes are passed through
    unchanged.

    :return: Mapping with the keys ``model_id``, ``status``,
        ``training_started_on``, ``training_completed_on``, ``submodels``,
        ``errors``, ``training_documents``, ``model_name`` and
        ``properties``.
    :rtype: dict
    """

    def _as_dicts(items):
        # None or empty collections are emitted as an empty list.
        if not items:
            return []
        return [item.to_dict() for item in items]

    result = {
        "model_id": self.model_id,
        "status": self.status,
        "training_started_on": self.training_started_on,
        "training_completed_on": self.training_completed_on,
    }
    result["submodels"] = _as_dicts(self.submodels)
    result["errors"] = _as_dicts(self.errors)
    result["training_documents"] = _as_dicts(self.training_documents)
    result["model_name"] = self.model_name
    if self.properties:
        result["properties"] = self.properties.to_dict()
    else:
        result["properties"] = None
    return result

class CustomFormSubmodel(object):
"""Represents a submodel that extracts fields from a specific type of form.
Expand Down Expand Up @@ -884,6 +1010,14 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dictionary representation of this submodel.

    Each value of ``fields`` is converted with its own ``to_dict()``; a
    falsy ``fields`` becomes ``{}``.

    :return: Mapping with the keys ``model_id``, ``accuracy``, ``fields``
        and ``form_type``.
    :rtype: dict
    """
    field_map = {}
    if self.fields:
        for field_name, field in self.fields.items():
            field_map[field_name] = field.to_dict()
    return dict(
        model_id=self.model_id,
        accuracy=self.accuracy,
        fields=field_map,
        form_type=self.form_type,
    )


class CustomFormModelField(object):
"""A field that the model will extract from forms it analyzes.
Expand Down Expand Up @@ -917,6 +1051,13 @@ def __repr__(self):
self.label, self.name, self.accuracy
)[:1024]

def to_dict(self):
    """Return a dictionary representation of this model field.

    :return: Mapping with the keys ``label``, ``accuracy`` and ``name``.
    :rtype: dict
    """
    return dict(label=self.label, accuracy=self.accuracy, name=self.name)


class TrainingDocumentInfo(object):
"""Report for an individual document used for training
Expand Down Expand Up @@ -988,6 +1129,15 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dictionary representation of this training document report.

    Entries of ``errors`` are converted with their own ``to_dict()``. A
    falsy ``errors`` (``None`` or empty) becomes ``[]`` so that a document
    without recorded errors does not raise.

    :return: Mapping with the keys ``name``, ``status``, ``page_count``,
        ``errors`` and ``model_id``.
    :rtype: dict
    """
    return {
        "name": self.name,
        "status": self.status,
        "page_count": self.page_count,
        # Guard against ``errors`` being None instead of iterating blindly.
        "errors": [err.to_dict() for err in self.errors] if self.errors else [],
        "model_id": self.model_id,
    }


class FormRecognizerError(object):
"""Represents an error that occurred while training.
Expand All @@ -1013,6 +1163,12 @@ def __repr__(self):
self.code, self.message
)[:1024]

def to_dict(self):
    """Return a dictionary representation of this error.

    :return: Mapping with the keys ``code`` and ``message``.
    :rtype: dict
    """
    return dict(code=self.code, message=self.message)


class CustomFormModelInfo(object):
"""Custom model information.
Expand Down Expand Up @@ -1078,6 +1234,16 @@ def __repr__(self):
)[:1024]
)

def to_dict(self):
    """Return a dictionary representation of this model summary.

    ``properties`` is converted with its own ``to_dict()`` when truthy and
    is ``None`` otherwise; all other attributes are passed through
    unchanged.

    :return: Mapping with the keys ``model_id``, ``status``,
        ``training_started_on``, ``training_completed_on``, ``model_name``
        and ``properties``.
    :rtype: dict
    """
    passthrough = (
        "model_id",
        "status",
        "training_started_on",
        "training_completed_on",
        "model_name",
    )
    summary = {name: getattr(self, name) for name in passthrough}
    if self.properties:
        summary["properties"] = self.properties.to_dict()
    else:
        summary["properties"] = None
    return summary


class AccountProperties(object):
"""Summary of all the custom models on the account.
Expand All @@ -1102,6 +1268,12 @@ def __repr__(self):
self.custom_model_count, self.custom_model_limit
)[:1024]

def to_dict(self):
    """Return a dictionary representation of the account properties.

    :return: Mapping with the keys ``custom_model_count`` and
        ``custom_model_limit``.
    :rtype: dict
    """
    attribute_names = ("custom_model_count", "custom_model_limit")
    return {name: getattr(self, name) for name in attribute_names}


class CustomFormModelProperties(object):
"""Optional model properties.
Expand All @@ -1123,6 +1295,11 @@ def __repr__(self):
self.is_composed_model
)

def to_dict(self):
    """Return a dictionary representation of the model properties.

    :return: Mapping with the single key ``is_composed_model``.
    :rtype: dict
    """
    return dict(is_composed_model=self.is_composed_model)


class TextAppearance(object):
"""An object representing the appearance of the text line.
Expand All @@ -1147,6 +1324,11 @@ def _from_generated(cls, appearance):
def __repr__(self):
return "TextAppearance(style={})".format(repr(self.style))

def to_dict(self):
    """Return a dictionary representation of the text appearance.

    ``style`` is converted with its own ``to_dict()`` when truthy and is
    ``None`` otherwise.

    :return: Mapping with the single key ``style``.
    :rtype: dict
    """
    style = self.style
    if not style:
        return {"style": None}
    return {"style": style.to_dict()}


class TextStyle(object):
"""An object representing the style of the text line.
Expand All @@ -1164,3 +1346,6 @@ def __init__(self, **kwargs):

def __repr__(self):
return "TextStyle(name={}, confidence={})".format(self.name, self.confidence)

def to_dict(self):
    """Return a dictionary representation of the text style.

    :return: Mapping with the keys ``name`` and ``confidence``.
    :rtype: dict
    """
    return dict(name=self.name, confidence=self.confidence)
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def test_compose_model_with_model_name(self, client, formrecognizer_storage_cont
self.assertEqual(composed_model.model_name, "my composed model")
self.assertComposedModelHasValues(composed_model, model_1, model_2)

composed_model_dict = composed_model.to_dict()
self.assertEqual(composed_model_dict.get("model_name"), "my composed model")
self.assertIsNotNone(composed_model_dict.get("model_id"))

@FormRecognizerPreparer()
@GlobalClientPreparer()
def test_compose_model_no_model_name(self, client, formrecognizer_storage_container_sas_url):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ async def test_compose_model_with_model_name(self, client, formrecognizer_storag
self.assertEqual(composed_model.model_name, "my composed model")
self.assertComposedModelHasValues(composed_model, model_1, model_2)

composed_model_dict = composed_model.to_dict()
self.assertEqual(composed_model_dict.get("model_name"), "my composed model")
self.assertIsNotNone(composed_model_dict.get("model_id"))

@FormRecognizerPreparer()
@GlobalClientPreparer()
async def test_compose_model_no_model_name(self, client, formrecognizer_storage_container_sas_url):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ def callback(raw_response, _, headers):
self.assertIsNotNone(recognized_form[0].model_id)
self.assertUnlabeledFormFieldDictTransformCorrect(recognized_form[0].fields, actual_fields, read_results)

recognized_form_dict = [v.to_dict() for v in recognized_form]
self.assertIsNone(recognized_form_dict[0].get("form_type_confidence"))
self.assertIsNotNone(recognized_form_dict[0].get("model_id"))
self.assertEqual(recognized_form_dict[0].get("form_type"), "form-0")

@FormRecognizerPreparer()
@GlobalClientPreparer()
def test_custom_form_multipage_unlabeled_transform(self, client, formrecognizer_multipage_storage_container_sas_url):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,11 @@ def callback(raw_response, _, headers):
self.assertIsNotNone(recognized_form[0].model_id)
self.assertUnlabeledFormFieldDictTransformCorrect(recognized_form[0].fields, actual_fields, read_results)

recognized_form_dict = [v.to_dict() for v in recognized_form]
self.assertIsNone(recognized_form_dict[0].get("form_type_confidence"))
self.assertIsNotNone(recognized_form_dict[0].get("model_id"))
self.assertEqual(recognized_form_dict[0].get("form_type"), "form-0")

@FormRecognizerPreparer()
@GlobalClientPreparer()
async def test_custom_forms_multipage_unlabeled_transform(self, client, formrecognizer_multipage_storage_container_sas_url):
Expand Down
Loading