Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ class Point(namedtuple("Point", "x y")):
def __new__(cls, x, y):
return super(Point, cls).__new__(cls, x, y)

def to_dict(self):
    """Return the coordinates as a dict with the keys ``x`` and ``y``.

    :rtype: dict
    """
    coordinates = {}
    coordinates["x"] = self.x
    coordinates["y"] = self.y
    return coordinates


class FormPageRange(namedtuple("FormPageRange", "first_page_number last_page_number")):
"""The 1-based page range of the form.
Expand All @@ -159,6 +162,12 @@ def __new__(cls, first_page_number, last_page_number):
cls, first_page_number, last_page_number
)

def to_dict(self):
    """Return the page range as a dict.

    :return: dict with the keys ``first_page_number`` and ``last_page_number``.
    :rtype: dict
    """
    first, last = self.first_page_number, self.last_page_number
    return {"first_page_number": first, "last_page_number": last}


class FormElement(object):
"""Base type which includes properties for a form element.
Expand All @@ -183,6 +192,14 @@ def __init__(self, **kwargs):
self.text = kwargs.get("text", None)
self.kind = kwargs.get("kind", None)

def to_dict(self):
    """Return a dict representation of this form element.

    Each entry of ``bounding_box`` is converted with its own ``to_dict()``;
    a missing or empty bounding box becomes an empty list.

    :rtype: dict
    """
    corners = self.bounding_box
    if corners:
        box = [corner.to_dict() for corner in corners]
    else:
        box = []
    return {
        "text": self.text,
        "bounding_box": box,
        "page_number": self.page_number,
        "kind": self.kind,
    }


class RecognizedForm(object):
"""Represents a form that has been recognized by a trained or prebuilt model.
Expand Down Expand Up @@ -231,6 +248,16 @@ def __repr__(self):
)[:1024]
)

def to_dict(self):
    """Return a dict representation of this recognized form.

    Nested models (fields, pages, page range) are converted by calling
    their own ``to_dict()``.

    :return: dict with the keys ``fields``, ``form_type``, ``pages``,
        ``model_id``, ``form_type_confidence`` and ``page_range``.
    :rtype: dict
    """
    fields = self.fields
    if isinstance(fields, dict):
        # Iterating a dict yields its keys (field names), which have no
        # to_dict(); serialize the values and keep the names as keys.
        # NOTE(review): assumes fields is a name -> field mapping; confirm
        # against the class docstring.
        fields_out = {name: field.to_dict() for name, field in fields.items()}
    elif fields:
        fields_out = [field.to_dict() for field in fields]
    else:
        fields_out = []

    pages = self.pages
    page_range = self.page_range
    return {
        "fields": fields_out,
        "form_type": self.form_type,
        "pages": [page.to_dict() for page in pages] if pages else [],
        "model_id": self.model_id,
        "form_type_confidence": self.form_type_confidence,
        "page_range": page_range.to_dict() if page_range else None,
    }


class FormField(object):
"""Represents a field recognized in an input form.
Expand Down Expand Up @@ -302,6 +329,15 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dict representation of this form field.

    ``label_data`` and ``value_data`` are converted with their own
    ``to_dict()`` (``None`` when unset). ``value`` is returned as-is unless
    it is, or contains inside a list or dict, objects that expose
    ``to_dict()``; those are converted as well so the result holds no
    model instances.

    :return: dict with the keys ``value_type``, ``name``, ``value``,
        ``confidence``, ``label_data`` and ``value_data``.
    :rtype: dict
    """

    def _plain(value):
        # Recursively replace nested models by their dict form.
        # NOTE(review): assumes compound values are lists/dicts of fields;
        # confirm against the documented type of ``value``.
        if hasattr(value, "to_dict"):
            return value.to_dict()
        if isinstance(value, dict):
            return {key: _plain(item) for key, item in value.items()}
        if isinstance(value, list):
            return [_plain(item) for item in value]
        return value

    label_data = self.label_data
    value_data = self.value_data
    return {
        "value_type": self.value_type,
        "name": self.name,
        "value": _plain(self.value),
        "confidence": self.confidence,
        "label_data": label_data.to_dict() if label_data else None,
        "value_data": value_data.to_dict() if value_data else None,
    }

class FieldData(object):
"""Contains the data for the form field. This includes the text,
Expand Down Expand Up @@ -371,6 +407,14 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dict representation of this field data.

    ``bounding_box`` and ``field_elements`` entries are converted with their
    own ``to_dict()``; a missing or empty collection becomes an empty list.

    :rtype: dict
    """

    def _dump_all(items):
        # Empty list for None/empty input, otherwise each item's dict form.
        return [item.to_dict() for item in items] if items else []

    result = {"text": self.text}
    result["bounding_box"] = _dump_all(self.bounding_box)
    result["page_number"] = self.page_number
    result["field_elements"] = _dump_all(self.field_elements)
    return result


class FormPage(object):
"""Represents a page recognized from the input document. Contains lines,
Expand Down Expand Up @@ -430,6 +474,17 @@ def __repr__(self):
)[:1024]
)

def to_dict(self):
    """Return a dict representation of this page.

    ``tables``, ``lines`` and ``selection_marks`` entries are converted with
    their own ``to_dict()``; a missing or empty collection becomes an empty
    list.

    :rtype: dict
    """

    def _dump_all(items):
        # Empty list for None/empty input, otherwise each item's dict form.
        return [item.to_dict() for item in items] if items else []

    page = {
        key: getattr(self, key)
        for key in ("page_number", "text_angle", "width", "height", "unit")
    }
    page["tables"] = _dump_all(self.tables)
    page["lines"] = _dump_all(self.lines)
    page["selection_marks"] = _dump_all(self.selection_marks)
    return page

class FormLine(FormElement):
"""An object representing an extracted line of text.
Expand Down Expand Up @@ -486,6 +541,16 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dict representation of this line.

    ``bounding_box`` and ``words`` entries are converted with their own
    ``to_dict()`` (empty list when missing or empty); ``appearance`` is
    converted the same way, or is ``None`` when unset.

    :rtype: dict
    """
    box = []
    if self.bounding_box:
        box = [corner.to_dict() for corner in self.bounding_box]

    words = []
    if self.words:
        words = [word.to_dict() for word in self.words]

    appearance = None
    if self.appearance:
        appearance = self.appearance.to_dict()

    return {
        "text": self.text,
        "bounding_box": box,
        "words": words,
        "page_number": self.page_number,
        "kind": self.kind,
        "appearance": appearance,
    }


class FormWord(FormElement):
"""Represents a word recognized from the input document.
Expand Down Expand Up @@ -523,6 +588,15 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dict representation of this word.

    Each ``bounding_box`` entry is converted with its own ``to_dict()``.
    A missing or empty bounding box becomes an empty list instead of
    raising, matching the other ``to_dict`` implementations in this module.

    :return: dict with the keys ``text``, ``bounding_box``, ``confidence``,
        ``page_number`` and ``kind``.
    :rtype: dict
    """
    corners = self.bounding_box
    return {
        "text": self.text,
        # Guard against None: iterating it would raise TypeError.
        "bounding_box": [corner.to_dict() for corner in corners] if corners else [],
        "confidence": self.confidence,
        "page_number": self.page_number,
        "kind": self.kind,
    }


class FormSelectionMark(FormElement):
"""Information about the extracted selection mark.
Expand Down Expand Up @@ -557,12 +631,22 @@ def _from_generated(cls, mark, page):
)

def __repr__(self):
return "FormSelectionMark(text={}, bounding_box={}, confidence={}, page_number={}, state={})".format(
self.text, self.bounding_box, self.confidence, self.page_number, self.state
return "FormSelectionMark(text={}, bounding_box={}, confidence={}, page_number={}, state={}, kind={})".format(
self.text, self.bounding_box, self.confidence, self.page_number, self.state, self.kind
)[
:1024
]

def to_dict(self):
    """Return a dict representation of this selection mark.

    Each ``bounding_box`` entry is converted with its own ``to_dict()``.
    A missing or empty bounding box becomes an empty list instead of
    raising, matching the other ``to_dict`` implementations in this module.

    :return: dict with the keys ``text``, ``bounding_box``, ``confidence``,
        ``state``, ``page_number`` and ``kind``.
    :rtype: dict
    """
    corners = self.bounding_box
    return {
        "text": self.text,
        # Guard against None: iterating it would raise TypeError.
        "bounding_box": [corner.to_dict() for corner in corners] if corners else [],
        "confidence": self.confidence,
        "state": self.state,
        "page_number": self.page_number,
        "kind": self.kind,
    }


class FormTable(object):
"""Information about the extracted table contained on a page.
Expand Down Expand Up @@ -603,6 +687,15 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dict representation of this table.

    ``cells`` and ``bounding_box`` entries are converted with their own
    ``to_dict()``. A missing or empty collection becomes an empty list
    instead of raising, matching the other ``to_dict`` implementations in
    this module.

    :return: dict with the keys ``page_number``, ``row_count``,
        ``column_count``, ``cells`` and ``bounding_box``.
    :rtype: dict
    """
    cells = self.cells
    corners = self.bounding_box
    return {
        "page_number": self.page_number,
        "row_count": self.row_count,
        "column_count": self.column_count,
        # Guards against None: iterating it would raise TypeError.
        "cells": [cell.to_dict() for cell in cells] if cells else [],
        "bounding_box": [corner.to_dict() for corner in corners] if corners else [],
    }


class FormTableCell(object): # pylint:disable=too-many-instance-attributes
"""Represents a cell contained in a table recognized from the input document.
Expand Down Expand Up @@ -688,6 +781,22 @@ def __repr__(self):
]
)

def to_dict(self):
    """Return a dict representation of this table cell.

    ``bounding_box`` and ``field_elements`` entries are converted with their
    own ``to_dict()``. A missing or empty collection becomes an empty list:
    the bounding box no longer raises on ``None``, and ``field_elements``
    uses the same empty-list fallback as the other ``to_dict``
    implementations in this module rather than ``None``.

    :return: dict with the keys ``text``, ``row_index``, ``column_index``,
        ``row_span``, ``column_span``, ``confidence``, ``is_header``,
        ``is_footer``, ``page_number``, ``bounding_box`` and
        ``field_elements``.
    :rtype: dict
    """
    corners = self.bounding_box
    elements = self.field_elements
    return {
        "text": self.text,
        "row_index": self.row_index,
        "column_index": self.column_index,
        "row_span": self.row_span,
        "column_span": self.column_span,
        "confidence": self.confidence,
        "is_header": self.is_header,
        "is_footer": self.is_footer,
        "page_number": self.page_number,
        # Guard against None: iterating it would raise TypeError.
        "bounding_box": [corner.to_dict() for corner in corners] if corners else [],
        "field_elements": [element.to_dict() for element in elements] if elements else [],
    }


class CustomFormModel(object):
"""Represents a model trained from custom forms.
Expand Down Expand Up @@ -790,6 +899,18 @@ def __repr__(self):
]
)

def to_dict(self):
    """Return a dict representation of this custom model.

    ``submodels``, ``errors`` and ``training_documents`` entries are
    converted with their own ``to_dict()`` (empty list when missing or
    empty); ``properties`` is converted the same way, or is ``None`` when
    unset.

    :rtype: dict
    """

    def _dump_all(items):
        # Empty list for None/empty input, otherwise each item's dict form.
        return [item.to_dict() for item in items] if items else []

    properties = self.properties.to_dict() if self.properties else None
    model = {
        "model_id": self.model_id,
        "status": self.status,
        "training_started_on": self.training_started_on,
        "training_completed_on": self.training_completed_on,
    }
    model["submodels"] = _dump_all(self.submodels)
    model["errors"] = _dump_all(self.errors)
    model["training_documents"] = _dump_all(self.training_documents)
    model["model_name"] = self.model_name
    model["properties"] = properties
    return model

class CustomFormSubmodel(object):
"""Represents a submodel that extracts fields from a specific type of form.
Expand Down Expand Up @@ -884,6 +1005,14 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dict representation of this submodel.

    Nested field models are converted by calling their own ``to_dict()``.

    :return: dict with the keys ``model_id``, ``accuracy``, ``fields`` and
        ``form_type``.
    :rtype: dict
    """
    fields = self.fields
    if isinstance(fields, dict):
        # Iterating a dict yields its keys (field names), which have no
        # to_dict(); serialize the values and keep the names as keys.
        # NOTE(review): assumes fields is a name -> field mapping; confirm
        # against the class docstring.
        fields_out = {name: field.to_dict() for name, field in fields.items()}
    elif fields:
        fields_out = [field.to_dict() for field in fields]
    else:
        fields_out = []

    return {
        "model_id": self.model_id,
        "accuracy": self.accuracy,
        "fields": fields_out,
        "form_type": self.form_type,
    }


class CustomFormModelField(object):
"""A field that the model will extract from forms it analyzes.
Expand Down Expand Up @@ -917,6 +1046,13 @@ def __repr__(self):
self.label, self.name, self.accuracy
)[:1024]

def to_dict(self):
    """Return a dict with this field's ``label``, ``accuracy`` and ``name``.

    :rtype: dict
    """
    return {key: getattr(self, key) for key in ("label", "accuracy", "name")}


class TrainingDocumentInfo(object):
"""Report for an individual document used for training
Expand Down Expand Up @@ -988,6 +1124,15 @@ def __repr__(self):
:1024
]

def to_dict(self):
    """Return a dict representation of this training document report.

    Each ``errors`` entry is converted with its own ``to_dict()``. A missing
    or empty error list becomes an empty list instead of raising, matching
    the other ``to_dict`` implementations in this module.

    :return: dict with the keys ``name``, ``status``, ``page_count``,
        ``errors`` and ``model_id``.
    :rtype: dict
    """
    errors = self.errors
    return {
        "name": self.name,
        "status": self.status,
        "page_count": self.page_count,
        # Guard against None: iterating it would raise TypeError.
        "errors": [err.to_dict() for err in errors] if errors else [],
        "model_id": self.model_id,
    }


class FormRecognizerError(object):
"""Represents an error that occurred while training.
Expand All @@ -1013,6 +1158,12 @@ def __repr__(self):
self.code, self.message
)[:1024]

def to_dict(self):
    """Return a dict with this error's ``code`` and ``message``.

    :rtype: dict
    """
    code, message = self.code, self.message
    return {"code": code, "message": message}


class CustomFormModelInfo(object):
"""Custom model information.
Expand Down Expand Up @@ -1078,6 +1229,16 @@ def __repr__(self):
)[:1024]
)

def to_dict(self):
    """Return a dict representation of this model information.

    ``properties`` is converted with its own ``to_dict()``, or is ``None``
    when no properties are set.

    :return: dict with the keys ``model_id``, ``status``,
        ``training_started_on``, ``training_completed_on``, ``model_name``
        and ``properties``.
    :rtype: dict
    """
    properties = self.properties
    return {
        "model_id": self.model_id,
        "status": self.status,
        "training_started_on": self.training_started_on,
        "training_completed_on": self.training_completed_on,
        "model_name": self.model_name,
        # Guard against None: calling to_dict() on it would raise
        # AttributeError.
        "properties": properties.to_dict() if properties else None,
    }


class AccountProperties(object):
"""Summary of all the custom models on the account.
Expand All @@ -1102,6 +1263,12 @@ def __repr__(self):
self.custom_model_count, self.custom_model_limit
)[:1024]

def to_dict(self):
    """Return a dict with ``custom_model_count`` and ``custom_model_limit``.

    :rtype: dict
    """
    summary = {}
    summary["custom_model_count"] = self.custom_model_count
    summary["custom_model_limit"] = self.custom_model_limit
    return summary


class CustomFormModelProperties(object):
"""Optional model properties.
Expand All @@ -1123,6 +1290,11 @@ def __repr__(self):
self.is_composed_model
)

def to_dict(self):
    """Return a dict holding the single key ``is_composed_model``.

    :rtype: dict
    """
    composed = self.is_composed_model
    return {"is_composed_model": composed}


class TextAppearance(object):
"""An object representing the appearance of the text line.
Expand All @@ -1147,6 +1319,11 @@ def _from_generated(cls, appearance):
def __repr__(self):
return "TextAppearance(style={})".format(repr(self.style))

def to_dict(self):
    """Return a dict with the key ``style``.

    The style is converted with its own ``to_dict()``, or is ``None`` when
    unset.

    :rtype: dict
    """
    style = None
    if self.style:
        style = self.style.to_dict()
    return {"style": style}


class TextStyle(object):
"""An object representing the style of the text line.
Expand All @@ -1164,3 +1341,6 @@ def __init__(self, **kwargs):

def __repr__(self):
return "TextStyle(name={}, confidence={})".format(self.name, self.confidence)

def to_dict(self):
    """Return a dict with this style's ``name`` and ``confidence``.

    :rtype: dict
    """
    style = {}
    style["name"] = self.name
    style["confidence"] = self.confidence
    return style
Loading