diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py index e617cf579943..b9dec2a509f8 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py @@ -166,6 +166,11 @@ def __init__(self, **kwargs): self.page_range = kwargs.get("page_range", None) self.pages = kwargs.get("pages", None) + def __repr__(self): + return "RecognizedForm(form_type={}, fields={}, page_range={}, pages={})".format( + self.form_type, repr(self.fields), repr(self.page_range), repr(self.pages) + )[:1024] + class USReceipt(object): # pylint: disable=too-many-instance-attributes """Extracted fields found on the US sales receipt. Provides @@ -182,7 +187,7 @@ class USReceipt(object): # pylint: disable=too-many-instance-attributes :ivar list[~azure.ai.formrecognizer.USReceiptItem] receipt_items: The purchased items found on the receipt. :ivar ~azure.ai.formrecognizer.FormField subtotal: - The subtotal found on the receipt. + The subtotal found on the receipt :ivar ~azure.ai.formrecognizer.FormField tax: The tax value found on the receipt. 
:ivar ~azure.ai.formrecognizer.FormField tip: @@ -224,6 +229,17 @@ def __init__(self, **kwargs): self.form_type = kwargs.get("form_type", None) self.receipt_locale = kwargs.get("receipt_locale", "en-US") + def __repr__(self): + return "USReceipt(merchant_address={}, merchant_name={}, merchant_phone_number={}, " \ + "receipt_type={}, receipt_items={}, subtotal={}, tax={}, tip={}, total={}, "\ + "transaction_date={}, transaction_time={}, fields={}, page_range={}, pages={}, " \ + "form_type={}, receipt_locale={})".format( + repr(self.merchant_address), repr(self.merchant_name), repr(self.merchant_phone_number), + repr(self.receipt_type), repr(self.receipt_items), repr(self.subtotal), repr(self.tax), + repr(self.tip), repr(self.total), repr(self.transaction_date), repr(self.transaction_time), + repr(self.fields), repr(self.page_range), repr(self.pages), self.form_type, self.receipt_locale + )[:1024] + class FormField(object): """Represents a field recognized in an input form. @@ -263,6 +279,7 @@ def _from_generated(cls, field, value, read_result): page_number=value.page if value else None, ) + @classmethod def _from_generated_unlabeled(cls, field, idx, page, read_result): return cls( @@ -274,6 +291,11 @@ def _from_generated_unlabeled(cls, field, idx, page, read_result): page_number=page, ) + def __repr__(self): + return "FormField(label_data={}, value_data={}, name={}, value={}, confidence={}, page_number={})".format( + repr(self.label_data), repr(self.value_data), self.name, repr(self.value), self.confidence, self.page_number + )[:1024] + class FieldText(FormContent): """Represents the text that is part of a form field. 
This includes @@ -328,6 +350,11 @@ def _from_generated_unlabeled(cls, field, page, read_result): text_content=get_elements(field, read_result) if field.elements else None ) + def __repr__(self): + return "FieldText(page_number={}, text={}, bounding_box={}, text_content={})".format( + self.page_number, self.text, self.bounding_box, repr(self.text_content) + )[:1024] + class FormPage(object): """Represents a page recognized from the input document. Contains lines, @@ -377,6 +404,11 @@ def _from_generated(cls, read_result): lines=[FormLine._from_generated(line, page=page.page) for line in page.lines] if page.lines else None ) for page in read_result] + def __repr__(self): + return "FormPage(page_number={}, text_angle={}, width={}, height={}, unit={}, tables={}, lines={})".format( + self.page_number, self.text_angle, self.width, self.height, self.unit, repr(self.tables), repr(self.lines) + )[:1024] + class FormLine(FormContent): """An object representing an extracted line of text. @@ -411,6 +443,10 @@ def _from_generated(cls, line, page): words=[FormWord._from_generated(word, page) for word in line.words] if line.words else None ) + def __repr__(self): + return "FormLine(text={}, bounding_box={}, words={}, page_number={})".format( + self.text, self.bounding_box, repr(self.words), self.page_number + )[:1024] class FormWord(FormContent): """Represents a word recognized from the input document. 
@@ -445,6 +481,11 @@ def _from_generated(cls, word, page): page_number=page ) + def __repr__(self): + return "FormWord(text={}, bounding_box={}, confidence={}, page_number={})".format( + self.text, self.bounding_box, self.confidence, self.page_number + )[:1024] + class USReceiptType(object): """The type of the analyzed US receipt and the confidence @@ -466,6 +507,9 @@ def _from_generated(cls, item): type=item.value_string, confidence=item.confidence or 1.0) if item else None + def __repr__(self): + return "USReceiptType(type={}, confidence={})".format(self.type, self.confidence)[:1024] + class USReceiptItem(object): """A receipt item on a US sales receipt. @@ -500,6 +544,11 @@ def _from_generated(cls, items, read_result): except AttributeError: return [] + def __repr__(self): + return "USReceiptItem(name={}, quantity={}, price={}, total_price={})".format( + repr(self.name), repr(self.quantity), repr(self.price), repr(self.total_price) + )[:1024] + class FormTable(object): """Information about the extracted table contained on a page. @@ -517,6 +566,11 @@ def __init__(self, **kwargs): self.row_count = kwargs.get("row_count", None) self.column_count = kwargs.get("column_count", None) + def __repr__(self): + return "FormTable(cells={}, row_count={}, column_count={})".format( + repr(self.cells), self.row_count, self.column_count + )[:1024] + class FormTableCell(FormContent): """Represents a cell contained in a table recognized from the input document. 
@@ -576,6 +630,13 @@ def _from_generated(cls, cell, page, read_result): text_content=get_elements(cell, read_result) if cell.elements else None ) + def __repr__(self): + return "FormTableCell(text={}, row_index={}, column_index={}, row_span={}, column_span={}, " \ + "bounding_box={}, confidence={}, is_header={}, is_footer={}, page_number={}, text_content={})".format( + self.text, self.row_index, self.column_index, self.row_span, self.column_span, self.bounding_box, + self.confidence, self.is_header, self.is_footer, self.page_number, repr(self.text_content) + )[:1024] + class CustomFormModel(object): """Represents a model trained from custom forms. @@ -621,6 +682,13 @@ def _from_generated(cls, model): if model.train_result else None ) + def __repr__(self): + return "CustomFormModel(model_id={}, status={}, created_on={}, last_modified={}, models={}, " \ + "errors={}, training_documents={})".format( + self.model_id, self.status, self.created_on, self.last_modified, repr(self.models), + repr(self.errors), repr(self.training_documents) + )[:1024] + class CustomFormSubModel(object): """Represents a submodel that extracts fields from a specific type of form. @@ -656,6 +724,11 @@ def _from_generated_labeled(cls, model): form_type="form-" + model.model_info.model_id )] if model.train_result else None + def __repr__(self): + return "CustomFormSubModel(accuracy={}, fields={}, form_type={})".format( + self.accuracy, repr(self.fields), self.form_type + )[:1024] + class CustomFormModelField(object): """A field that the model will extract from forms it analyzes. 
@@ -685,6 +758,11 @@ def _from_generated_unlabeled(cls, fields): ) for idx, field_name in enumerate(fields) } + def __repr__(self): + return "CustomFormModelField(label={}, name={}, accuracy={})".format( + self.label, self.name, self.accuracy + )[:1024] + class TrainingDocumentInfo(object): """Report for an individual document used for training @@ -717,6 +795,11 @@ def _from_generated(cls, train_result): errors=FormRecognizerError._from_generated(doc.errors) ) for doc in train_result.training_documents] if train_result.training_documents else None + def __repr__(self): + return "TrainingDocumentInfo(document_name={}, status={}, page_count={}, errors={})".format( + self.document_name, self.status, self.page_count, repr(self.errors) + )[:1024] + class FormRecognizerError(object): """Represents an error that occurred while training. @@ -733,6 +816,9 @@ def __init__(self, **kwargs): def _from_generated(cls, err): return [cls(code=error.code, message=error.message) for error in err] if err else [] + def __repr__(self): + return "FormRecognizerError(code={}, message={})".format(self.code, self.message)[:1024] + class CustomFormModelInfo(object): """Custom model information. @@ -762,6 +848,11 @@ def _from_generated(cls, model): last_modified=model.last_updated_date_time ) + def __repr__(self): + return "CustomFormModelInfo(model_id={}, status={}, created_on={}, last_modified={})".format( + self.model_id, self.status, self.created_on, self.last_modified + )[:1024] + class AccountProperties(object): """Summary of all the custom models on the account. 
@@ -780,3 +871,8 @@ def _from_generated(cls, model): custom_model_count=model.count, custom_model_limit=model.limit, ) + + def __repr__(self): + return "AccountProperties(custom_model_count={}, custom_model_limit={})".format( + self.custom_model_count, self.custom_model_limit + )[:1024] diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_repr.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_repr.py new file mode 100644 index 000000000000..5cf69159bf9b --- /dev/null +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_repr.py @@ -0,0 +1,223 @@ +# coding=utf-8 +# -------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for +# license information. +# -------------------------------------------------------------------------- + +import pytest +import datetime +from azure.ai.formrecognizer import _models + +# All fixtures return a tuple of the object and the repr of the object + +# Adding in assert for each pytest fixture so it's easier to narrow down where the problem is + +@pytest.fixture +def bounding_box(): + model = [ + _models.Point(1, 2), + _models.Point(3, 4), + _models.Point(5, 6), + _models.Point(7, 8) + ] + model_repr = '[Point(x=1, y=2), Point(x=3, y=4), Point(x=5, y=6), Point(x=7, y=8)]' + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def form_word(bounding_box): + model = _models.FormWord(text="Word", bounding_box=bounding_box[0], confidence=0.5, page_number=1) + model_repr = "FormWord(text=Word, bounding_box={}, confidence=0.5, page_number=1)".format(bounding_box[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + + +@pytest.fixture +def form_line(bounding_box, form_word): + model = _models.FormLine(text="Word Word", bounding_box=bounding_box[0], words=[form_word[0], form_word[0]], page_number=1) + model_repr = 
"FormLine(text=Word Word, bounding_box={}, words=[{}, {}], page_number=1)".format(bounding_box[1], form_word[1], form_word[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def form_table_cell(bounding_box, form_word): + model = _models.FormTableCell( + text="Cell", row_index=3, column_index=4, row_span=2, column_span=3, bounding_box=bounding_box[0], + confidence=0.7, is_header=True, is_footer=False, page_number=3, text_content=[form_word[0]] + ) + model_repr = "FormTableCell(text=Cell, row_index=3, column_index=4, row_span=2, column_span=3, bounding_box={}, confidence=0.7, " \ + "is_header=True, is_footer=False, page_number=3, text_content=[{}])".format(bounding_box[1], form_word[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def form_table(form_table_cell): + model = _models.FormTable(cells=[form_table_cell[0], form_table_cell[0]], row_count=3, column_count=4) + model_repr = "FormTable(cells=[{}, {}], row_count=3, column_count=4)".format(form_table_cell[1], form_table_cell[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def field_text(bounding_box, form_word, form_line): + model = _models.FieldText(page_number=1, text="This is text.", bounding_box=bounding_box[0], text_content=[form_word[0], form_line[0]]) + model_repr = "FieldText(page_number=1, text=This is text., bounding_box={}, text_content=[{}, {}])".format(bounding_box[1], form_word[1], form_line[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def form_field_two(field_text): + model = _models.FormField(label_data=field_text[0], value_data=field_text[0], name="form_field_two", value="value", confidence=0, page_number=1) + model_repr = "FormField(label_data={}, value_data={}, name=form_field_two, value='value', confidence=0, page_number=1)".format(field_text[1], field_text[1])[:1024] + assert repr(model) == model_repr + return 
model, model_repr + +@pytest.fixture +def form_field_one(field_text, form_field_two): + model = _models.FormField(label_data=field_text[0], value_data=field_text[0], name="form_field_one", value=form_field_two[0], confidence=1.0, page_number=5) + model_repr = "FormField(label_data={}, value_data={}, name=form_field_one, value={}, confidence=1.0, page_number=5)".format(field_text[1], field_text[1], form_field_two[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def page_range(): + model = _models.PageRange(first_page=1, last_page=100) + model_repr = "PageRange(first_page=1, last_page=100)" + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def form_page(form_table, form_line): + model = _models.FormPage(page_number=1, text_angle=180, width=5, height=5.5, unit=_models.LengthUnit.pixel, tables=[form_table[0]], lines=[form_line[0]]) + model_repr = "FormPage(page_number=1, text_angle=180, width=5, height=5.5, unit=pixel, tables=[{}], lines=[{}])".format( + form_table[1], form_line[1] + )[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def us_receipt_type(): + model = _models.USReceiptType(type="Itemized", confidence=1.0) + model_repr = "USReceiptType(type=Itemized, confidence=1.0)" + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def us_receipt_item(form_field_two): + model = _models.USReceiptItem(name=form_field_two[0], quantity=form_field_two[0], price=form_field_two[0], total_price=form_field_two[0]) + model_repr = "USReceiptItem(name={}, quantity={}, price={}, total_price={})".format(form_field_two[1], form_field_two[1], form_field_two[1], form_field_two[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def custom_form_model_field(): + model = _models.CustomFormModelField(label="label", name="name", accuracy=0.99) + model_repr = "CustomFormModelField(label=label, name=name, 
accuracy=0.99)" + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def custom_form_sub_model(custom_form_model_field): + model = _models.CustomFormSubModel(accuracy=0.99, fields={"name": custom_form_model_field[0]}, form_type="Itemized") + model_repr = "CustomFormSubModel(accuracy=0.99, fields={{'name': {}}}, form_type=Itemized)".format(custom_form_model_field[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def form_recognizer_error(): + model = _models.FormRecognizerError(code=404, message="Resource Not Found") + model_repr = "FormRecognizerError(code=404, message=Resource Not Found)" + assert repr(model) == model_repr + return model, model_repr + +@pytest.fixture +def training_document_info(form_recognizer_error): + model = _models.TrainingDocumentInfo(document_name="document_name", status=_models.TrainingStatus.partially_succeeded, page_count=5, errors=[form_recognizer_error[0]]) + model_repr = "TrainingDocumentInfo(document_name=document_name, status=partiallySucceeded, page_count=5, errors=[{}])".format(form_recognizer_error[1])[:1024] + assert repr(model) == model_repr + return model, model_repr + + +class TestRepr(): + # Not inheriting from FormRecognizerTest because that doesn't allow me to define pytest fixtures in the same file + # Not worth moving pytest fixture definitions to conftest since all I would use is assertEqual and I can just use assert + def test_recognized_form(self, form_field_one, page_range, form_page, us_receipt_type, us_receipt_item): + model = _models.RecognizedForm(form_type="receipt", fields={"one": form_field_one[0]}, page_range=page_range[0], pages=[form_page[0]]) + model_repr = "RecognizedForm(form_type=receipt, fields={{'one': {}}}, page_range={}, pages=[{}])".format( + form_field_one[1], page_range[1], form_page[1] + )[:1024] + assert repr(model) == model_repr + + def test_us_receipt(self, form_field_one, form_field_two, us_receipt_type, us_receipt_item, 
page_range, form_page): + model = _models.USReceipt( + merchant_address=form_field_one[0], + merchant_name=form_field_two[0], + merchant_phone_number=form_field_one[0], + receipt_type=us_receipt_type[0], + receipt_items=[us_receipt_item[0], us_receipt_item[0]], + subtotal=form_field_two[0], + tax=form_field_one[0], + tip=form_field_two[0], + total=form_field_one[0], + transaction_date=form_field_two[0], + transaction_time=form_field_one[0], + fields={ + "one": form_field_one[0] + }, + page_range=page_range[0], + pages=[form_page[0]], + form_type="test", + receipt_locale="en-US" + ) + model_repr="USReceipt(merchant_address={}, merchant_name={}, merchant_phone_number={}, receipt_type={}, receipt_items=[{}, {}], subtotal={}, " \ + "tax={}, tip={}, total={}, transaction_date={}, transaction_time={}, fields={{'one': {}}}, page_range={}, pages=[{}], " \ + "form_type=test, receipt_locale=en-US)".format( + form_field_one[1], + form_field_two[1], + form_field_one[1], + us_receipt_type[1], + us_receipt_item[1], + us_receipt_item[1], + form_field_two[1], + form_field_one[1], + form_field_two[1], + form_field_one[1], + form_field_two[1], + form_field_one[1], + form_field_one[1], + page_range[1], + form_page[1] + )[:1024] + + + assert repr(model) == model_repr + + def test_custom_form_model(self, custom_form_sub_model, form_recognizer_error, training_document_info): + model = _models.CustomFormModel( + model_id=1, + status=_models.CustomFormModelStatus.creating, + created_on=datetime.datetime(1, 1, 1), + last_modified=datetime.datetime(1, 1, 1), + models=[custom_form_sub_model[0], custom_form_sub_model[0]], + errors=[form_recognizer_error[0]], + training_documents=[training_document_info[0], training_document_info[0]] + ) + + model_repr = "CustomFormModel(model_id=1, status=creating, created_on=0001-01-01 00:00:00, " \ + "last_modified=0001-01-01 00:00:00, models=[{}, {}], errors=[{}], training_documents=[{}, {}])".format( + custom_form_sub_model[1], custom_form_sub_model[1], 
form_recognizer_error[1], training_document_info[1], training_document_info[1] + )[:1024] + + assert repr(model) == model_repr + + def test_custom_form_model_info(self): + model = _models.CustomFormModelInfo( + model_id=1, status=_models.CustomFormModelStatus.ready, created_on=datetime.datetime(1, 1, 1), last_modified=datetime.datetime(1, 1, 1) + ) + model_repr = "CustomFormModelInfo(model_id=1, status=ready, created_on=0001-01-01 00:00:00, last_modified=0001-01-01 00:00:00)"[:1024] + assert repr(model) == model_repr + + def test_account_properties(self): + model = _models.AccountProperties(custom_model_count=100, custom_model_limit=1000) + model_repr = "AccountProperties(custom_model_count=100, custom_model_limit=1000)" + assert repr(model) == model_repr