Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ async def get_bounding_boxes(self):
print("Form has type: {}".format(form.form_type))
for name, field in form.fields.items():
# each field is of type FormField
# The value of the field can also be a Dict[str, FormField], or a List[FormField] - in our sample, it is not.
print("...Field '{}' has label '{}' with value '{}' within bounding box '{}', with a confidence score of {}".format(
name,
field.label_data.text if field.label_data else name,
Expand All @@ -82,9 +81,8 @@ async def get_bounding_boxes(self):
"...Cell[{}][{}] has text '{}' with confidence {} based on the following words: ".format(
cell.row_index, cell.column_index, cell.text, cell.confidence
))
# field_elements is only populated if you set include_field_elements to True in your call
# to begin_recognize_custom_forms
# It is a heterogeneous list of FormWord and FormLine.
# field_elements is only populated if you set include_field_elements=True
# It is a heterogeneous list of FormWord, FormLine, and FormSelectionMark
for element in cell.field_elements:
if element.kind == "word":
print("......Word '{}' within bounding box '{}' has a confidence of {}".format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
FILE: sample_recognize_content_async.py

DESCRIPTION:
This sample demonstrates how to extract text and content information from a document
This sample demonstrates how to extract text, selection marks, and content information from a document
given through a file.

USAGE:
Expand All @@ -35,7 +35,7 @@ class RecognizeContentSampleAsync(object):

async def recognize_content(self):
path_to_sample_forms = os.path.abspath(os.path.join(os.path.abspath(__file__),
"..", "..", "./sample_forms/forms/Invoice_1.pdf"))
"..", "..", "./sample_forms/forms/selection_mark_form.pdf"))
# [START recognize_content_async]
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer.aio import FormRecognizerClient
Expand Down Expand Up @@ -68,7 +68,7 @@ async def recognize_content(self):
cell.text,
format_bounding_box(cell.bounding_box)
))
# [END recognize_content_async]

for line_idx, line in enumerate(content.lines):
print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
line_idx,
Expand All @@ -78,6 +78,7 @@ async def recognize_content(self):
))
for word in line.words:
print("...Word '{}' has a confidence of {}".format(word.text, word.confidence))

for selection_mark in content.selection_marks:
print("Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
selection_mark.state,
Expand All @@ -86,6 +87,8 @@ async def recognize_content(self):
))
print("----------------------------------------")

# [END recognize_content_async]


async def main():
sample = RecognizeContentSampleAsync()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ async def recognize_custom_forms(self):
field.label_data.text,
field.confidence
))
# The value of the field can also be a Dict[str, FormField], or a List[FormField] - in our sample, it is not.

print("...Label '{}' has value '{}' with a confidence score of {}".format(
field.label_data.text if field.label_data else name, field.value, field.confidence
))
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def get_bounding_boxes(self):
print("Form has type: {}".format(form.form_type))
for name, field in form.fields.items():
# each field is of type FormField
# The value of the field can also be a Dict[str, FormField], or a List[FormField] - in our sample, it is not.
print("...Field '{}' has label '{}' with value '{}' within bounding box '{}', with a confidence score of {}".format(
name,
field.label_data.text if field.label_data else name,
Expand All @@ -79,9 +78,8 @@ def get_bounding_boxes(self):
print("...Cell[{}][{}] has text '{}' with confidence {} based on the following words: ".format(
cell.row_index, cell.column_index, cell.text, cell.confidence
))
# field_elements is only populated if you set include_field_elements to True in your call
# to begin_recognize_custom_forms
# It is a heterogeneous list of FormWord and FormLine.
# field_elements is only populated if you set include_field_elements=True
# It is a heterogeneous list of FormWord, FormLine, and FormSelectionMark
for element in cell.field_elements:
if element.kind == "word":
print("......Word '{}' within bounding box '{}' has a confidence of {}".format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
FILE: sample_recognize_content.py

DESCRIPTION:
This sample demonstrates how to extract text and content information from a document
This sample demonstrates how to extract text, selection marks, and content information from a document
given through a file.
USAGE:
python sample_recognize_content.py
Expand All @@ -33,7 +33,7 @@ class RecognizeContentSample(object):

def recognize_content(self):
path_to_sample_forms = os.path.abspath(os.path.join(os.path.abspath(__file__),
"..", "./sample_forms/forms/Invoice_1.pdf"))
"..", "./sample_forms/forms/selection_mark_form.pdf"))
# [START recognize_content]
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
Expand Down Expand Up @@ -62,7 +62,7 @@ def recognize_content(self):
cell.text,
format_bounding_box(cell.bounding_box)
))
# [END recognize_content]

for line_idx, line in enumerate(content.lines):
print("Line # {} has word count '{}' and text '{}' within bounding box '{}'".format(
line_idx,
Expand All @@ -72,6 +72,7 @@ def recognize_content(self):
))
for word in line.words:
print("...Word '{}' has a confidence of {}".format(word.text, word.confidence))

for selection_mark in content.selection_marks:
print("Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(
selection_mark.state,
Expand All @@ -80,6 +81,8 @@ def recognize_content(self):
))
print("----------------------------------------")

# [END recognize_content]


if __name__ == '__main__':
sample = RecognizeContentSample()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def recognize_custom_forms(self):
field.label_data.text,
field.confidence
))
# The value of the field can also be a Dict[str, FormField], or a List[FormField] - in our sample, it is not.

print("...Label '{}' has value '{}' with a confidence score of {}".format(
field.label_data.text if field.label_data else name, field.value, field.confidence
))
Expand Down