Azure · catalinaperalta · Nov 3, 2021 · Nov 2, 2021 · Nov 2, 2021 · Nov 2, 2021
@@ -348,8 +348,32 @@ for style in result.styles:
         print("Document contains handwritten content: ")
         print(",".join([result.content[span.offset:span.offset + span.length] for span in style.spans]))
 
-print("----Selection marks found in document----")
 for page in result.pages:
+    print("----Analyzing document from page #{}----".format(page.page_number))
+    print(
+        "Page has width: {} and height: {}, measured with unit: {}".format(
+            page.width, page.height, page.unit
+        )
+    )
+
+    for line_idx, line in enumerate(page.lines):
+        words = line.get_words()
+        print(
+            "...Line # {} has {} words and text '{}' within bounding box '{}'".format(
+                line_idx,
+                len(words),
+                line.content,
+                format_bounding_box(line.bounding_box),
+            )
+        )
+
+        for word in words:
+            print(
+                "......Word '{}' has a confidence of {}".format(
+                    word.content, word.confidence
+                )
+            )
+
     for selection_mark in page.selection_marks:
         print(
             "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format(

@@ -101,20 +101,22 @@ async def analyze_general_documents():
         )
 
         for line_idx, line in enumerate(page.lines):
+            words = line.get_words()
             print(
-                "Line # {} has text content '{}' within bounding box '{}'".format(
+                "...Line # {} has {} words and text '{}' within bounding box '{}'".format(
                     line_idx,
+                    len(words),
                     line.content,
                     format_bounding_box(line.bounding_box),
                 )
             )
 
-        for word in page.words:
-            print(
-                "...Word '{}' has a confidence of {}".format(
-                    word.content, word.confidence
+            for word in words:
+                print(
+                    "......Word '{}' has a confidence of {}".format(
+                        word.content, word.confidence
+                    )
                 )
-            )
 
         for selection_mark in page.selection_marks:
             print(
@@ -131,6 +133,11 @@ async def analyze_general_documents():
                 table_idx, table.row_count, table.column_count
             )
         )
+        print(
+            "Table # {} has {} lines and {} words".format(
+                table_idx, len(table.get_lines()), len(table.get_words())
+            )
+        )
         for region in table.bounding_regions:
             print(
                 "Table # {} location on page: {} is {}".format(

@@ -78,20 +78,22 @@ async def analyze_layout_async():
         )
 
         for line_idx, line in enumerate(page.lines):
+            words = line.get_words()
             print(
-                "Line # {} has text content '{}' within bounding box '{}'".format(
+                "...Line # {} has word count {} and text '{}' within bounding box '{}'".format(
                     line_idx,
+                    len(words),
                     line.content,
                     format_bounding_box(line.bounding_box),
                 )
             )
 
-        for word in page.words:
-            print(
-                "...Word '{}' has a confidence of {}".format(
-                    word.content, word.confidence
+            for word in words:
+                print(
+                    "......Word '{}' has a confidence of {}".format(
+                        word.content, word.confidence
+                    )
                 )
-            )
 
         for selection_mark in page.selection_marks:
             print(

@@ -0,0 +1,136 @@
+# coding: utf-8
+
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+"""
+FILE: sample_get_document_elements_async.py
+
+DESCRIPTION:
+    This sample demonstrates how to get the words and lines that make up document elements
+    (key-value pairs, entities and tables) from the result of calling
+    `begin_analyze_document()`.
+
+USAGE:
+    python sample_get_document_elements_async.py
+
+    Set the environment variables with your own values before running the sample:
+    1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
+    2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
+"""
+
+import os
+import asyncio
+
+def format_bounding_region(bounding_regions):
+    """Render bounding regions as one comma-separated string.
+
+    Each region is shown as "Page #<page_number>: <bounding box>". A falsy
+    argument (None or an empty sequence) is rendered as "N/A".
+    """
+    if bounding_regions:
+        descriptions = []
+        for item in bounding_regions:
+            page_number = item.page_number
+            box_text = format_bounding_box(item.bounding_box)
+            descriptions.append("Page #{}: {}".format(page_number, box_text))
+        return ", ".join(descriptions)
+    return "N/A"
+
+def format_bounding_box(bounding_box):
+    """Render a bounding box as "[x, y], [x, y], ...".
+
+    Every element of bounding_box must expose x and y attributes. A falsy
+    argument (None or an empty sequence) is rendered as "N/A".
+    """
+    if bounding_box:
+        corners = ("[{}, {}]".format(point.x, point.y) for point in bounding_box)
+        return ", ".join(corners)
+    return "N/A"
+
+
+async def get_document_elements_async():
+    """Analyze a sample form and print the words and lines behind its elements.
+
+    Sends Form_1.jpg to the "prebuilt-document" model, then calls get_words()
+    and get_lines() on the returned key-value pairs, entities and tables and
+    prints the content that makes up each element.
+
+    The endpoint and key are read from the AZURE_FORM_RECOGNIZER_ENDPOINT and
+    AZURE_FORM_RECOGNIZER_KEY environment variables; a missing variable raises
+    KeyError.
+    """
+    # The sample form is addressed relative to this file's location.
+    path_to_sample_documents = os.path.abspath(
+        os.path.join(
+            os.path.abspath(__file__),
+            "..",
+            "..",
+            "..",
+            "./sample_forms/forms/Form_1.jpg",
+        )
+    )
+
+    from azure.core.credentials import AzureKeyCredential
+    from azure.ai.formrecognizer.aio import DocumentAnalysisClient
+
+    endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
+    key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
+
+    document_analysis_client = DocumentAnalysisClient(
+        endpoint=endpoint, credential=AzureKeyCredential(key)
+    )
+    async with document_analysis_client:
+        # The file is closed as soon as the analysis has been started; the
+        # result is awaited afterwards, while the client is still open.
+        with open(path_to_sample_documents, "rb") as f:
+            poller = await document_analysis_client.begin_analyze_document(
+                "prebuilt-document", document=f
+            )
+        result = await poller.result()
+
+    print("----Getting words in key-value pairs found in document----")
+    for kv_pair in result.key_value_pairs:
+        if kv_pair.key:
+            print(
+                "Key '{}' found within '{}' bounding regions".format(
+                    kv_pair.key.content,
+                    format_bounding_region(kv_pair.key.bounding_regions),
+                )
+            )
+            words = kv_pair.key.get_words()
+            # NOTE(review): the double space in "Key  has" looks like a typo;
+            # left unchanged so the sample output stays the same - confirm.
+            print(
+                "Key  has {} word(s):".format(
+                    len(words),
+                )
+            )
+            for word in words:
+                print(
+                    "...found '{}' word with confidence {}".format(
+                        word.content,
+                        word.confidence,
+                    )
+                )
+
+    print("----Getting words in entities found in document----")
+    for entity in result.entities:
+        print("Entity of category '{}' with sub-category '{}'".format(entity.category, entity.sub_category))
+        # NOTE: Calling get_words() here will return a list of the DocumentWords that make up the entity.
+        # These words can be processed just like any other DocumentWord instance.
+        words = entity.get_words()
+        for word in words:
+            print(
+                "...contains '{}' with confidence {}".format(
+                    word.content,
+                    word.confidence,
+                )
+            )
+
+    print("----Getting lines in tables found in document----")
+    for table_idx, table in enumerate(result.tables):
+        print(
+            "Table # {} has {} rows and {} columns".format(
+                table_idx, table.row_count, table.column_count
+            )
+        )
+        # Fetch the table's lines once; the summary and the per-line listing
+        # below both use the same list.
+        table_lines = table.get_lines()
+        print(
+            "Table # {} has {} lines and {} words".format(
+                table_idx, len(table_lines), len(table.get_words())
+            )
+        )
+        for line in table_lines:
+            print(
+                "...found '{}' line".format(
+                    line.content,
+                )
+            )
+            for word in line.get_words():
+                print(
+                    "......contains '{}' with confidence {}".format(
+                        word.content,
+                        word.confidence,
+                    )
+                )
+    print("----------------------------------------")
+
+
+async def main():
+    """Entry-point coroutine: run the document-elements sample."""
+    await get_document_elements_async()
+
+
+if __name__ == "__main__":
+    # asyncio.run() creates a fresh event loop, runs main() to completion and
+    # closes the loop afterwards. Calling asyncio.get_event_loop() when no loop
+    # is running is deprecated and never closed the loop it created.
+    asyncio.run(main())
@@ -97,20 +97,22 @@ def analyze_general_documents():
         )
 
         for line_idx, line in enumerate(page.lines):
+            words = line.get_words()
             print(
-                "...Line # {} has text content '{}' within bounding box '{}'".format(
+                "...Line # {} has {} words and text '{}' within bounding box '{}'".format(
                     line_idx,
+                    len(words),
                     line.content,
                     format_bounding_box(line.bounding_box),
                 )
             )
 
-        for word in page.words:
-            print(
-                "...Word '{}' has a confidence of {}".format(
-                    word.content, word.confidence
+            for word in words:
+                print(
+                    "......Word '{}' has a confidence of {}".format(
+                        word.content, word.confidence
+                    )
                 )
-            )
 
         for selection_mark in page.selection_marks:
             print(
@@ -127,6 +129,11 @@ def analyze_general_documents():
                 table_idx, table.row_count, table.column_count
             )
         )
+        print(
+            "Table # {} has {} lines and {} words".format(
+                table_idx, len(table.get_lines()), len(table.get_words())
+            )
+        )
         for region in table.bounding_regions:
             print(
                 "Table # {} location on page: {} is {}".format(

@@ -75,20 +75,22 @@ def analyze_layout():
         )
 
         for line_idx, line in enumerate(page.lines):
+            words = line.get_words()
             print(
-                "...Line # {} has text content '{}' within bounding box '{}'".format(
+                "...Line # {} has word count {} and text '{}' within bounding box '{}'".format(
                     line_idx,
+                    len(words),
                     line.content,
                     format_bounding_box(line.bounding_box),
                 )
             )
 
-        for word in page.words:
-            print(
-                "...Word '{}' has a confidence of {}".format(
-                    word.content, word.confidence
+            for word in words:
+                print(
+                    "......Word '{}' has a confidence of {}".format(
+                        word.content, word.confidence
+                    )
                 )
-            )
 
         for selection_mark in page.selection_marks:
             print(