Adds document text detection tutorial. (#868)

* Adds document text detection tutorial. * Feedback from review * Less whitespace and fewer hanging indents
GoogleCloudPlatform · Mar 22, 2017 · ba737c3 · ba737c3
1 parent d5faacf
commit ba737c3
Show file tree

Hide file tree

Showing 7 changed files with 280 additions and 0 deletions.
diff --git a/vision/cloud-client/document_text/.gitignore b/vision/cloud-client/document_text/.gitignore
@@ -0,0 +1 @@
+output-text.jpg
diff --git a/vision/cloud-client/document_text/README.rst b/vision/cloud-client/document_text/README.rst
@@ -0,0 +1,110 @@
+.. This file is automatically generated. Do not edit this file directly.
+
+Google Cloud Vision API Python Samples
+===============================================================================
+
+This directory contains samples for Google Cloud Vision API. `Google Cloud Vision API`_ allows developers to easily integrate vision detection features within applications, including image labeling, face and landmark detection, optical character recognition (OCR), and tagging of explicit content.
+
+
+
+
+.. _Google Cloud Vision API: https://cloud.google.com/vision/docs 
+
+Setup
+-------------------------------------------------------------------------------
+
+
+Authentication
+++++++++++++++
+
+Authentication is typically done through `Application Default Credentials`_,
+which means you do not have to change the code to authenticate as long as
+your environment has credentials. You have a few options for setting up
+authentication:
+
+#. When running locally, use the `Google Cloud SDK`_
+
+    .. code-block:: bash
+
+        gcloud beta auth application-default login
+
+
+#. When running on App Engine or Compute Engine, credentials are already
+   set-up. However, you may need to configure your Compute Engine instance
+   with `additional scopes`_.
+
+#. You can create a `Service Account key file`_. This file can be used to
+   authenticate to Google Cloud Platform services from any environment. To use
+   the file, set the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable to
+   the path to the key file, for example:
+
+    .. code-block:: bash
+
+        export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account.json
+
+.. _Application Default Credentials: https://cloud.google.com/docs/authentication#getting_credentials_for_server-centric_flow
+.. _additional scopes: https://cloud.google.com/compute/docs/authentication#using
+.. _Service Account key file: https://developers.google.com/identity/protocols/OAuth2ServiceAccount#creatinganaccount
+
+Install Dependencies
+++++++++++++++++++++
+
+#. Install `pip`_ and `virtualenv`_ if you do not already have them.
+
+#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+.
+
+    .. code-block:: bash
+
+        $ virtualenv env
+        $ source env/bin/activate
+
+#. Install the dependencies needed to run the samples.
+
+    .. code-block:: bash
+
+        $ pip install -r requirements.txt
+
+.. _pip: https://pip.pypa.io/
+.. _virtualenv: https://virtualenv.pypa.io/
+
+Samples
+-------------------------------------------------------------------------------
+
+Document Text tutorial
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+To run this sample:
+
+.. code-block:: bash
+
+    $ python doctext.py
+
+    usage: doctext.py [-h] [-out_file OUT_FILE] detect_file
+    
+    positional arguments:
+      detect_file         The image for text detection.
+    
+    optional arguments:
+      -h, --help          show this help message and exit
+      -out_file OUT_FILE  Optional output file
+
+
+
+
+The client library
+-------------------------------------------------------------------------------
+
+This sample uses the `Google Cloud Client Library for Python`_.
+You can read the documentation for more details on API usage and use GitHub
+to `browse the source`_ and  `report issues`_.
+
+.. _Google Cloud Client Library for Python:
+    https://googlecloudplatform.github.io/google-cloud-python/
+.. _browse the source:
+    https://github.com/GoogleCloudPlatform/google-cloud-python
+.. _report issues:
+    https://github.com/GoogleCloudPlatform/google-cloud-python/issues
+
+
+.. _Google Cloud SDK: https://cloud.google.com/sdk/
diff --git a/vision/cloud-client/document_text/README.rst.in b/vision/cloud-client/document_text/README.rst.in
@@ -0,0 +1,22 @@
+# This file is used to generate README.rst
+
+product:
+  name: Google Cloud Vision API
+  short_name: Cloud Vision API
+  url: https://cloud.google.com/vision/docs
+  description: >
+    `Google Cloud Vision API`_ allows developers to easily integrate vision
+    detection features within applications, including image labeling, face and
+    landmark detection, optical character recognition (OCR), and tagging of
+    explicit content.
+
+setup:
+- auth
+- install_deps
+
+samples:
+- name: Document Text tutorial
+  file: doctext.py
+  show_help: True
+
+cloud_client_library: true
diff --git a/vision/cloud-client/document_text/doctext.py b/vision/cloud-client/document_text/doctext.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Outlines document text given an image.
+
+Example:
+    python doctext.py resources/text_menu.jpg
+"""
+# [START full_tutorial]
+# [START imports]
+import argparse
+from enum import Enum
+import io
+
+from google.cloud import vision
+from PIL import Image, ImageDraw
+# [END imports]
+
+
+class FeatureType(Enum):
+    """Levels of the detected document hierarchy that bounds are collected for."""
+    PAGE = 1
+    BLOCK = 2
+    PARA = 3  # paragraph
+    WORD = 4
+    SYMBOL = 5
+
+
+def draw_boxes(image, blocks, color):
+    """Draw a border around the image using the hints in the vector list."""
+    # [START draw_blocks]
+    draw = ImageDraw.Draw(image)
+
+    for block in blocks:
+        draw.polygon([
+            block.vertices[0].x, block.vertices[0].y,
+            block.vertices[1].x, block.vertices[1].y,
+            block.vertices[2].x, block.vertices[2].y,
+            block.vertices[3].x, block.vertices[3].y], None, color)
+    return image
+    # [END draw_blocks]
+
+
+def get_document_bounds(image_file, feature):
+    # [START detect_bounds]
+    """Returns document bounds given an image."""
+    vision_client = vision.Client()
+
+    bounds = []
+
+    with io.open(image_file, 'rb') as image_file:
+        content = image_file.read()
+
+    image = vision_client.image(content=content)
+    document = image.detect_full_text()
+
+    # Collect specified feature bounds by enumerating all document features
+    for page in document.pages:
+        for block in page.blocks:
+            for paragraph in block.paragraphs:
+                for word in paragraph.words:
+                    for symbol in word.symbols:
+                        if (feature == FeatureType.SYMBOL):
+                            bounds.append(symbol.bounding_box)
+
+                    if (feature == FeatureType.WORD):
+                        bounds.append(word.bounding_box)
+
+                if (feature == FeatureType.PARA):
+                    bounds.append(paragraph.bounding_box)
+
+            if (feature == FeatureType.BLOCK):
+                bounds.append(block.bounding_box)
+
+        if (feature == FeatureType.PAGE):
+            bounds.append(block.bounding_box)
+
+    return bounds
+    # [END detect_bounds]
+
+
+def render_doc_text(filein, fileout):
+    # [START render_doc_text]
+    image = Image.open(filein)
+    bounds = get_document_bounds(filein, FeatureType.PAGE)
+    draw_boxes(image, bounds, 'blue')
+    bounds = get_document_bounds(filein, FeatureType.PARA)
+    draw_boxes(image, bounds, 'red')
+    bounds = get_document_bounds(filein, FeatureType.WORD)
+    draw_boxes(image, bounds, 'yellow')
+
+    if fileout is not 0:
+        image.save(fileout)
+    else:
+        image.show()
+    # [END render_doc_text]
+
+
+if __name__ == '__main__':
+    # [START run_crop]
+    parser = argparse.ArgumentParser()
+    parser.add_argument('detect_file', help='The image for text detection.')
+    parser.add_argument('-out_file', help='Optional output file', default=0)
+    args = parser.parse_args()
+
+    parser = argparse.ArgumentParser()
+    render_doc_text(args.detect_file, args.out_file)
+    # [END run_crop]
+# [END full_tutorial]
diff --git a/vision/cloud-client/document_text/doctext_test.py b/vision/cloud-client/document_text/doctext_test.py
@@ -0,0 +1,24 @@
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import doctext
+
+
+def test_text(cloud_config, capsys):
+    """Checks the output image for drawing the crop hint is created."""
+    doctext.render_doc_text('resources/text_menu.jpg', 'output-text.jpg')
+    out, _ = capsys.readouterr()
+    assert os.path.isfile('output-text.jpg')
diff --git a/vision/cloud-client/document_text/requirements.txt b/vision/cloud-client/document_text/requirements.txt
@@ -0,0 +1,2 @@
+google-cloud-vision==0.23.2
+pillow==4.0.0
diff --git a/vision/cloud-client/document_text/resources/text_menu.jpg b/vision/cloud-client/document_text/resources/text_menu.jpg