Merge pull request #2299 from daspecster/vision-text-detection

daspecster · web-flow · commit 51d11da4e195 · 2016-09-14T21:55:47.000-04:00
Vision text detection
diff --git a/docs/vision-usage.rst b/docs/vision-usage.rst
@@ -209,11 +209,13 @@ Detecting text with OCR from an image.
     >>> from google.cloud import vision
     >>> client = vision.Client()
     >>> image = client.image('./image.jpg')
-    >>> text = image.detect_text()
-    >>> text.locale
+    >>> texts = image.detect_text()
+    >>> texts[0].locale
     'en'
-    >>> text.description
-    'the full text of the image.'
+    >>> texts[0].description
+    'some text in the image'
+    >>> texts[1].description
+    'some other text in the image'
 
 Image Properties
 ~~~~~~~~~~~~~~~~
diff --git a/google/cloud/vision/entity.py b/google/cloud/vision/entity.py
@@ -28,6 +28,10 @@ class EntityAnnotation(object):
     :type description: str
     :param description: Description of entity detected in an image.
 
+    :type locale: str
+    :param locale: The language code for the locale in which the entity's
+                   textual ``description`` is expressed.
+
     :type locations: list of
                      :class:`~google.cloud.vision.geometry.LocationInformation`.
     :param locations: List of ``LocationInformation`` instances.
@@ -38,9 +42,10 @@ class EntityAnnotation(object):
     :type score: float
     :param score: Overall score of the result. Range [0, 1].
     """
-    def __init__(self, bounds, description, locations, mid, score):
+    def __init__(self, bounds, description, locale, locations, mid, score):
         self._bounds = bounds
         self._description = description
+        self._locale = locale
         self._locations = locations
         self._mid = mid
         self._score = score
@@ -52,17 +57,19 @@ def from_api_repr(cls, response):
         :type response: dict
         :param response: Dictionary response from Vision API with entity data.
 
-        :rtype: :class:`~google.cloud.vision.entiy.EntityAnnotation`
+        :rtype: :class:`~google.cloud.vision.entity.EntityAnnotation`
         :returns: Instance of ``EntityAnnotation``.
         """
         bounds = Bounds.from_api_repr(response.get('boundingPoly'))
         description = response['description']
+
+        locale = response.get('locale', None)
         locations = [LocationInformation.from_api_repr(location)
                      for location in response.get('locations', [])]
-        mid = response['mid']
-        score = response['score']
+        mid = response.get('mid', None)
+        score = response.get('score', None)
 
-        return cls(bounds, description, locations, mid, score)
+        return cls(bounds, description, locale, locations, mid, score)
 
     @property
     def bounds(self):
@@ -82,6 +89,15 @@ def description(self):
         """
         return self._description
 
+    @property
+    def locale(self):
+        """The language code for text discovered in an image.
+
+        :rtype: str
+        :returns: String language code of text found in the image.
+        """
+        return self._locale
+
     @property
     def locations(self):
         """Location coordinates landmarks detected.
diff --git a/google/cloud/vision/image.py b/google/cloud/vision/image.py
@@ -97,6 +97,7 @@ def _detect_annotation(self, feature):
             'LABEL_DETECTION': 'labelAnnotations',
             'LANDMARK_DETECTION': 'landmarkAnnotations',
             'LOGO_DETECTION': 'logoAnnotations',
+            'TEXT_DETECTION': 'textAnnotations',
         }
         detected_objects = []
         result = self.client.annotate(self, [feature])
@@ -160,3 +161,16 @@ def detect_logos(self, limit=10):
         """
         feature = Feature(FeatureTypes.LOGO_DETECTION, limit)
         return self._detect_annotation(feature)
+
+    def detect_text(self, limit=10):
+        """Detect text in an image.
+
+        :type limit: int
+        :param limit: The maximum number of text instances to find.
+
+        :rtype: list
+        :returns: List of
+                  :class:`~google.cloud.vision.entity.EntityAnnotation`.
+        """
+        feature = Feature(FeatureTypes.TEXT_DETECTION, limit)
+        return self._detect_annotation(feature)
diff --git a/unit_tests/vision/_fixtures.py b/unit_tests/vision/_fixtures.py
@@ -1568,3 +1568,83 @@
         }]
     }]
 }
+
+
+TEXT_DETECTION_RESPONSE = {
+    'responses': [
+        {
+            'textAnnotations': [
+                {
+                    'locale': 'en',
+                    'description': 'Google CloudPlatform\n',
+                    'boundingPoly': {
+                        'vertices': [
+                            {
+                                'x': 129,
+                                'y': 694
+                            },
+                            {
+                                'x': 1375,
+                                'y': 694
+                            },
+                            {
+                                'x': 1375,
+                                'y': 835
+                            },
+                            {
+                                'x': 129,
+                                'y': 835
+                            }
+                        ]
+                    }
+                },
+                {
+                    'description': 'Google',
+                    'boundingPoly': {
+                        'vertices': [
+                            {
+                                'x': 129,
+                                'y': 694
+                            },
+                            {
+                                'x': 535,
+                                'y': 694
+                            },
+                            {
+                                'x': 535,
+                                'y': 835
+                            },
+                            {
+                                'x': 129,
+                                'y': 835
+                            }
+                        ]
+                    }
+                },
+                {
+                    'description': 'CloudPlatform',
+                    'boundingPoly': {
+                        'vertices': [
+                            {
+                                'x': 567,
+                                'y': 694
+                            },
+                            {
+                                'x': 1375,
+                                'y': 694
+                            },
+                            {
+                                'x': 1375,
+                                'y': 835
+                            },
+                            {
+                                'x': 567,
+                                'y': 835
+                            }
+                        ]
+                    }
+                }
+            ]
+        }
+    ]
+}
diff --git a/unit_tests/vision/test_client.py b/unit_tests/vision/test_client.py
@@ -207,6 +207,27 @@ def test_logo_detection_from_content(self):
                          image_request['image']['content'])
         self.assertEqual(5, image_request['features'][0]['maxResults'])
 
+    def test_text_detection_from_source(self):
+        from google.cloud.vision.entity import EntityAnnotation
+        from unit_tests.vision._fixtures import (TEXT_DETECTION_RESPONSE as
+                                                 RETURNED)
+        credentials = _Credentials()
+        client = self._makeOne(project=self.PROJECT, credentials=credentials)
+        client.connection = _Connection(RETURNED)
+
+        image = client.image(source_uri=_IMAGE_SOURCE)
+        text = image.detect_text(limit=3)
+        self.assertEqual(3, len(text))
+        self.assertTrue(isinstance(text[0], EntityAnnotation))
+        image_request = client.connection._requested[0]['data']['requests'][0]
+        self.assertEqual(_IMAGE_SOURCE,
+                         image_request['image']['source']['gcs_image_uri'])
+        self.assertEqual(3, image_request['features'][0]['maxResults'])
+        self.assertEqual('en', text[0].locale)
+        self.assertEqual('Google CloudPlatform\n', text[0].description)
+        self.assertEqual('Google', text[1].description)
+        self.assertEqual(694, text[0].bounds.vertices[0].y_coordinate)
+
 
 class TestVisionRequest(unittest.TestCase):
     def _getTargetClass(self):