Skip to content

Commit 51d11da

Browse files
authored
Merge pull request #2299 from daspecster/vision-text-detection
Vision text detection
2 parents 143b0a1 + b227338 commit 51d11da

File tree

5 files changed

+142
-9
lines changed

5 files changed

+142
-9
lines changed

docs/vision-usage.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -209,11 +209,13 @@ Detecting text with OCR from an image.
209209
>>> from google.cloud import vision
210210
>>> client = vision.Client()
211211
>>> image = client.image('./image.jpg')
212-
>>> text = image.detect_text()
213-
>>> text.locale
212+
>>> texts = image.detect_text()
213+
>>> texts[0].locale
214214
'en'
215-
>>> text.description
216-
'the full text of the image.'
215+
>>> texts[0].description
216+
'some text in the image'
217+
>>> texts[1].description
218+
'some other text in the image'
217219
218220
Image Properties
219221
~~~~~~~~~~~~~~~~

google/cloud/vision/entity.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ class EntityAnnotation(object):
2828
:type description: str
2929
:param description: Description of entity detected in an image.
3030
31+
:type locale: str
32+
:param locale: The language code for the locale in which the entity textual
33+
description (the ``description`` field) is expressed.
34+
3135
:type locations: list of
3236
:class:`~google.cloud.vision.geometry.LocationInformation`.
3337
:param locations: List of ``LocationInformation`` instances.
@@ -38,9 +42,10 @@ class EntityAnnotation(object):
3842
:type score: float
3943
:param score: Overall score of the result. Range [0, 1].
4044
"""
41-
def __init__(self, bounds, description, locations, mid, score):
45+
def __init__(self, bounds, description, locale, locations, mid, score):
4246
self._bounds = bounds
4347
self._description = description
48+
self._locale = locale
4449
self._locations = locations
4550
self._mid = mid
4651
self._score = score
@@ -52,17 +57,19 @@ def from_api_repr(cls, response):
5257
:type response: dict
5358
:param response: Dictionary response from Vision API with entity data.
5459
55-
:rtype: :class:`~google.cloud.vision.entiy.EntityAnnotation`
60+
:rtype: :class:`~google.cloud.vision.entity.EntityAnnotation`
5661
:returns: Instance of ``EntityAnnotation``.
5762
"""
5863
bounds = Bounds.from_api_repr(response.get('boundingPoly'))
5964
description = response['description']
65+
66+
locale = response.get('locale', None)
6067
locations = [LocationInformation.from_api_repr(location)
6168
for location in response.get('locations', [])]
62-
mid = response['mid']
63-
score = response['score']
69+
mid = response.get('mid', None)
70+
score = response.get('score', None)
6471

65-
return cls(bounds, description, locations, mid, score)
72+
return cls(bounds, description, locale, locations, mid, score)
6673

6774
@property
6875
def bounds(self):
@@ -82,6 +89,15 @@ def description(self):
8289
"""
8390
return self._description
8491

92+
@property
93+
def locale(self):
94+
"""The language code for text discovered in an image.
95+
96+
:rtype: str
97+
:returns: String language code of text found in the image.
98+
"""
99+
return self._locale
100+
85101
@property
86102
def locations(self):
87103
"""Location coordinates of landmarks detected.

google/cloud/vision/image.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ def _detect_annotation(self, feature):
9797
'LABEL_DETECTION': 'labelAnnotations',
9898
'LANDMARK_DETECTION': 'landmarkAnnotations',
9999
'LOGO_DETECTION': 'logoAnnotations',
100+
'TEXT_DETECTION': 'textAnnotations',
100101
}
101102
detected_objects = []
102103
result = self.client.annotate(self, [feature])
@@ -160,3 +161,16 @@ def detect_logos(self, limit=10):
160161
"""
161162
feature = Feature(FeatureTypes.LOGO_DETECTION, limit)
162163
return self._detect_annotation(feature)
164+
165+
def detect_text(self, limit=10):
166+
"""Detect text in an image.
167+
168+
:type limit: int
169+
:param limit: The maximum number of text instances to find.
170+
171+
:rtype: list
172+
:returns: List of
173+
:class:`~google.cloud.vision.entity.EntityAnnotation`.
174+
"""
175+
feature = Feature(FeatureTypes.TEXT_DETECTION, limit)
176+
return self._detect_annotation(feature)

unit_tests/vision/_fixtures.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1568,3 +1568,83 @@
15681568
}]
15691569
}]
15701570
}
1571+
1572+
1573+
TEXT_DETECTION_RESPONSE = {
1574+
'responses': [
1575+
{
1576+
'textAnnotations': [
1577+
{
1578+
'locale': 'en',
1579+
'description': 'Google CloudPlatform\n',
1580+
'boundingPoly': {
1581+
'vertices': [
1582+
{
1583+
'x': 129,
1584+
'y': 694
1585+
},
1586+
{
1587+
'x': 1375,
1588+
'y': 694
1589+
},
1590+
{
1591+
'x': 1375,
1592+
'y': 835
1593+
},
1594+
{
1595+
'x': 129,
1596+
'y': 835
1597+
}
1598+
]
1599+
}
1600+
},
1601+
{
1602+
'description': 'Google',
1603+
'boundingPoly': {
1604+
'vertices': [
1605+
{
1606+
'x': 129,
1607+
'y': 694
1608+
},
1609+
{
1610+
'x': 535,
1611+
'y': 694
1612+
},
1613+
{
1614+
'x': 535,
1615+
'y': 835
1616+
},
1617+
{
1618+
'x': 129,
1619+
'y': 835
1620+
}
1621+
]
1622+
}
1623+
},
1624+
{
1625+
'description': 'CloudPlatform',
1626+
'boundingPoly': {
1627+
'vertices': [
1628+
{
1629+
'x': 567,
1630+
'y': 694
1631+
},
1632+
{
1633+
'x': 1375,
1634+
'y': 694
1635+
},
1636+
{
1637+
'x': 1375,
1638+
'y': 835
1639+
},
1640+
{
1641+
'x': 567,
1642+
'y': 835
1643+
}
1644+
]
1645+
}
1646+
}
1647+
]
1648+
}
1649+
]
1650+
}

unit_tests/vision/test_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,27 @@ def test_logo_detection_from_content(self):
207207
image_request['image']['content'])
208208
self.assertEqual(5, image_request['features'][0]['maxResults'])
209209

210+
def test_text_detection_from_source(self):
211+
from google.cloud.vision.entity import EntityAnnotation
212+
from unit_tests.vision._fixtures import (TEXT_DETECTION_RESPONSE as
213+
RETURNED)
214+
credentials = _Credentials()
215+
client = self._makeOne(project=self.PROJECT, credentials=credentials)
216+
client.connection = _Connection(RETURNED)
217+
218+
image = client.image(source_uri=_IMAGE_SOURCE)
219+
text = image.detect_text(limit=3)
220+
self.assertEqual(3, len(text))
221+
self.assertTrue(isinstance(text[0], EntityAnnotation))
222+
image_request = client.connection._requested[0]['data']['requests'][0]
223+
self.assertEqual(_IMAGE_SOURCE,
224+
image_request['image']['source']['gcs_image_uri'])
225+
self.assertEqual(3, image_request['features'][0]['maxResults'])
226+
self.assertEqual('en', text[0].locale)
227+
self.assertEqual('Google CloudPlatform\n', text[0].description)
228+
self.assertEqual('Google', text[1].description)
229+
self.assertEqual(694, text[0].bounds.vertices[0].y_coordinate)
230+
210231

211232
class TestVisionRequest(unittest.TestCase):
212233
def _getTargetClass(self):

0 commit comments

Comments
 (0)