Skip to content

Commit 91a1927

Browse files
authored
Merge pull request #2615 from daspecster/add-speech-sync-gapic
Add Speech GAPIC for sync_recognize.
2 parents 5088202 + 22f1b2b commit 91a1927

File tree

5 files changed

+350
-27
lines changed

5 files changed

+350
-27
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Copyright 2016 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""GAX/GAPIC module for managing Speech API requests."""
16+
17+
from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi
18+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import SpeechContext
19+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionConfig
20+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio
21+
22+
from google.cloud.speech.transcript import Transcript
23+
24+
25+
class GAPICSpeechAPI(object):
    """Manage calls through GAPIC wrappers to the Speech API."""

    def __init__(self):
        self._gapic_api = SpeechApi()

    def sync_recognize(self, sample, language_code=None, max_alternatives=None,
                       profanity_filter=None, speech_context=None):
        """Synchronous Speech Recognition.

        .. _sync_recognize: https://cloud.google.com/speech/reference/\
                            rest/v1beta1/speech/syncrecognize

        See `sync_recognize`_.

        :type sample: :class:`~google.cloud.speech.sample.Sample`
        :param sample: Instance of ``Sample`` containing audio information.

        :type language_code: str
        :param language_code: (Optional) The language of the supplied audio as
                              BCP-47 language tag. Example: ``'en-GB'``.
                              If omitted, defaults to ``'en-US'``.

        :type max_alternatives: int
        :param max_alternatives: (Optional) Maximum number of recognition
                                 hypotheses to be returned. The server may
                                 return fewer than maxAlternatives.
                                 Valid values are 0-30. A value of 0 or 1
                                 will return a maximum of 1. Defaults to 1

        :type profanity_filter: bool
        :param profanity_filter: If True, the server will attempt to filter
                                 out profanities, replacing all but the
                                 initial character in each filtered word with
                                 asterisks, e.g. ``'f***'``. If False or
                                 omitted, profanities won't be filtered out.

        :type speech_context: list
        :param speech_context: A list of strings (max 50) containing words and
                               phrases "hints" so that the speech recognition
                               is more likely to recognize them. This can be
                               used to improve the accuracy for specific words
                               and phrases. This can also be used to add new
                               words to the vocabulary of the recognizer.

        :rtype: list
        :returns: A list of ``Transcript`` instances, one for each
                  alternative of the single result, each built with
                  ``Transcript.from_pb``.

        :raises: ValueError if more than one result is returned or no results.
        """
        config = RecognitionConfig(
            encoding=sample.encoding, sample_rate=sample.sample_rate,
            language_code=language_code, max_alternatives=max_alternatives,
            profanity_filter=profanity_filter,
            speech_context=SpeechContext(phrases=speech_context))

        audio = RecognitionAudio(content=sample.content,
                                 uri=sample.source_uri)
        api_response = self._gapic_api.sync_recognize(config=config,
                                                      audio=audio)

        results = api_response.results
        if len(results) != 1:
            raise ValueError('More than one result or none returned from API.')

        # Index instead of ``pop()`` so the response object is left intact,
        # matching how the JSON implementation reads its single result.
        return [Transcript.from_pb(alternative)
                for alternative in results[0].alternatives]

packages/google-cloud-speech/google/cloud/speech/client.py

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,33 @@
1515
"""Basic client for Google Cloud Speech API."""
1616

1717
from base64 import b64encode
18+
import os
1819

20+
from google.cloud.client import Client as BaseClient
1921
from google.cloud._helpers import _to_bytes
2022
from google.cloud._helpers import _bytes_to_unicode
21-
from google.cloud import client as client_module
23+
from google.cloud.environment_vars import DISABLE_GRPC
2224
from google.cloud.speech.connection import Connection
2325
from google.cloud.speech.encoding import Encoding
2426
from google.cloud.speech.operation import Operation
2527
from google.cloud.speech.sample import Sample
2628
from google.cloud.speech.transcript import Transcript
2729

30+
try:
31+
from google.cloud.speech._gax import GAPICSpeechAPI
32+
except ImportError: # pragma: NO COVER
33+
_HAVE_GAX = False
34+
GAPICSpeechAPI = None
35+
else:
36+
_HAVE_GAX = True
2837

29-
class Client(client_module.Client):
30-
"""Client to bundle configuration needed for API requests.
3138

32-
:type project: str
33-
:param project: The project which the client acts on behalf of. Will be
34-
passed when creating a dataset / job. If not passed,
35-
falls back to the default inferred from the environment.
39+
_DISABLE_GAX = os.getenv(DISABLE_GRPC, False)
40+
_USE_GAX = _HAVE_GAX and not _DISABLE_GAX
41+
42+
43+
class Client(BaseClient):
44+
"""Client to bundle configuration needed for API requests.
3645
3746
:type credentials: :class:`oauth2client.client.OAuth2Credentials` or
3847
:class:`NoneType`
@@ -45,9 +54,22 @@ class Client(client_module.Client):
4554
:param http: An optional HTTP object to make requests. If not passed, an
4655
``http`` object is created that is bound to the
4756
``credentials`` for the current object.
57+
58+
:type use_gax: bool
59+
:param use_gax: (Optional) Explicitly specifies whether
60+
to use the gRPC transport (via GAX) or HTTP. If unset,
61+
falls back to the ``GOOGLE_CLOUD_DISABLE_GRPC`` environment
62+
variable
4863
"""
64+
def __init__(self, credentials=None, http=None, use_gax=None):
    """Initialize the base client and record the transport preference.

    ``credentials`` and ``http`` are forwarded unchanged to the base
    client constructor.  ``use_gax`` selects the transport: when it is
    ``None`` the module-level ``_USE_GAX`` default is used, otherwise
    the value given by the caller is stored as-is.
    """
    super(Client, self).__init__(credentials=credentials, http=http)
    # An explicit caller choice wins; ``None`` defers to the module default.
    self._use_gax = _USE_GAX if use_gax is None else use_gax
4970

5071
_connection_class = Connection
72+
_speech_api = None
5173

5274
def async_recognize(self, sample, language_code=None,
5375
max_alternatives=None, profanity_filter=None,
@@ -139,6 +161,16 @@ def sample(content=None, source_uri=None, encoding=None,
139161
return Sample(content=content, source_uri=source_uri,
140162
encoding=encoding, sample_rate=sample_rate)
141163

164+
@property
def speech_api(self):
    """Return the speech API helper, creating it on first access.

    The helper is stored on ``_speech_api`` and reused afterwards.
    A ``GAPICSpeechAPI`` is created when ``_use_gax`` is truthy,
    otherwise a ``_JSONSpeechAPI`` bound to this client.
    """
    if self._speech_api is not None:
        return self._speech_api

    self._speech_api = (
        GAPICSpeechAPI() if self._use_gax else _JSONSpeechAPI(self))
    return self._speech_api
173+
142174
def sync_recognize(self, sample, language_code=None,
143175
max_alternatives=None, profanity_filter=None,
144176
speech_context=None):
@@ -188,19 +220,82 @@ def sync_recognize(self, sample, language_code=None,
188220
* ``confidence``: The confidence in language detection, float
189221
between 0 and 1.
190222
"""
223+
api = self.speech_api
224+
return api.sync_recognize(sample, language_code, max_alternatives,
225+
profanity_filter, speech_context)
226+
227+
228+
class _JSONSpeechAPI(object):
    """Speech API for interacting with the JSON/REST version of the API.

    :type client: :class:`google.cloud.core.client.Client`
    :param client: Instance of a ``Client`` object.
    """
    def __init__(self, client):
        self._client = client
        self._connection = client.connection

    def sync_recognize(self, sample, language_code=None, max_alternatives=None,
                       profanity_filter=None, speech_context=None):
        """Synchronous Speech Recognition.

        .. _sync_recognize: https://cloud.google.com/speech/reference/\
                            rest/v1beta1/speech/syncrecognize

        See `sync_recognize`_.

        :type sample: :class:`~google.cloud.speech.sample.Sample`
        :param sample: Instance of ``Sample`` containing audio information.

        :type language_code: str
        :param language_code: (Optional) The language of the supplied audio as
                              BCP-47 language tag. Example: ``'en-GB'``.
                              If omitted, defaults to ``'en-US'``.

        :type max_alternatives: int
        :param max_alternatives: (Optional) Maximum number of recognition
                                 hypotheses to be returned. The server may
                                 return fewer than maxAlternatives.
                                 Valid values are 0-30. A value of 0 or 1
                                 will return a maximum of 1. Defaults to 1

        :type profanity_filter: bool
        :param profanity_filter: If True, the server will attempt to filter
                                 out profanities, replacing all but the
                                 initial character in each filtered word with
                                 asterisks, e.g. ``'f***'``. If False or
                                 omitted, profanities won't be filtered out.

        :type speech_context: list
        :param speech_context: A list of strings (max 50) containing words and
                               phrases "hints" so that the speech recognition
                               is more likely to recognize them. This can be
                               used to improve the accuracy for specific words
                               and phrases. This can also be used to add new
                               words to the vocabulary of the recognizer.

        :rtype: list
        :returns: A list of ``Transcript`` instances, one for each
                  alternative of the single result, each built with
                  ``Transcript.from_api_repr``.

        :raises: ValueError if more than one result is returned or no results.
        """
        data = _build_request_data(sample, language_code, max_alternatives,
                                   profanity_filter, speech_context)
        api_response = self._connection.api_request(
            method='POST', path='speech:syncrecognize', data=data)

        # Treat a response with no ``results`` key the same as an empty
        # list, so the documented ValueError is raised instead of a KeyError.
        results = api_response.get('results', ())
        if len(results) != 1:
            raise ValueError('More than one result or none returned from API.')

        return [Transcript.from_api_repr(alternative)
                for alternative in results[0]['alternatives']]
204299

205300

206301
def _build_request_data(sample, language_code=None, max_alternatives=None,

packages/google-cloud-speech/google/cloud/speech/transcript.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,25 @@ def from_api_repr(cls, transcript):
3535
:type transcript: dict
3636
:param transcript: Dictionary response from the REST API.
3737
38-
:rtype: :class:`~Transcript`
38+
:rtype: :class:`Transcript`
3939
:returns: Instance of ``Transcript``.
4040
"""
4141
return cls(transcript['transcript'], transcript['confidence'])
4242

43+
@classmethod
def from_pb(cls, transcript):
    """Factory: build a ``Transcript`` from a protobuf alternative.

    :type transcript:
        :class:`google.cloud.speech.v1beta1.SpeechRecognitionAlternative`
    :param transcript: Protobuf ``SpeechRecognitionAlternative`` whose
                       ``transcript`` and ``confidence`` attributes are
                       passed, in that order, to the constructor.

    :rtype: :class:`Transcript`
    :returns: Instance of ``Transcript``.
    """
    text = transcript.transcript
    confidence = transcript.confidence
    return cls(text, confidence)
56+
4357
@property
4458
def transcript(self):
4559
"""Transcript text from audio.

packages/google-cloud-speech/setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151

5252
REQUIREMENTS = [
5353
'google-cloud-core >= 0.20.0',
54+
'gapic-google-cloud-speech-v1beta1 >= 0.11.1, < 0.12.0',
55+
'grpc-google-cloud-speech-v1beta1 >= 0.11.1, < 0.12.0',
5456
]
5557

5658
setup(

0 commit comments

Comments
 (0)