From 9031a0fe36470e495594f936a6845d1fd1b70a27 Mon Sep 17 00:00:00 2001 From: Thomas Schultz Date: Tue, 25 Oct 2016 19:13:09 -0400 Subject: [PATCH] Add Speech GAPIC for sync_recognize. --- speech/google/cloud/speech/_gax.py | 96 +++++++++++++++ speech/google/cloud/speech/client.py | 115 ++++++++++++++++-- speech/google/cloud/speech/transcript.py | 16 ++- speech/setup.py | 2 + speech/unit_tests/test_client.py | 148 ++++++++++++++++++++--- 5 files changed, 350 insertions(+), 27 deletions(-) create mode 100644 speech/google/cloud/speech/_gax.py diff --git a/speech/google/cloud/speech/_gax.py b/speech/google/cloud/speech/_gax.py new file mode 100644 index 000000000000..e2b57da12439 --- /dev/null +++ b/speech/google/cloud/speech/_gax.py @@ -0,0 +1,96 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""GAX/GAPIC module for managing Speech API requests.""" + +from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import SpeechContext +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionConfig +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio + +from google.cloud.speech.transcript import Transcript + + +class GAPICSpeechAPI(object): + """Manage calls through GAPIC wrappers to the Speech API.""" + def __init__(self): + self._gapic_api = SpeechApi() + + def sync_recognize(self, sample, language_code=None, max_alternatives=None, + profanity_filter=None, speech_context=None): + """Synchronous Speech Recognition. + + .. _sync_recognize: https://cloud.google.com/speech/reference/\ + rest/v1beta1/speech/syncrecognize + + See `sync_recognize`_. + + :type sample: :class:`~google.cloud.speech.sample.Sample` + :param sample: Instance of ``Sample`` containing audio information. + + :type language_code: str + :param language_code: (Optional) The language of the supplied audio as + BCP-47 language tag. Example: ``'en-GB'``. + If omitted, defaults to ``'en-US'``. + + :type max_alternatives: int + :param max_alternatives: (Optional) Maximum number of recognition + hypotheses to be returned. The server may + return fewer than maxAlternatives. + Valid values are 0-30. A value of 0 or 1 + will return a maximum of 1. Defaults to 1 + + :type profanity_filter: bool + :param profanity_filter: If True, the server will attempt to filter + out profanities, replacing all but the + initial character in each filtered word with + asterisks, e.g. ``'f***'``. If False or + omitted, profanities won't be filtered out. + + :type speech_context: list + :param speech_context: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. + + :rtype: list + :returns: A list of dictionaries. One dict for each alternative. 
Each + dictionary typically contains two keys (though not + all will be present in all cases) + + * ``transcript``: The detected text from the audio recording. + * ``confidence``: The confidence in language detection, float + between 0 and 1. + + :raises: ValueError if more than one result is returned or no results. + """ + config = RecognitionConfig( + encoding=sample.encoding, sample_rate=sample.sample_rate, + language_code=language_code, max_alternatives=max_alternatives, + profanity_filter=profanity_filter, + speech_context=SpeechContext(phrases=speech_context)) + + audio = RecognitionAudio(content=sample.content, + uri=sample.source_uri) + api = self._gapic_api + api_response = api.sync_recognize(config=config, audio=audio) + if len(api_response.results) == 1: + results = api_response.results.pop() + alternatives = results.alternatives + return [Transcript.from_pb(alternative) + for alternative in alternatives] + else: + raise ValueError('More than one result or none returned from API.') diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py index 3a708ce8391f..37bb755b8ee3 100644 --- a/speech/google/cloud/speech/client.py +++ b/speech/google/cloud/speech/client.py @@ -15,24 +15,33 @@ """Basic client for Google Cloud Speech API.""" from base64 import b64encode +import os +from google.cloud.client import Client as BaseClient from google.cloud._helpers import _to_bytes from google.cloud._helpers import _bytes_to_unicode -from google.cloud import client as client_module +from google.cloud.environment_vars import DISABLE_GRPC from google.cloud.speech.connection import Connection from google.cloud.speech.encoding import Encoding from google.cloud.speech.operation import Operation from google.cloud.speech.sample import Sample from google.cloud.speech.transcript import Transcript +try: + from google.cloud.speech._gax import GAPICSpeechAPI +except ImportError: # pragma: NO COVER + _HAVE_GAX = False + GAPICSpeechAPI = None +else: + _HAVE_GAX = True -class Client(client_module.Client): - """Client to bundle configuration needed for API requests. - :type project: str - :param project: The project which the client acts on behalf of. Will be - passed when creating a dataset / job. If not passed, - falls back to the default inferred from the environment. +_DISABLE_GAX = os.getenv(DISABLE_GRPC, False) +_USE_GAX = _HAVE_GAX and not _DISABLE_GAX + + +class Client(BaseClient): + """Client to bundle configuration needed for API requests. :type credentials: :class:`oauth2client.client.OAuth2Credentials` or :class:`NoneType` @@ -45,9 +54,22 @@ class Client(client_module.Client): :param http: An optional HTTP object to make requests. If not passed, an ``http`` object is created that is bound to the ``credentials`` for the current object. + + :type use_gax: bool + :param use_gax: (Optional) Explicitly specifies whether + to use the gRPC transport (via GAX) or HTTP. 
If unset, + falls back to the ``GOOGLE_CLOUD_DISABLE_GRPC`` environment + variable """ + def __init__(self, credentials=None, http=None, use_gax=None): + super(Client, self).__init__(credentials=credentials, http=http) + if use_gax is None: + self._use_gax = _USE_GAX + else: + self._use_gax = use_gax _connection_class = Connection + _speech_api = None def async_recognize(self, sample, language_code=None, max_alternatives=None, profanity_filter=None, @@ -139,6 +161,16 @@ def sample(content=None, source_uri=None, encoding=None, return Sample(content=content, source_uri=source_uri, encoding=encoding, sample_rate=sample_rate) + @property + def speech_api(self): + """Helper for speech-related API calls.""" + if self._speech_api is None: + if self._use_gax: + self._speech_api = GAPICSpeechAPI() + else: + self._speech_api = _JSONSpeechAPI(self) + return self._speech_api + def sync_recognize(self, sample, language_code=None, max_alternatives=None, profanity_filter=None, speech_context=None): @@ -188,11 +220,74 @@ def sync_recognize(self, sample, language_code=None, * ``confidence``: The confidence in language detection, float between 0 and 1. """ + api = self.speech_api + return api.sync_recognize(sample, language_code, max_alternatives, + profanity_filter, speech_context) + + +class _JSONSpeechAPI(object): + """Speech API for interacting with the JSON/REST version of the API. + + :type client: :class:`google.cloud.core.client.Client` + :param client: Instance of a ``Client`` object. + """ + def __init__(self, client): + self._client = client + self._connection = client.connection + + def sync_recognize(self, sample, language_code=None, max_alternatives=None, + profanity_filter=None, speech_context=None): + """Synchronous Speech Recognition. + + .. _sync_recognize: https://cloud.google.com/speech/reference/\ + rest/v1beta1/speech/syncrecognize + + See `sync_recognize`_. + + :type sample: :class:`~google.cloud.speech.sample.Sample` + :param sample: Instance of ``Sample`` containing audio information. + + :type language_code: str + :param language_code: (Optional) The language of the supplied audio as + BCP-47 language tag. Example: ``'en-GB'``. + If omitted, defaults to ``'en-US'``. + + :type max_alternatives: int + :param max_alternatives: (Optional) Maximum number of recognition + hypotheses to be returned. The server may + return fewer than maxAlternatives. + Valid values are 0-30. A value of 0 or 1 + will return a maximum of 1. Defaults to 1 + + :type profanity_filter: bool + :param profanity_filter: If True, the server will attempt to filter + out profanities, replacing all but the + initial character in each filtered word with + asterisks, e.g. ``'f***'``. If False or + omitted, profanities won't be filtered out. + + :type speech_context: list + :param speech_context: A list of strings (max 50) containing words and + phrases "hints" so that the speech recognition + is more likely to recognize them. This can be + used to improve the accuracy for specific words + and phrases. This can also be used to add new + words to the vocabulary of the recognizer. + + :rtype: list + :returns: A list of dictionaries. One dict for each alternative. Each + dictionary typically contains two keys (though not + all will be present in all cases) + * ``transcript``: The detected text from the audio recording. + * ``confidence``: The confidence in language detection, float + between 0 and 1. + + :raises: ValueError if more than one result is returned or no results. 
+ """ data = _build_request_data(sample, language_code, max_alternatives, profanity_filter, speech_context) - - api_response = self.connection.api_request( + api_response = self._connection.api_request( method='POST', path='speech:syncrecognize', data=data) if len(api_response['results']) == 1: @@ -200,7 +295,7 @@ def sync_recognize(self, sample, language_code=None, return [Transcript.from_api_repr(alternative) for alternative in result['alternatives']] else: - raise ValueError('result in api should have length 1') + raise ValueError('More than one result or none returned from API.') def _build_request_data(sample, language_code=None, max_alternatives=None, diff --git a/speech/google/cloud/speech/transcript.py b/speech/google/cloud/speech/transcript.py index 2470871494e4..43a7f19c27d8 100644 --- a/speech/google/cloud/speech/transcript.py +++ b/speech/google/cloud/speech/transcript.py @@ -35,11 +35,25 @@ def from_api_repr(cls, transcript): :type transcript: dict :param transcript: Dictionary response from the REST API. - :rtype: :class:`~Transcript` + :rtype: :class:`Transcript` :returns: Instance of ``Transcript``. """ return cls(transcript['transcript'], transcript['confidence']) + @classmethod + def from_pb(cls, transcript): + """Factory: construct ``Transcript`` from protobuf response. + + :type transcript: + :class:`google.cloud.speech.v1beta1.SpeechRecognitionAlternative` + :param transcript: Instance of ``SpeechRecognitionAlternative`` + from protobuf. + + :rtype: :class:`Transcript` + :returns: Instance of ``Transcript``. + """ + return cls(transcript.transcript, transcript.confidence) + @property def transcript(self): """Transcript text from audio. diff --git a/speech/setup.py b/speech/setup.py index c02aeaad3e9d..536ed0c53782 100644 --- a/speech/setup.py +++ b/speech/setup.py @@ -51,6 +51,8 @@ REQUIREMENTS = [ 'google-cloud-core >= 0.20.0', + 'gapic-google-cloud-speech-v1beta1 >= 0.11.1, < 0.12.0', + 'grpc-google-cloud-speech-v1beta1 >= 0.11.1, < 0.12.0', ] setup( diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py index 4d1b67e25106..047bf6487a17 100644 --- a/speech/unit_tests/test_client.py +++ b/speech/unit_tests/test_client.py @@ -23,6 +23,7 @@ class TestClient(unittest.TestCase): def _getTargetClass(self): from google.cloud.speech.client import Client + return Client def _makeOne(self, *args, **kw): @@ -38,6 +39,12 @@ def test_ctor(self): self.assertTrue(client.connection.credentials is creds) self.assertTrue(client.connection.http is http) + def test_ctor_use_gax_preset(self): + creds = _Credentials() + http = object() + client = self._makeOne(credentials=creds, http=http, use_gax=True) + self.assertTrue(client._use_gax) + def test_create_sample_from_client(self): from google.cloud import speech from google.cloud.speech.sample import Sample @@ -60,11 +67,13 @@ def test_create_sample_from_client(self): self.assertEqual(content_sample.sample_rate, self.SAMPLE_RATE) self.assertEqual(content_sample.encoding, speech.Encoding.FLAC) - def test_sync_recognize_content_with_optional_parameters(self): + def test_sync_recognize_content_with_optional_params_no_gax(self): from base64 import b64encode from google.cloud._helpers import _to_bytes from google.cloud._helpers import _bytes_to_unicode + from google.cloud._testing import _Monkey + from google.cloud.speech import client as MUT from google.cloud import speech from google.cloud.speech.sample import Sample from google.cloud.speech.transcript import Transcript @@ -91,18 +100,19 @@ def 
test_sync_recognize_content_with_optional_parameters(self): } } credentials = _Credentials() - client = self._makeOne(credentials=credentials) + client = self._makeOne(credentials=credentials, use_gax=False) client.connection = _Connection(RETURNED) encoding = speech.Encoding.FLAC sample = Sample(content=self.AUDIO_CONTENT, encoding=encoding, sample_rate=self.SAMPLE_RATE) - response = client.sync_recognize(sample, - language_code='EN', - max_alternatives=2, - profanity_filter=True, - speech_context=self.HINTS) + with _Monkey(MUT, _USE_GAX=False): + response = client.sync_recognize(sample, + language_code='EN', + max_alternatives=2, + profanity_filter=True, + speech_context=self.HINTS) self.assertEqual(len(client.connection._requested), 1) req = client.connection._requested[0] @@ -118,7 +128,9 @@ def test_sync_recognize_content_with_optional_parameters(self): self.assertEqual(response[0].transcript, expected.transcript) self.assertEqual(response[0].confidence, expected.confidence) - def test_sync_recognize_source_uri_without_optional_parameters(self): + def test_sync_recognize_source_uri_without_optional_params_no_gax(self): + from google.cloud._testing import _Monkey + from google.cloud.speech import client as MUT from google.cloud import speech from google.cloud.speech.sample import Sample from google.cloud.speech.transcript import Transcript @@ -135,14 +147,15 @@ def test_sync_recognize_source_uri_without_optional_parameters(self): } } credentials = _Credentials() - client = self._makeOne(credentials=credentials) + client = self._makeOne(credentials=credentials, use_gax=False) client.connection = _Connection(RETURNED) encoding = speech.Encoding.FLAC sample = Sample(source_uri=self.AUDIO_SOURCE_URI, encoding=encoding, sample_rate=self.SAMPLE_RATE) - response = client.sync_recognize(sample) + with _Monkey(MUT, _USE_GAX=False): + response = client.sync_recognize(sample) self.assertEqual(len(client.connection._requested), 1) req = client.connection._requested[0] @@ -158,20 +171,65 @@ def test_sync_recognize_source_uri_without_optional_parameters(self): self.assertEqual(response[0].transcript, expected.transcript) self.assertEqual(response[0].confidence, expected.confidence) - def test_sync_recognize_with_empty_results(self): + def test_sync_recognize_with_empty_results_no_gax(self): + from google.cloud._testing import _Monkey + from google.cloud.speech import client as MUT from google.cloud import speech from google.cloud.speech.sample import Sample from unit_tests._fixtures import SYNC_RECOGNIZE_EMPTY_RESPONSE credentials = _Credentials() - client = self._makeOne(credentials=credentials) + client = self._makeOne(credentials=credentials, use_gax=False) client.connection = _Connection(SYNC_RECOGNIZE_EMPTY_RESPONSE) with self.assertRaises(ValueError): - sample = Sample(source_uri=self.AUDIO_SOURCE_URI, - encoding=speech.Encoding.FLAC, - sample_rate=self.SAMPLE_RATE) - client.sync_recognize(sample) + with _Monkey(MUT, _USE_GAX=False): + sample = Sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=speech.Encoding.FLAC, + sample_rate=self.SAMPLE_RATE) + client.sync_recognize(sample) + + def test_sync_recognize_with_empty_results_gax(self): + from google.cloud._testing import _Monkey + from google.cloud.speech import _gax as MUT + from google.cloud import speech + from google.cloud.speech.sample import Sample + + credentials = _Credentials() + client = self._makeOne(credentials=credentials, use_gax=True) + client.connection = _Connection() + + with self.assertRaises(ValueError): + mock_no_results 
= _MockGAPICSpeechAPI + mock_no_results._results = [] + with _Monkey(MUT, SpeechApi=mock_no_results): + sample = Sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=speech.Encoding.FLAC, + sample_rate=self.SAMPLE_RATE) + client.sync_recognize(sample) + + def test_sync_recognize_with_gax(self): + from google.cloud import speech + from google.cloud.speech import _gax as MUT + from google.cloud._testing import _Monkey + + creds = _Credentials() + client = self._makeOne(credentials=creds, use_gax=True) + client.connection = _Connection() + client._speech_api = None + + mock_no_results = _MockGAPICSpeechAPI + mock_no_results._results = [_MockGAPICSyncResult()] + + with _Monkey(MUT, SpeechApi=_MockGAPICSpeechAPI): + sample = client.sample(source_uri=self.AUDIO_SOURCE_URI, + encoding=speech.Encoding.FLAC, + sample_rate=self.SAMPLE_RATE) + results = client.sync_recognize(sample) + self.assertEqual(results[0].transcript, + _MockGAPICAlternative.transcript) + self.assertEqual(results[0].confidence, + _MockGAPICAlternative.confidence) def test_async_supported_encodings(self): from google.cloud import speech @@ -192,6 +250,7 @@ def test_async_recognize(self): from google.cloud import speech from google.cloud.speech.operation import Operation from google.cloud.speech.sample import Sample + RETURNED = ASYNC_RECOGNIZE_RESPONSE credentials = _Credentials() @@ -206,6 +265,63 @@ def test_async_recognize(self): self.assertFalse(operation.complete) self.assertIsNone(operation.metadata) + def test_speech_api_with_gax(self): + from google.cloud.speech import _gax as MUT + from google.cloud._testing import _Monkey + from google.cloud.speech.client import GAPICSpeechAPI + + creds = _Credentials() + client = self._makeOne(credentials=creds, use_gax=True) + + with _Monkey(MUT, SpeechApi=_MockGAPICSpeechAPI): + self.assertIsNone(client._speech_api) + self.assertIsInstance(client.speech_api, GAPICSpeechAPI) + + def test_speech_api_without_gax(self): + from google.cloud.speech.client import _JSONSpeechAPI + + creds = _Credentials() + client = self._makeOne(credentials=creds, use_gax=False) + self.assertIsNone(client._speech_api) + self.assertIsInstance(client.speech_api, _JSONSpeechAPI) + + def test_speech_api_preset(self): + creds = _Credentials() + client = self._makeOne(credentials=creds) + fake_api = object() + client._speech_api = fake_api + + self.assertIs(client.speech_api, fake_api) + + +class _MockGAPICAlternative(object): + transcript = 'testing 1 2 3' + confidence = 0.95234356 + + +class _MockGAPICSyncResult(object): + alternatives = [_MockGAPICAlternative()] + + +class _MockGAPICSpeechResponse(object): + error = None + endpointer_type = None + results = [] + result_index = 0 + + +class _MockGAPICSpeechAPI(object): + _requests = None + _response = _MockGAPICSpeechResponse() + _results = [_MockGAPICSyncResult()] + + def sync_recognize(self, config, audio): + self.config = config + self.audio = audio + mock_response = self._response + mock_response.results = self._results + return mock_response + class _Credentials(object):
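A minimal usage sketch of the client path this patch adds, for reference; it is not part of the diff. The bucket URI, sample rate, and max_alternatives value are illustrative placeholders, and credentials are assumed to be inferred from the environment by the base Client when none are passed in.

    from google.cloud.speech.client import Client
    from google.cloud.speech.encoding import Encoding

    # use_gax=True selects the new GAPIC/gRPC transport (GAPICSpeechAPI);
    # leaving it unset falls back to the GOOGLE_CLOUD_DISABLE_GRPC setting.
    client = Client(use_gax=True)

    # Build a Sample pointing at audio already stored in Cloud Storage.
    sample = client.sample(source_uri='gs://my-bucket/recording.flac',
                           encoding=Encoding.FLAC,
                           sample_rate=16000)

    # sync_recognize() dispatches through client.speech_api and returns a
    # list of Transcript objects (built via Transcript.from_pb() on the
    # GAPIC path), each exposing .transcript and .confidence.
    for alternative in client.sync_recognize(sample, max_alternatives=2):
        print(alternative.transcript, alternative.confidence)

The speech_api property caches whichever transport is chosen, so repeated calls on the same client reuse a single GAPICSpeechAPI or _JSONSpeechAPI instance.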