Skip to content

Commit b187310

Browse files
committed
Merge pull request #239 from GoogleCloudPlatform/speech-streaming
Add speech api streaming sample.
2 parents 7a19781 + 726e673 commit b187310

File tree

8 files changed

+232
-4
lines changed

8 files changed

+232
-4
lines changed

.travis.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,13 @@ env:
1616
- GOOGLE_CLIENT_SECRETS=${TRAVIS_BUILD_DIR}/testing/resources/client-secrets.json
1717
- GAE_ROOT=${HOME}/.cache/
1818
- secure: Orp9Et2TIwCG/Hf59aa0NUDF1pNcwcS4TFulXX175918cFREOzf/cNZNg+Ui585ZRFjbifZdc858tVuCVd8XlxQPXQgp7bwB7nXs3lby3LYg4+HD83Gaz7KOWxRLWVor6IVn8OxeCzwl6fJkdmffsTTO9csC4yZ7izHr+u7hiO4=
19+
addons:
20+
apt:
21+
packages:
22+
- portaudio19-dev
1923
before_install:
2024
- pip install --upgrade pip wheel virtualenv
25+
# for speech api sample
2126
- openssl aes-256-cbc -k "$secrets_password" -in secrets.tar.enc -out secrets.tar -d
2227
- tar xvf secrets.tar
2328
install:

nox.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ def session_tests(session, interpreter, extra_pytest_args=None):
8686
# allows users to run a particular test instead of all of them.
8787
for sample in (session.posargs or
8888
collect_sample_dirs('.', SESSION_TESTS_BLACKLIST)):
89+
# Install additional dependencies if they exist
90+
dirname = sample if os.path.isdir(sample) else os.path.dirname(sample)
91+
for reqfile in list_files(dirname, 'requirements*.txt'):
92+
session.install('-r', reqfile)
93+
8994
session.run(
9095
'py.test', sample,
9196
*pytest_args,

speech/api/README.md

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,36 @@ See the
3737
[Cloud Platform Auth Guide](https://cloud.google.com/docs/authentication#developer_workflow)
3838
for more information.
3939

40+
### Install the dependencies
41+
42+
* If you're running the `speechrest.py` sample:
43+
44+
```sh
45+
$ pip install -r requirements-speechrest.txt
46+
```
47+
48+
* If you're running the `speech_streaming.py` sample:
49+
50+
```sh
51+
$ pip install -r requirements-speech_streaming.txt
52+
```
53+
4054
## Run the example
4155
42-
```sh
43-
$ python speechrest.py resources/audio.raw
44-
```
56+
* To run the `speechrest.py` sample:
57+
58+
```sh
59+
$ python speechrest.py resources/audio.raw
60+
```
61+
62+
You should see a response with the transcription result.
63+
64+
* To run the `speech_streaming.py` sample:
65+
66+
```sh
67+
$ python speech_streaming.py
68+
```
4569
46-
You should see a response with the transcription result.
70+
The sample will run in a continuous loop, printing the data and metadata
71+
it receives from the Speech API, which includes alternative transcriptions
72+
of what it hears, and a confidence score. Say "exit" to exit the loop.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
gcloud==0.12.0
2+
grpcio==0.13.1
3+
PyAudio==0.2.9
4+
grpc-google-cloud-speech==1.0.0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
google-api-python-client==1.5.0

speech/api/resources/quit.raw

160 KB
Binary file not shown.

speech/api/speech_streaming.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/usr/bin/python
2+
3+
import contextlib
4+
import threading
5+
6+
from gcloud.credentials import get_credentials
7+
from google.cloud.speech.v1.cloud_speech_pb2 import * # noqa
8+
from google.rpc import code_pb2
9+
from grpc.beta import implementations
10+
import pyaudio
11+
12+
# Microphone capture settings.
RATE = 16 * 1000  # sampling rate handed to the recorder and to the API
CHANNELS = 1
CHUNK = int(RATE / 10)  # frames per read; one tenth of a second (100ms)

# How long, in seconds, a single streaming request may stay open.
DEADLINE_SECS = 60 * 60 * 8
# OAuth scope requested when scoping the application default credentials.
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
20+
21+
22+
def make_channel(host, port):
    """Build a secure gRPC channel that carries Google auth credentials.

    Args:
        host: Host name of the service to connect to.
        port: Port of the service to connect to.

    Returns:
        The channel produced by `implementations.secure_channel`, using
        SSL credentials composed with a bearer-token call credential.
    """
    # Default SSL credentials, needed to talk https to the service.
    ssl_creds = implementations.ssl_channel_credentials(None, None, None)

    # Application default credentials from the environment, scoped for the
    # API. The access token is read once here and is not refreshed by this
    # code afterwards.
    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
    access_token = scoped_creds.get_access_token().access_token
    bearer_header = ('Authorization', 'Bearer ' + access_token)

    def inject_auth_header(_, callback):
        # Metadata plugin: hand the fixed auth header to grpc's callback.
        return callback([bearer_header], None)

    call_creds = implementations.metadata_call_credentials(
        inject_auth_header, name='google_creds')

    # Combine transport security (ssl) with per-call google auth.
    combined_creds = implementations.composite_channel_credentials(
        ssl_creds, call_creds)

    return implementations.secure_channel(host, port, combined_creds)
42+
43+
44+
@contextlib.contextmanager
def record_audio(channels, rate, chunk):
    """Opens a recording stream in a context manager.

    The stream and the underlying PyAudio interface are released when the
    `with` block exits, including when it exits because of an exception or
    because a generator using this context manager is closed early.

    Args:
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Number of frames per buffer for the input stream.

    Yields:
        The opened 16-bit (`pyaudio.paInt16`) input stream.
    """
    audio_interface = pyaudio.PyAudio()
    try:
        audio_stream = audio_interface.open(
            format=pyaudio.paInt16, channels=channels, rate=rate,
            input=True, frames_per_buffer=chunk,
        )
        try:
            yield audio_stream
        finally:
            # Runs even if the body of the `with` raised; without the
            # try/finally the code after `yield` would be skipped.
            audio_stream.stop_stream()
            audio_stream.close()
    finally:
        # Also covers the case where opening the stream itself failed.
        audio_interface.terminate()
58+
59+
60+
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Generate `RecognizeRequest`s from audio captured by `record_audio`.

    The first request carries both the stream description and the first
    chunk of audio; every later request carries audio only.

    Args:
        stop_audio: A threading.Event; once set, no further audio is read.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: How many frames to read per request sent to the api.

    Yields:
        `RecognizeRequest` messages, one per chunk read.
    """
    with record_audio(channels, rate, chunk) as mic:
        # The server needs to know how to interpret the audio, so the
        # opening request describes the encoding and sample rate.
        stream_config = InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate)
        first_audio = AudioRequest(content=mic.read(chunk))
        yield RecognizeRequest(
            initial_request=stream_config, audio_request=first_audio)

        # From here on only the audio content is sent.
        while True:
            if stop_audio.is_set():
                break
            next_audio = AudioRequest(content=mic.read(chunk))
            yield RecognizeRequest(audio_request=next_audio)
85+
86+
87+
def listen_print_loop(recognize_stream):
    """Print transcription alternatives until an exit keyword is heard.

    Args:
        recognize_stream: Iterable of responses; each is expected to have
            `error` and `results` attributes.

    Raises:
        RuntimeError: If a response reports an error code other than OK.
    """
    exit_keywords = ('exit', 'quit')

    for response in recognize_stream:
        if response.error.code != code_pb2.OK:
            raise RuntimeError('Server error: ' + response.error.message)

        # Show every result's alternatives before checking for keywords.
        for result in response.results:
            print(result.alternatives)

        # Stop as soon as one sufficiently confident alternative is
        # exactly one of the exit keywords.
        for result in response.results:
            for alternative in result.alternatives:
                if (alternative.confidence > .5 and
                        alternative.transcript.strip() in exit_keywords):
                    print('Exiting..')
                    return
104+
105+
106+
def main():
    """Stream recorded audio to the Speech API and print what comes back.

    Runs until `listen_print_loop` returns or raises, then signals the
    request generator to stop recording.
    """
    stop_audio = threading.Event()
    channel = make_channel('speech.googleapis.com', 443)
    with beta_create_Speech_stub(channel) as service:
        try:
            responses = service.Recognize(
                request_stream(stop_audio), DEADLINE_SECS)
            listen_print_loop(responses)
        finally:
            # The grpc lib consumes the request stream in its own thread;
            # setting the event is what makes that generator finish.
            stop_audio.set()


if __name__ == '__main__':
    main()
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import contextlib
15+
import io
16+
import re
17+
import sys
18+
19+
import pytest
20+
21+
import speech_streaming
22+
23+
24+
class MockAudioStream(object):
    """File-backed stand-in for a recording stream.

    Replays the bytes of an audio file and, once the file is exhausted,
    a finite run of silence.

    Args:
        audio_filename: Path of the raw audio file to replay.
        trailing_silence_secs: Seconds of silence available after the file
            runs out, at `speech_streaming.RATE` frames per second.
    """

    def __init__(self, audio_filename, trailing_silence_secs=10):
        self.audio_filename = audio_filename
        # Two zero bytes per 16-bit frame. A bytes literal is required:
        # io.BytesIO rejects a text string on Python 3.
        self.silence = io.BytesIO(
            b'\0\0' * speech_streaming.RATE * trailing_silence_secs)

    def __enter__(self):
        # Binary mode: the file holds raw samples, not text.
        self.audio_file = open(self.audio_filename, 'rb')
        return self

    def __exit__(self, *args):
        self.audio_file.close()

    def __call__(self, *args):
        # NOTE(review): presumably lets an instance be called like
        # `record_audio(channels, rate, chunk)` - confirm with callers.
        return self

    def read(self, num_frames):
        """Return the next `num_frames` frames, from the file or silence."""
        # audio is 16-bit samples, whereas python byte is 8-bit
        num_bytes = 2 * num_frames
        chunk = self.audio_file.read(num_bytes) or self.silence.read(num_bytes)
        return chunk
45+
46+
47+
def mock_audio_stream(filename):
    """Build a context-manager factory that replays `filename`.

    Args:
        filename: Path of the file to open in binary mode.

    Returns:
        A callable taking `(channels, rate, chunk)`, all ignored, whose
        result is a context manager yielding the opened file. The file is
        closed when the context exits.
    """
    def open_recording(channels, rate, chunk):
        audio_file = open(filename, 'rb')
        try:
            yield audio_file
        finally:
            audio_file.close()

    return contextlib.contextmanager(open_recording)
54+
55+
56+
@pytest.mark.skipif(
    sys.version_info >= (3, 0), reason="can't get grpc lib to work in python3")
def test_main(resource, monkeypatch, capsys):
    """Run the sample against a recorded "quit" clip and check its output."""
    # Feed the sample a recording instead of opening a real input device.
    fake_recorder = mock_audio_stream(resource('quit.raw'))
    monkeypatch.setattr(speech_streaming, 'record_audio', fake_recorder)
    # Keep the streaming request short for the test run.
    monkeypatch.setattr(speech_streaming, 'DEADLINE_SECS', 5)

    speech_streaming.main()

    captured_out, _ = capsys.readouterr()
    found = re.search(
        r'transcript.*"quit"', captured_out, re.DOTALL | re.I)
    assert found

0 commit comments

Comments
 (0)