diff --git a/speech/microphone/requirements.txt b/speech/microphone/requirements.txt index 910462f0a16..3ddf21be472 100644 --- a/speech/microphone/requirements.txt +++ b/speech/microphone/requirements.txt @@ -1,3 +1,4 @@ google-cloud-speech==1.3.1 pyaudio==0.2.11 six==1.13.0 + diff --git a/speech/microphone/transcribe_streaming_indefinite.py b/speech/microphone/transcribe_streaming_indefinite.py deleted file mode 100644 index c6f3ff97be3..00000000000 --- a/speech/microphone/transcribe_streaming_indefinite.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Google Cloud Speech API sample application using the streaming API. - -NOTE: This module requires the additional dependency `pyaudio`. To install -using pip: - - pip install pyaudio - -Example usage: - python transcribe_streaming_indefinite.py -""" - -# [START speech_transcribe_infinite_streaming] -from __future__ import division - -import time -import re -import sys - -from google.cloud import speech - -import pyaudio -from six.moves import queue - -# Audio recording parameters -STREAMING_LIMIT = 290000 -SAMPLE_RATE = 16000 -CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms - - -def get_current_time(): - return int(round(time.time() * 1000)) - - -def duration_to_secs(duration): - return duration.seconds + (duration.nanos / float(1e9)) - - -class ResumableMicrophoneStream: - """Opens a recording stream as a generator yielding the audio chunks.""" - def __init__(self, rate, chunk_size): - self._rate = rate - self._chunk_size = chunk_size - self._num_channels = 1 - self._max_replay_secs = 5 - - # Create a thread-safe buffer of audio data - self._buff = queue.Queue() - self.closed = True - self.start_time = get_current_time() - - # 2 bytes in 16 bit samples - self._bytes_per_sample = 2 * self._num_channels - self._bytes_per_second = self._rate * self._bytes_per_sample - - self._bytes_per_chunk = (self._chunk_size * self._bytes_per_sample) - self._chunks_per_second = ( - self._bytes_per_second // self._bytes_per_chunk) - - def __enter__(self): - self.closed = False - - self._audio_interface = pyaudio.PyAudio() - self._audio_stream = self._audio_interface.open( - format=pyaudio.paInt16, - channels=self._num_channels, - rate=self._rate, - input=True, - frames_per_buffer=self._chunk_size, - # Run the audio stream asynchronously to fill the buffer object. - # This is necessary so that the input device's buffer doesn't - # overflow while the calling thread makes network requests, etc. - stream_callback=self._fill_buffer, - ) - - return self - - def __exit__(self, type, value, traceback): - self._audio_stream.stop_stream() - self._audio_stream.close() - self.closed = True - # Signal the generator to terminate so that the client's - # streaming_recognize method will not block the process termination. - self._buff.put(None) - self._audio_interface.terminate() - - def _fill_buffer(self, in_data, *args, **kwargs): - """Continuously collect data from the audio stream, into the buffer.""" - self._buff.put(in_data) - return None, pyaudio.paContinue - - def generator(self): - while not self.closed: - if get_current_time() - self.start_time > STREAMING_LIMIT: - self.start_time = get_current_time() - break - # Use a blocking get() to ensure there's at least one chunk of - # data, and stop iteration if the chunk is None, indicating the - # end of the audio stream. - chunk = self._buff.get() - if chunk is None: - return - data = [chunk] - - # Now consume whatever other data's still buffered. - while True: - try: - chunk = self._buff.get(block=False) - if chunk is None: - return - data.append(chunk) - except queue.Empty: - break - - yield b''.join(data) - - -def listen_print_loop(responses, stream): - """Iterates through server responses and prints them. - - The responses passed is a generator that will block until a response - is provided by the server. - - Each response may contain multiple results, and each result may contain - multiple alternatives; for details, see https://goo.gl/tjCPAU. Here we - print only the transcription for the top alternative of the top result. - - In this case, responses are provided for interim results as well. If the - response is an interim one, print a line feed at the end of it, to allow - the next result to overwrite it, until the response is a final one. For the - final one, print a newline to preserve the finalized transcription. - """ - responses = (r for r in responses if ( - r.results and r.results[0].alternatives)) - - num_chars_printed = 0 - for response in responses: - if not response.results: - continue - - # The `results` list is consecutive. For streaming, we only care about - # the first result being considered, since once it's `is_final`, it - # moves on to considering the next utterance. - result = response.results[0] - if not result.alternatives: - continue - - # Display the transcription of the top alternative. - top_alternative = result.alternatives[0] - transcript = top_alternative.transcript - - # Display interim results, but with a carriage return at the end of the - # line, so subsequent lines will overwrite them. - # - # If the previous result was longer than this one, we need to print - # some extra spaces to overwrite the previous result - overwrite_chars = ' ' * (num_chars_printed - len(transcript)) - - if not result.is_final: - sys.stdout.write(transcript + overwrite_chars + '\r') - sys.stdout.flush() - - num_chars_printed = len(transcript) - else: - print(transcript + overwrite_chars) - - # Exit recognition if any of the transcribed phrases could be - # one of our keywords. - if re.search(r'\b(exit|quit)\b', transcript, re.I): - print('Exiting..') - stream.closed = True - break - - num_chars_printed = 0 - - -def main(): - client = speech.SpeechClient() - config = speech.types.RecognitionConfig( - encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=SAMPLE_RATE, - language_code='en-US', - max_alternatives=1, - enable_word_time_offsets=True) - streaming_config = speech.types.StreamingRecognitionConfig( - config=config, - interim_results=True) - - mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE) - - print('Say "Quit" or "Exit" to terminate the program.') - - with mic_manager as stream: - while not stream.closed: - audio_generator = stream.generator() - requests = (speech.types.StreamingRecognizeRequest( - audio_content=content) - for content in audio_generator) - - responses = client.streaming_recognize(streaming_config, - requests) - # Now, put the transcription responses to use. - listen_print_loop(responses, stream) - - -if __name__ == '__main__': - main() -# [END speech_transcribe_infinite_streaming] diff --git a/speech/microphone/transcribe_streaming_infinite.py b/speech/microphone/transcribe_streaming_infinite.py index 7cab25d10eb..f45b82cf90b 100644 --- a/speech/microphone/transcribe_streaming_infinite.py +++ b/speech/microphone/transcribe_streaming_infinite.py @@ -32,13 +32,12 @@ import re import sys -# uses result_end_time currently only avaialble in v1p1beta, will be in v1 soon -from google.cloud import speech_v1p1beta1 as speech +from google.cloud import speech import pyaudio from six.moves import queue # Audio recording parameters -STREAMING_LIMIT = 10000 +STREAMING_LIMIT = 240000 # 4 minutes SAMPLE_RATE = 16000 CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms