diff --git a/speech/cloud-client/requirements.txt b/speech/cloud-client/requirements.txt
index 92970530560..a88345ee9a4 100644
--- a/speech/cloud-client/requirements.txt
+++ b/speech/cloud-client/requirements.txt
@@ -1 +1 @@
-google-cloud-speech==0.27.0
+google-cloud-speech==0.27.1
diff --git a/speech/cloud-client/transcribe_async.py b/speech/cloud-client/transcribe_async.py
index 9e5a416a567..b25121217ff 100644
--- a/speech/cloud-client/transcribe_async.py
+++ b/speech/cloud-client/transcribe_async.py
@@ -79,7 +79,8 @@ def transcribe_gcs(gcs_uri):
     config = types.RecognitionConfig(
         encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
         sample_rate_hertz=16000,
-        language_code='en-US')
+        language_code='en-US',
+        enable_word_time_offsets=True)
 
     operation = client.long_running_recognize(config, audio)
 
@@ -96,6 +97,15 @@ def transcribe_gcs(gcs_uri):
     for alternative in alternatives:
         print('Transcript: {}'.format(alternative.transcript))
         print('Confidence: {}'.format(alternative.confidence))
+
+        for word_info in alternative.words:
+            word = word_info.word
+            start_time = word_info.start_time
+            end_time = word_info.end_time
+            print('Word: {}, start_time: {}, end_time: {}'.format(
+                word,
+                start_time.seconds + start_time.nanos * 1e-9,
+                end_time.seconds + end_time.nanos * 1e-9))
     # [END def_transcribe_gcs]
 
 
diff --git a/speech/cloud-client/transcribe_async_test.py b/speech/cloud-client/transcribe_async_test.py
index 7d66747eb44..286434d0609 100644
--- a/speech/cloud-client/transcribe_async_test.py
+++ b/speech/cloud-client/transcribe_async_test.py
@@ -33,3 +33,18 @@ def test_transcribe_gcs(capsys):
     out, err = capsys.readouterr()
 
     assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
+
+
+def test_transcribe_gcs_word_time_offsets(capsys):
+    """Check that word-level time offsets are printed for the sample audio."""
+    transcribe_async.transcribe_gcs(
+        'gs://python-docs-samples-tests/speech/audio.flac')
+    out, err = capsys.readouterr()
+
+    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
+    # Fail with a readable assertion rather than an AttributeError on None
+    # when the expected "Word: ..." line is missing from the output.
+    assert match is not None, 'no start_time reported for "Bridge"'
+    start_seconds = float(match.group(1))
+
+    assert start_seconds > 0