diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md index 71d31abc973..9c58ee134dd 100644 --- a/speech/cloud-client/README.md +++ b/speech/cloud-client/README.md @@ -165,7 +165,11 @@ mvn exec:java -DRecognize -Dexec.args="word-level-conf gs://cloud-samples-tests/ ``` ## Infinite Streaming -Continuously stream audio to the speech API over multiple requests +Continuously stream audio to the speech API over multiple requests (by default en-US). ``` mvn exec:java -DInfiniteStreamRecognize ``` +If the streamed audio is in a different language, you can pass its language code as a command-line argument (for example, en-GB for English (Great Britain) or en-US for English (United States); the full list of supported codes is available at [this link](https://cloud.google.com/speech-to-text/docs/languages)). +``` +mvn exec:java -Dexec.args="-lang_code=en-US" -DInfiniteStreamRecognize +``` diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml index 81ebabe8835..c1df5dd1734 100644 --- a/speech/cloud-client/pom.xml +++ b/speech/cloud-client/pom.xml @@ -43,7 +43,11 @@ 1.9.0 - + + commons-cli + commons-cli + 1.3 + junit diff --git a/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java b/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java index f926fa7ea6c..1ab6059a594 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java +++ b/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java @@ -17,6 +17,7 @@ package com.example.speech; // [START speech_transcribe_infinite_streaming] + import com.google.api.gax.rpc.ClientStream; import com.google.api.gax.rpc.ResponseObserver; import com.google.api.gax.rpc.StreamController; @@ -29,11 +30,13 @@ import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse; import com.google.protobuf.ByteString; import com.google.protobuf.Duration; + import java.lang.Math; import java.text.DecimalFormat; import java.util.ArrayList; 
import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.DataLine; @@ -54,7 +57,7 @@ public class InfiniteStreamRecognize { private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes private static int restartCounter = 0; - private static ArrayList audioInput = new ArrayList(); + private static ArrayList audioInput = new ArrayList(); private static ArrayList lastAudioInput = new ArrayList(); private static int resultEndTimeInMS = 0; private static int isFinalEndTime = 0; @@ -66,15 +69,33 @@ public class InfiniteStreamRecognize { private static ByteString tempByteString; public static void main(String... args) { + InfiniteStreamRecognizeOptions options = InfiniteStreamRecognizeOptions.fromFlags(args); + if (options == null) { + // Could not parse. + System.out.println("Failed to parse options."); + System.exit(1); + } + try { - infiniteStreamingRecognize(); + infiniteStreamingRecognize(options.langCode); } catch (Exception e) { System.out.println("Exception caught: " + e); } } + public static String convertMillisToDate(double milliSeconds) { + long millis = (long) milliSeconds; + DecimalFormat format = new DecimalFormat(); + format.setMinimumIntegerDigits(2); + return String.format("%s:%s /", + format.format(TimeUnit.MILLISECONDS.toMinutes(millis)), + format.format(TimeUnit.MILLISECONDS.toSeconds(millis) + - TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(millis))) + ); + } + /** Performs infinite streaming speech recognition */ - public static void infiniteStreamingRecognize() throws Exception { + public static void infiniteStreamingRecognize(String languageCode) throws Exception { // Microphone Input buffering class MicBuffer implements Runnable { @@ -115,45 +136,41 @@ public void onStart(StreamController controller) { } public void 
onResponse(StreamingRecognizeResponse response) { - responses.add(response); - StreamingRecognitionResult result = response.getResultsList().get(0); - Duration resultEndTime = result.getResultEndTime(); - resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000) - + (resultEndTime.getNanos() / 1000000)); - + + (resultEndTime.getNanos() / 1000000)); double correctedTime = resultEndTimeInMS - bridgingOffset - + (STREAMING_LIMIT * restartCounter); - DecimalFormat format = new DecimalFormat("0.#"); + + (STREAMING_LIMIT * restartCounter); SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); if (result.getIsFinal()) { System.out.print(GREEN); System.out.print("\033[2K\r"); - System.out.printf("%s: %s\n", format.format(correctedTime), - alternative.getTranscript()); - + System.out.printf("%s: %s [confidence: %.2f]\n", + convertMillisToDate(correctedTime), + alternative.getTranscript(), + alternative.getConfidence() + ); isFinalEndTime = resultEndTimeInMS; lastTranscriptWasFinal = true; } else { System.out.print(RED); System.out.print("\033[2K\r"); - System.out.printf("%s: %s", format.format(correctedTime), - alternative.getTranscript()); - + System.out.printf("%s: %s", convertMillisToDate(correctedTime), + alternative.getTranscript() + ); lastTranscriptWasFinal = false; } } - public void onComplete() {} - - public void onError(Throwable t) {} + public void onComplete() { + } + public void onError(Throwable t) { + } }; - clientStream = client.streamingRecognizeCallable().splitCall(responseObserver); RecognitionConfig recognitionConfig = @@ -227,8 +244,8 @@ public void onError(Throwable t) {} request = StreamingRecognizeRequest.newBuilder() - .setStreamingConfig(streamingRecognitionConfig) - .build(); + .setStreamingConfig(streamingRecognitionConfig) + .build(); System.out.println(YELLOW); System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT); @@ -253,13 +270,12 @@ public void onError(Throwable t) {} bridgingOffset = 
finalRequestEndTime; } int chunksFromMS = (int) Math.floor((finalRequestEndTime - - bridgingOffset) / chunkTime); + - bridgingOffset) / chunkTime); // chunks from MS is number of chunks to resend bridgingOffset = (int) Math.floor((lastAudioInput.size() - - chunksFromMS) * chunkTime); + - chunksFromMS) * chunkTime); // set bridging offset for next request for (int i = chunksFromMS; i < lastAudioInput.size(); i++) { - request = StreamingRecognizeRequest.newBuilder() .setAudioContent(lastAudioInput.get(i)) @@ -288,5 +304,6 @@ public void onError(Throwable t) {} } } } + } // [END speech_transcribe_infinite_streaming] diff --git a/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java b/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java new file mode 100644 index 00000000000..5966c151b9e --- /dev/null +++ b/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognizeOptions.java @@ -0,0 +1,56 @@ +/* + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.speech; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class InfiniteStreamRecognizeOptions { + String langCode = "en-US"; //by default english US + + /** Construct an InfiniteStreamRecognizeOptions class from command line flags. */ + public static InfiniteStreamRecognizeOptions fromFlags(String[] args) { + Options options = new Options(); + options.addOption( + Option.builder() + .type(String.class) + .longOpt("lang_code") + .hasArg() + .desc("Language code") + .build()); + + CommandLineParser parser = new DefaultParser(); + CommandLine commandLine; + try { + commandLine = parser.parse(options, args); + InfiniteStreamRecognizeOptions res = new InfiniteStreamRecognizeOptions(); + + if (commandLine.hasOption("lang_code")) { + res.langCode = commandLine.getOptionValue("lang_code"); + } + return res; + } catch (ParseException e) { + System.err.println(e.getMessage()); + return null; + } + } + +}