2525import com .google .cloud .videointelligence .v1 .Feature ;
2626import com .google .cloud .videointelligence .v1 .LabelAnnotation ;
2727import com .google .cloud .videointelligence .v1 .LabelSegment ;
28+ import com .google .cloud .videointelligence .v1 .SpeechRecognitionAlternative ;
29+ import com .google .cloud .videointelligence .v1 .SpeechTranscription ;
30+ import com .google .cloud .videointelligence .v1 .SpeechTranscriptionConfig ;
2831import com .google .cloud .videointelligence .v1 .VideoAnnotationResults ;
32+ import com .google .cloud .videointelligence .v1 .VideoContext ;
2933import com .google .cloud .videointelligence .v1 .VideoIntelligenceServiceClient ;
3034import com .google .cloud .videointelligence .v1 .VideoSegment ;
35+ import com .google .cloud .videointelligence .v1 .WordInfo ;
3136import com .google .protobuf .ByteString ;
3237import java .io .IOException ;
3338import java .nio .file .Files ;
3439import java .nio .file .Path ;
3540import java .nio .file .Paths ;
41+ import java .util .concurrent .TimeUnit ;
42+
3643import org .apache .commons .codec .binary .Base64 ;
3744
3845
@@ -83,6 +90,9 @@ public static void argsHelper(String[] args) throws Exception {
8390 if (command .equals ("explicit-content" )) {
8491 analyzeExplicitContent (path );
8592 }
93+ if (command .equals ("speech-transcription" )) {
94+ speechTranscription (path );
95+ }
8696 }
8797
8898 /**
@@ -322,4 +332,69 @@ public static void analyzeExplicitContent(String gcsUri) throws Exception {
322332 // [END video_analyze_explicit_content]
323333 }
324334 }
325- }
335+
336+ /**
337+ * Transcribe speech from a video stored on GCS.
338+ *
339+ * @param gcsUri the path to the video file to analyze.
340+ */
341+ public static void speechTranscription (String gcsUri ) throws Exception {
342+ // [START video_speech_transcription_gcs]
343+ // Instantiate a com.google.cloud.videointelligence.v1.VideoIntelligenceServiceClient
344+ try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient .create ()) {
345+ // Set the language code
346+ SpeechTranscriptionConfig config = SpeechTranscriptionConfig .newBuilder ()
347+ .setLanguageCode ("en-US" )
348+ .setEnableAutomaticPunctuation (true )
349+ .build ();
350+
351+ // Set the video context with the above configuration
352+ VideoContext context = VideoContext .newBuilder ()
353+ .setSpeechTranscriptionConfig (config )
354+ .build ();
355+
356+ // Create the request
357+ AnnotateVideoRequest request = AnnotateVideoRequest .newBuilder ()
358+ .setInputUri (gcsUri )
359+ .addFeatures (Feature .SPEECH_TRANSCRIPTION )
360+ .setVideoContext (context )
361+ .build ();
362+
363+ // asynchronously perform speech transcription on videos
364+ OperationFuture <AnnotateVideoResponse , AnnotateVideoProgress > response =
365+ client .annotateVideoAsync (request );
366+
367+ System .out .println ("Waiting for operation to complete..." );
368+ // Display the results
369+ for (VideoAnnotationResults results : response .get (600 , TimeUnit .SECONDS )
370+ .getAnnotationResultsList ()) {
371+ for (SpeechTranscription speechTranscription : results .getSpeechTranscriptionsList ()) {
372+ try {
373+ // Print the transcription
374+ if (speechTranscription .getAlternativesCount () > 0 ) {
375+ SpeechRecognitionAlternative alternative = speechTranscription .getAlternatives (0 );
376+
377+ System .out .printf ("Transcript: %s\n " , alternative .getTranscript ());
378+ System .out .printf ("Confidence: %.2f\n " , alternative .getConfidence ());
379+
380+ System .out .println ("Word level information:" );
381+ for (WordInfo wordInfo : alternative .getWordsList ()) {
382+ double startTime = wordInfo .getStartTime ().getSeconds ()
383+ + wordInfo .getStartTime ().getNanos () / 1e9 ;
384+ double endTime = wordInfo .getEndTime ().getSeconds ()
385+ + wordInfo .getEndTime ().getNanos () / 1e9 ;
386+ System .out .printf ("\t %4.2fs - %4.2fs: %s\n " ,
387+ startTime , endTime , wordInfo .getWord ());
388+ }
389+ } else {
390+ System .out .println ("No transcription found" );
391+ }
392+ } catch (IndexOutOfBoundsException ioe ) {
393+ System .out .println ("Could not retrieve frame: " + ioe .getMessage ());
394+ }
395+ }
396+ }
397+ }
398+ // [END video_speech_transcription_gcs]
399+ }
400+ }
0 commit comments