@@ -53,6 +53,7 @@ public static void main(String... args) throws Exception {
5353 "\t java %s \" <command>\" \" <path-to-image>\" \n "
5454 + "Commands:\n "
5555 + "\t syncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n "
56+ + "\t | auto-punctuation | stream-punctuation\n "
5657 + "Path:\n \t A file path (ex: ./resources/audio.raw) or a URI "
5758 + "for a Cloud Storage resource (gs://...)\n " ,
5859 Recognize .class .getCanonicalName ());
@@ -88,6 +89,14 @@ public static void main(String... args) throws Exception {
8889 } else {
8990 transcribeModelSelection (path );
9091 }
92+ } else if (command .equals ("auto-punctuation" )) {
93+ if (path .startsWith ("gs://" )) {
94+ transcribeGcsWithAutomaticPunctuation (path );
95+ } else {
96+ transcribeFileWithAutomaticPunctuation (path );
97+ }
98+ } else if (command .equals ("stream-punctuation" )) {
99+ streamingTranscribeWithAutomaticPunctuation (path );
91100 }
92101 }
93102
@@ -497,4 +506,176 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
497506 }
498507 // [END speech_transcribe_model_selection_gcs]
499508 }
509+
510+ // [START speech_sync_recognize_punctuation]
511+ /**
512+ * Performs transcription with automatic punctuation on raw PCM audio data.
513+ *
514+ * @param fileName the path to a PCM audio file to transcribe.
515+ */
516+ public static void transcribeFileWithAutomaticPunctuation (String fileName ) throws Exception {
517+ Path path = Paths .get (fileName );
518+ byte [] content = Files .readAllBytes (path );
519+
520+ try (SpeechClient speechClient = SpeechClient .create ()) {
521+ // Configure request with local raw PCM audio
522+ RecognitionConfig recConfig = RecognitionConfig .newBuilder ()
523+ .setEncoding (AudioEncoding .LINEAR16 )
524+ .setLanguageCode ("en-US" )
525+ .setSampleRateHertz (16000 )
526+ .setEnableAutomaticPunctuation (true )
527+ .build ();
528+
529+ // Get the contents of the local audio file
530+ RecognitionAudio recognitionAudio = RecognitionAudio .newBuilder ()
531+ .setContent (ByteString .copyFrom (content ))
532+ .build ();
533+
534+ // Perform the transcription request
535+ RecognizeResponse recognizeResponse = speechClient .recognize (recConfig , recognitionAudio );
536+
537+ // Just print the first result here.
538+ SpeechRecognitionResult result = recognizeResponse .getResultsList ().get (0 );
539+
540+ // There can be several alternative transcripts for a given chunk of speech. Just use the
541+ // first (most likely) one here.
542+ SpeechRecognitionAlternative alternative = result .getAlternativesList ().get (0 );
543+
544+ // Print out the result
545+ System .out .printf ("Transcript : %s\n " , alternative .getTranscript ());
546+ }
547+ }
548+ // [END speech_sync_recognize_punctuation]
549+
550+ // [START speech_async_recognize_gcs_punctuation]
551+ /**
552+ * Performs transcription on remote FLAC file and prints the transcription.
553+ *
554+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
555+ */
556+ public static void transcribeGcsWithAutomaticPunctuation (String gcsUri ) throws Exception {
557+ try (SpeechClient speechClient = SpeechClient .create ()) {
558+ // Configure request with raw PCM audio
559+ RecognitionConfig config = RecognitionConfig .newBuilder ()
560+ .setEncoding (AudioEncoding .FLAC )
561+ .setLanguageCode ("en-US" )
562+ .setSampleRateHertz (16000 )
563+ .setEnableAutomaticPunctuation (true )
564+ .build ();
565+
566+ // Set the remote path for the audio file
567+ RecognitionAudio audio = RecognitionAudio .newBuilder ()
568+ .setUri (gcsUri )
569+ .build ();
570+
571+ // Use non-blocking call for getting file transcription
572+ OperationFuture <LongRunningRecognizeResponse , LongRunningRecognizeMetadata > response =
573+ speechClient .longRunningRecognizeAsync (config , audio );
574+
575+ while (!response .isDone ()) {
576+ System .out .println ("Waiting for response..." );
577+ Thread .sleep (10000 );
578+ }
579+
580+ // Just print the first result here.
581+ SpeechRecognitionResult result = response .get ().getResultsList ().get (0 );
582+
583+ // There can be several alternative transcripts for a given chunk of speech. Just use the
584+ // first (most likely) one here.
585+ SpeechRecognitionAlternative alternative = result .getAlternativesList ().get (0 );
586+
587+ // Print out the result
588+ System .out .printf ("Transcript : %s\n " , alternative .getTranscript ());
589+ }
590+ }
591+ // [END speech_async_recognize_gcs_punctuation]
592+
593+ // [START speech_stream_recognize_punctuation]
594+ /**
595+ * Performs streaming speech recognition on raw PCM audio data.
596+ *
597+ * @param fileName the path to a PCM audio file to transcribe.
598+ */
599+ public static void streamingTranscribeWithAutomaticPunctuation (String fileName ) throws Exception {
600+ Path path = Paths .get (fileName );
601+ byte [] data = Files .readAllBytes (path );
602+
603+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
604+ try (SpeechClient speech = SpeechClient .create ()) {
605+
606+ // Configure request with local raw PCM audio
607+ RecognitionConfig recConfig = RecognitionConfig .newBuilder ()
608+ .setEncoding (AudioEncoding .LINEAR16 )
609+ .setLanguageCode ("en-US" )
610+ .setSampleRateHertz (16000 )
611+ .setEnableAutomaticPunctuation (true )
612+ .build ();
613+
614+ // Build the streaming config with the audio config
615+ StreamingRecognitionConfig config = StreamingRecognitionConfig .newBuilder ()
616+ .setConfig (recConfig )
617+ .build ();
618+
619+ class ResponseApiStreamingObserver <T > implements ApiStreamObserver <T > {
620+ private final SettableFuture <List <T >> future = SettableFuture .create ();
621+ private final List <T > messages = new java .util .ArrayList <T >();
622+
623+ @ Override
624+ public void onNext (T message ) {
625+ messages .add (message );
626+ }
627+
628+ @ Override
629+ public void onError (Throwable t ) {
630+ future .setException (t );
631+ }
632+
633+ @ Override
634+ public void onCompleted () {
635+ future .set (messages );
636+ }
637+
638+ // Returns the SettableFuture object to get received messages / exceptions.
639+ public SettableFuture <List <T >> future () {
640+ return future ;
641+ }
642+ }
643+
644+ ResponseApiStreamingObserver <StreamingRecognizeResponse > responseObserver =
645+ new ResponseApiStreamingObserver <>();
646+
647+ BidiStreamingCallable <StreamingRecognizeRequest , StreamingRecognizeResponse > callable =
648+ speech .streamingRecognizeCallable ();
649+
650+ ApiStreamObserver <StreamingRecognizeRequest > requestObserver =
651+ callable .bidiStreamingCall (responseObserver );
652+
653+ // The first request must **only** contain the audio configuration:
654+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
655+ .setStreamingConfig (config )
656+ .build ());
657+
658+ // Subsequent requests must **only** contain the audio data.
659+ requestObserver .onNext (StreamingRecognizeRequest .newBuilder ()
660+ .setAudioContent (ByteString .copyFrom (data ))
661+ .build ());
662+
663+ // Mark transmission as completed after sending the data.
664+ requestObserver .onCompleted ();
665+
666+ List <StreamingRecognizeResponse > responses = responseObserver .future ().get ();
667+
668+ for (StreamingRecognizeResponse response : responses ) {
669+ // For streaming recognize, the results list has one is_final result (if available) followed
670+ // by a number of in-progress results (if iterim_results is true) for subsequent utterances.
671+ // Just print the first result here.
672+ StreamingRecognitionResult result = response .getResultsList ().get (0 );
673+ // There can be several alternative transcripts for a given chunk of speech. Just use the
674+ // first (most likely) one here.
675+ SpeechRecognitionAlternative alternative = result .getAlternativesList ().get (0 );
676+ System .out .printf ("Transcript : %s\n " , alternative .getTranscript ());
677+ }
678+ }
679+ }
680+ // [END speech_stream_recognize_punctuation]
500681}
0 commit comments