| 
24 | 24 | import com.google.cloud.speech.v1p1beta1.RecognitionAudio;  | 
25 | 25 | import com.google.cloud.speech.v1p1beta1.RecognitionConfig;  | 
26 | 26 | import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;  | 
 | 27 | +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata;  | 
 | 28 | +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType;  | 
 | 29 | +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance;  | 
 | 30 | +import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType;  | 
27 | 31 | import com.google.cloud.speech.v1p1beta1.RecognizeResponse;  | 
28 | 32 | import com.google.cloud.speech.v1p1beta1.SpeechClient;  | 
29 | 33 | import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;  | 
@@ -53,7 +57,7 @@ public static void main(String... args) throws Exception {  | 
53 | 57 |           "\tjava %s \"<command>\" \"<path-to-image>\"\n"  | 
54 | 58 |           + "Commands:\n"  | 
55 | 59 |           + "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n"  | 
56 |  | -          + "\t| auto-punctuation | stream-punctuation\n"  | 
 | 60 | +          + "\t| auto-punctuation | stream-punctuation | enhanced-model | metadata\n"  | 
57 | 61 |           + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "  | 
58 | 62 |           + "for a Cloud Storage resource (gs://...)\n",  | 
59 | 63 |           Recognize.class.getCanonicalName());  | 
@@ -97,6 +101,10 @@ public static void main(String... args) throws Exception {  | 
97 | 101 |       }  | 
98 | 102 |     } else if (command.equals("stream-punctuation")) {  | 
99 | 103 |       streamingTranscribeWithAutomaticPunctuation(path);  | 
 | 104 | +    } else if (command.equals("enhanced-model")) {  | 
 | 105 | +      transcribeFileWithEnhancedModel(path);  | 
 | 106 | +    } else if (command.equals("metadata")) {  | 
 | 107 | +      transcribeFileWithMetadata(path);  | 
100 | 108 |     }  | 
101 | 109 |   }  | 
102 | 110 | 
 
  | 
@@ -678,4 +686,97 @@ public SettableFuture<List<T>> future() {  | 
678 | 686 |     }  | 
679 | 687 |   }  | 
680 | 688 |   // [END speech_stream_recognize_punctuation]  | 
 | 689 | + | 
 | 690 | +  // [START speech_transcribe_file_with_enhanced_model]  | 
 | 691 | +  /**  | 
 | 692 | +   * Transcribe the given audio file using an enhanced model.  | 
 | 693 | +   *  | 
 | 694 | +   * @param fileName the path to an audio file.  | 
 | 695 | +   */  | 
 | 696 | +  public static void transcribeFileWithEnhancedModel(String fileName) throws Exception {  | 
 | 697 | +    Path path = Paths.get(fileName);  | 
 | 698 | +    byte[] content = Files.readAllBytes(path);  | 
 | 699 | + | 
 | 700 | +    try (SpeechClient speechClient = SpeechClient.create()) {  | 
 | 701 | +      // Get the contents of the local audio file  | 
 | 702 | +      RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder()  | 
 | 703 | +          .setContent(ByteString.copyFrom(content))  | 
 | 704 | +          .build();  | 
 | 705 | + | 
 | 706 | +      // Configure request to enable enhanced models  | 
 | 707 | +      RecognitionConfig config = RecognitionConfig.newBuilder()  | 
 | 708 | +          .setEncoding(AudioEncoding.LINEAR16)  | 
 | 709 | +          .setLanguageCode("en-US")  | 
 | 710 | +          .setSampleRateHertz(8000)  | 
 | 711 | +          // Enhanced models are only available to projects that  | 
 | 712 | +          // opt in for audio data collection.  | 
 | 713 | +          .setUseEnhanced(true)  | 
 | 714 | +          // A model must be specified to use enhanced model.  | 
 | 715 | +          .setModel("phone_call")  | 
 | 716 | +          .build();  | 
 | 717 | + | 
 | 718 | +      // Perform the transcription request  | 
 | 719 | +      RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);  | 
 | 720 | + | 
 | 721 | +      // Print out the results  | 
 | 722 | +      for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {  | 
 | 723 | +        // There can be several alternative transcripts for a given chunk of speech. Just use the  | 
 | 724 | +        // first (most likely) one here.  | 
 | 725 | +        SpeechRecognitionAlternative alternative = result.getAlternatives(0);  | 
 | 726 | +        System.out.format("Transcript: %s\n\n", alternative.getTranscript());  | 
 | 727 | +      }  | 
 | 728 | +    }  | 
 | 729 | +  }  | 
 | 730 | +  // [END speech_transcribe_file_with_enhanced_model]  | 
 | 731 | + | 
  // [START speech_transcribe_file_with_metadata]
  /**
   * Transcribe the given audio file and include recognition metadata in the request.
   *
   * <p>Metadata (interaction type, microphone distance, device, industry code) helps the
   * service choose better recognition behavior for the audio; it does not change the API
   * call shape — it is simply attached to the {@code RecognitionConfig}.
   *
   * @param fileName the path to an audio file.
   */
  public static void transcribeFileWithMetadata(String fileName) throws Exception {
    // Read the raw audio bytes from disk; fails fast before any client is created.
    Path path = Paths.get(fileName);
    byte[] content = Files.readAllBytes(path);

    try (SpeechClient speechClient = SpeechClient.create()) {
      // Wrap the local audio bytes for the recognition request.
      RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder()
          .setContent(ByteString.copyFrom(content))
          .build();

      // Construct a recognition metadata object.
      // Most metadata fields are specified as enums that can be found
      // in speech.enums.RecognitionMetadata
      RecognitionMetadata metadata = RecognitionMetadata.newBuilder()
          .setInteractionType(InteractionType.DISCUSSION)
          .setMicrophoneDistance(MicrophoneDistance.NEARFIELD)
          .setRecordingDeviceType(RecordingDeviceType.SMARTPHONE)
          .setRecordingDeviceName("Pixel 2 XL") // Some metadata fields are free form strings
          // And some are integers, for instance the 6 digit NAICS code
          // https://www.naics.com/search/
          .setIndustryNaicsCodeOfAudio(519190)
          .build();

      // Build the recognition config and attach the metadata to it.
      RecognitionConfig config = RecognitionConfig.newBuilder()
          .setEncoding(AudioEncoding.LINEAR16)
          .setLanguageCode("en-US")
          .setSampleRateHertz(8000)
          .setMetadata(metadata) // Add the metadata to the config
          .build();

      // Perform the synchronous transcription request.
      RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);

      // Print out the results
      for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
        // There can be several alternative transcripts for a given chunk of speech. Just use the
        // first (most likely) one here.
        SpeechRecognitionAlternative alternative = result.getAlternatives(0);
        System.out.format("Transcript: %s\n\n", alternative.getTranscript());
      }
    }
  }
  // [END speech_transcribe_file_with_metadata]
681 | 782 | }  | 
0 commit comments