Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Auto-Punctuation samples to speech #1079

Merged
merged 7 commits into from
Apr 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions speech/cloud-client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,33 @@ mvn exec:java -DRecognize -Dexec.args="wordoffsets gs://cloud-samples-tests/spee
```

## Model Selection
Synchronously transcribe a audio file
Synchronously transcribe an audio file
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@waprin Fixed

```
mvn exec:java -DRecognize -Dexec.args="model-selection ./resources/Google_Gnome.wav"
```

Asynchronously transcribe a audio file hosted on GCS
Asynchronously transcribe an audio file hosted on GCS
```
mvn exec:java -DRecognize -Dexec.args="model-selection gs://cloud-samples-tests/speech/Google_Gnome.wav"
```

Perform streaming speech transcription on an audio file
```
mvn exec:java -DRecognize -Dexec.args="streamrecognize ./resources/Google_Gnome.wav"
```

## Auto Punctuation
Synchronously transcribe and punctuate an audio file
```
mvn exec:java -DRecognize -Dexec.args="auto-punctuation ./resources/audio.raw"
```

Asynchronously transcribe and punctuate an audio file hosted on GCS
```
mvn exec:java -DRecognize -Dexec.args="auto-punctuation gs://cloud-samples-tests/speech/brooklyn.flac"
```

Perform streaming speech transcription and punctuation on an audio file
```
mvn exec:java -DRecognize -Dexec.args="stream-punctuation ./resources/audio.raw"
```
2 changes: 1 addition & 1 deletion speech/cloud-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-speech</artifactId>
<version>0.40.0-alpha</version>
<version>0.42.0-alpha</version>
</dependency>
<!-- [END dependencies] -->

Expand Down
181 changes: 181 additions & 0 deletions speech/cloud-client/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public static void main(String... args) throws Exception {
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n"
+ "\t| auto-punctuation | stream-punctuation\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand Down Expand Up @@ -88,6 +89,14 @@ public static void main(String... args) throws Exception {
} else {
transcribeModelSelection(path);
}
} else if (command.equals("auto-punctuation")) {
if (path.startsWith("gs://")) {
transcribeGcsWithAutomaticPunctuation(path);
} else {
transcribeFileWithAutomaticPunctuation(path);
}
} else if (command.equals("stream-punctuation")) {
streamingTranscribeWithAutomaticPunctuation(path);
}
}

Expand Down Expand Up @@ -497,4 +506,176 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
}
// [END speech_transcribe_model_selection_gcs]
}

// [START speech_sync_recognize_punctuation]
/**
 * Performs synchronous transcription with automatic punctuation on raw PCM audio data and prints
 * the first alternative of the first result.
 *
 * <p>If the response carries no results (or the first result carries no alternatives), a short
 * notice is printed instead of a transcript.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 * @throws Exception if the file cannot be read, the client cannot be created, or the request
 *     fails.
 */
public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] content = Files.readAllBytes(path);

  try (SpeechClient speechClient = SpeechClient.create()) {
    // Configure request with local raw PCM audio
    RecognitionConfig recConfig = RecognitionConfig.newBuilder()
        .setEncoding(AudioEncoding.LINEAR16)
        .setLanguageCode("en-US")
        .setSampleRateHertz(16000)
        .setEnableAutomaticPunctuation(true)
        .build();

    // Get the contents of the local audio file
    RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder()
        .setContent(ByteString.copyFrom(content))
        .build();

    // Perform the transcription request
    RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);

    // Calling get(0) on an empty list throws IndexOutOfBoundsException, so check first.
    if (recognizeResponse.getResultsList().isEmpty()) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);

    if (result.getAlternativesList().isEmpty()) {
      System.out.println("No transcription results returned.");
      return;
    }

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

    // Print out the result
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}
// [END speech_sync_recognize_punctuation]

// [START speech_async_recognize_gcs_punctuation]
/**
 * Performs asynchronous transcription with automatic punctuation on a remote FLAC file and
 * prints the first alternative of the first result.
 *
 * <p>While the long-running operation is not done, a progress line is printed every ten seconds.
 * If the response carries no results (or the first result carries no alternatives), a short
 * notice is printed instead of a transcript.
 *
 * @param gcsUri the path to the remote FLAC audio file to transcribe.
 * @throws Exception if the client cannot be created, the wait is interrupted, or the operation
 *     fails.
 */
public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
  try (SpeechClient speechClient = SpeechClient.create()) {
    // Configure request for FLAC-encoded audio with automatic punctuation enabled
    RecognitionConfig config = RecognitionConfig.newBuilder()
        .setEncoding(AudioEncoding.FLAC)
        .setLanguageCode("en-US")
        .setSampleRateHertz(16000)
        .setEnableAutomaticPunctuation(true)
        .build();

    // Set the remote path for the audio file
    RecognitionAudio audio = RecognitionAudio.newBuilder()
        .setUri(gcsUri)
        .build();

    // Use non-blocking call for getting file transcription
    OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
        speechClient.longRunningRecognizeAsync(config, audio);

    // Poll so the user sees progress output while the operation is running.
    while (!response.isDone()) {
      System.out.println("Waiting for response...");
      Thread.sleep(10000);
    }

    LongRunningRecognizeResponse recognizeResponse = response.get();

    // Calling get(0) on an empty list throws IndexOutOfBoundsException, so check first.
    if (recognizeResponse.getResultsList().isEmpty()) {
      System.out.println("No transcription results returned.");
      return;
    }

    // Just print the first result here.
    SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);

    if (result.getAlternativesList().isEmpty()) {
      System.out.println("No transcription results returned.");
      return;
    }

    // There can be several alternative transcripts for a given chunk of speech. Just use the
    // first (most likely) one here.
    SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);

    // Print out the result
    System.out.printf("Transcript : %s\n", alternative.getTranscript());
  }
}
// [END speech_async_recognize_gcs_punctuation]

// [START speech_stream_recognize_punctuation]
/**
 * Performs streaming speech recognition with automatic punctuation on raw PCM audio data and
 * prints the first alternative of the first result of every response that has one.
 *
 * <p>The whole file is sent as a single audio request after the initial configuration request.
 * Responses with no results, or whose first result has no alternatives, are skipped.
 *
 * @param fileName the path to a PCM audio file to transcribe.
 * @throws Exception if the file cannot be read, the client cannot be created, or the stream
 *     terminates with an error.
 */
public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
  Path path = Paths.get(fileName);
  byte[] data = Files.readAllBytes(path);

  // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
  try (SpeechClient speech = SpeechClient.create()) {

    // Configure request with local raw PCM audio
    RecognitionConfig recConfig = RecognitionConfig.newBuilder()
        .setEncoding(AudioEncoding.LINEAR16)
        .setLanguageCode("en-US")
        .setSampleRateHertz(16000)
        .setEnableAutomaticPunctuation(true)
        .build();

    // Build the streaming config with the audio config
    StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
        .setConfig(recConfig)
        .build();

    // Collects every streamed message and exposes them through a future that completes when the
    // stream completes (or fails when the stream reports an error).
    class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
      private final SettableFuture<List<T>> future = SettableFuture.create();
      private final List<T> messages = new java.util.ArrayList<>();

      @Override
      public void onNext(T message) {
        messages.add(message);
      }

      @Override
      public void onError(Throwable t) {
        future.setException(t);
      }

      @Override
      public void onCompleted() {
        future.set(messages);
      }

      // Returns the SettableFuture object to get received messages / exceptions.
      public SettableFuture<List<T>> future() {
        return future;
      }
    }

    ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
        new ResponseApiStreamingObserver<>();

    BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
        speech.streamingRecognizeCallable();

    ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
        callable.bidiStreamingCall(responseObserver);

    // The first request must **only** contain the audio configuration:
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
        .setStreamingConfig(config)
        .build());

    // Subsequent requests must **only** contain the audio data.
    requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
        .setAudioContent(ByteString.copyFrom(data))
        .build());

    // Mark transmission as completed after sending the data.
    requestObserver.onCompleted();

    // Blocks until the response stream has completed.
    List<StreamingRecognizeResponse> responses = responseObserver.future().get();

    for (StreamingRecognizeResponse response : responses) {
      // Calling get(0) on an empty list throws IndexOutOfBoundsException, so skip responses
      // that carry no results.
      if (response.getResultsList().isEmpty()) {
        continue;
      }
      // For streaming recognize, the results list has one is_final result (if available) followed
      // by a number of in-progress results (if interim_results is true) for subsequent utterances.
      // Just print the first result here.
      StreamingRecognitionResult result = response.getResultsList().get(0);
      if (result.getAlternativesList().isEmpty()) {
        continue;
      }
      // There can be several alternative transcripts for a given chunk of speech. Just use the
      // first (most likely) one here.
      SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
      System.out.printf("Transcript : %s\n", alternative.getTranscript());
    }
  }
}
// [END speech_stream_recognize_punctuation]
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,25 @@ public void testGcsModelSelection() throws Exception {
assertThat(got).contains("OK Google");
assertThat(got).contains("the weather outside is sunny");
}

@Test
public void testAutoPunctuation() throws Exception {
  // Run the synchronous punctuation sample on the file named by audioFileName, then check the
  // text accumulated in bout for the punctuated transcript.
  Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
  assertThat(bout.toString()).contains("How old is the Brooklyn Bridge?");
}

@Test
public void testGcsAutoPunctuation() throws Exception {
  // Run the asynchronous punctuation sample on the URI held in gcsAudioPath, then check the
  // text accumulated in bout for the punctuated transcript.
  Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
  assertThat(bout.toString()).contains("How old is the Brooklyn Bridge?");
}

@Test
public void testStreamAutoPunctuation() throws Exception {
  // Run the streaming punctuation sample on the file named by audioFileName, then check the
  // text accumulated in bout for the punctuated transcript.
  Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
  assertThat(bout.toString()).contains("How old is the Brooklyn Bridge?");
}
}