Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Infinite Stream recognition #1297

Merged
merged 4 commits into from
Dec 14, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions speech/cloud-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -135,5 +135,33 @@
</plugins>
</build>
</profile>
<!-- Profile that runs the com.example.speech.InfiniteStreamRecognize sample via exec-maven-plugin. -->
<profile>
<id>InfiniteStreamRecognize</id>
<!-- Activation is keyed on a property named InfiniteStreamRecognize. -->
<activation>
<property>
<name>InfiniteStreamRecognize</name>
</property>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
<executions>
<execution>
<goals>
<!-- The "java" goal of exec-maven-plugin is bound for this execution. -->
<goal>java</goal>
</goals>
</execution>
</executions>
<configuration>
<!-- Entry point launched by the plugin. -->
<mainClass>com.example.speech.InfiniteStreamRecognize</mainClass>
<!-- NOTE(review): presumably disabled so the plugin does not wait on/interrupt daemon
     threads left behind by the client libraries when the sample ends; confirm. -->
<cleanupDaemonThreads>false</cleanupDaemonThreads>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
/*
* Copyright 2018 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.speech;

// [START speech_transcribe_infinite_streaming]
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.DataLine.Info;
import javax.sound.sampled.TargetDataLine;

public class InfiniteStreamRecognize {

// Creating shared object
private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();
private static TargetDataLine targetDataLine;
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes

public static void main(String... args) {
try {
infiniteStreamingRecognize();
} catch (Exception e) {
System.out.println("Exception caught: " + e);
}
}

/** Performs infinite streaming speech recognition */
public static void infiniteStreamingRecognize() throws Exception {

// Microphone Input buffering
class MicBuffer implements Runnable {

@Override
public void run() {
System.out.println("Start speaking...Press Ctrl-C to stop");
targetDataLine.start();
byte[] data = new byte[BYTES_PER_BUFFER];
while (targetDataLine.isOpen()) {
try {
int numBytesRead = targetDataLine.read(data, 0, data.length);
if ((numBytesRead <= 0) && (targetDataLine.isOpen())) {
continue;
}
sharedQueue.put(data.clone());
} catch (InterruptedException e) {
System.out.println("Microphone input buffering interrupted : " + e.getMessage());
}
}
}
}

// Creating microphone input buffer thread
MicBuffer micrunnable = new MicBuffer();
Thread micThread = new Thread(micrunnable);
ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
try (SpeechClient client = SpeechClient.create()) {
ClientStream<StreamingRecognizeRequest> clientStream;
responseObserver =
new ResponseObserver<StreamingRecognizeResponse>() {

ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

public void onStart(StreamController controller) {}

public void onResponse(StreamingRecognizeResponse response) {
responses.add(response);
StreamingRecognitionResult result = response.getResultsList().get(0);
// There can be several alternative transcripts for a given chunk of speech. Just
// use the first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
System.out.printf("Transcript : %s\n", alternative.getTranscript());
}

public void onComplete() {
System.out.println("Done");
}

public void onError(Throwable t) {
System.out.println(t);
}
};

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

RecognitionConfig recognitionConfig =
RecognitionConfig.newBuilder()
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
StreamingRecognitionConfig streamingRecognitionConfig =
StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();

StreamingRecognizeRequest request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build(); // The first request in a streaming call has to be a config

clientStream.send(request);

try {
// SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
// bigEndian: false
AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
DataLine.Info targetInfo =
new Info(
TargetDataLine.class,
audioFormat); // Set the system information to read from the microphone audio
// stream

if (!AudioSystem.isLineSupported(targetInfo)) {
System.out.println("Microphone not supported");
System.exit(0);
}
// Target data line captures the audio stream the microphone produces.
targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
targetDataLine.open(audioFormat);
micThread.start();

long startTime = System.currentTimeMillis();

while (true) {

long estimatedTime = System.currentTimeMillis() - startTime;

if (estimatedTime >= 55000) {

clientStream.closeSend();
clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build();

startTime = System.currentTimeMillis();

} else {
request =
StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(sharedQueue.take()))
.build();
}

clientStream.send(request);
}
} catch (Exception e) {
System.out.println(e);
}
}
}
}
// [END speech_transcribe_infinite_streaming]
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
import java.util.ArrayList;
import java.util.List;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
nirupa-kumar marked this conversation as resolved.
Show resolved Hide resolved
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
Expand All @@ -64,7 +66,7 @@ public static void main(String... args) throws Exception {
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+ "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
+ "\t| enhanced-model | model-selection\n"
+ "\t| enhanced-model | model-selection \n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand Down Expand Up @@ -830,4 +832,5 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
}
}
// [END speech_transcribe_model_selection_gcs]

}