Updated Infinite streaming sample #1422

Merged: 7 commits, May 16, 2019

Changes from all commits
@@ -20,14 +20,17 @@
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
- import com.google.cloud.speech.v1.RecognitionConfig;
- import com.google.cloud.speech.v1.SpeechClient;
- import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
- import com.google.cloud.speech.v1.StreamingRecognitionConfig;
- import com.google.cloud.speech.v1.StreamingRecognitionResult;
- import com.google.cloud.speech.v1.StreamingRecognizeRequest;
- import com.google.cloud.speech.v1.StreamingRecognizeResponse;
+ import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
+ import com.google.cloud.speech.v1p1beta1.SpeechClient;
+ import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
+ import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
+ import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
+ import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
+ import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
+ import com.google.protobuf.Duration;
+ import java.lang.Math;
+ import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
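
Apart from the new bookkeeping imports (Duration, Math, DecimalFormat), the import change above only swaps the client package from v1 to v1p1beta1; client construction itself is unchanged. A minimal sketch, assuming application default credentials are configured (the class name is made up for illustration):

import com.google.cloud.speech.v1p1beta1.SpeechClient;

// Hypothetical stand-alone class, only to show that the client is created the
// same way after the package switch.
public class ClientSketch {
  public static void main(String[] args) throws Exception {
    try (SpeechClient client = SpeechClient.create()) {
      // The callables used by the sample (e.g. streamingRecognizeCallable())
      // are available here just as they were on the v1 client.
      System.out.println("v1p1beta1 SpeechClient ready");
    }
  }
}
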
@@ -39,11 +42,29 @@

public class InfiniteStreamRecognize {

+ private static final int STREAMING_LIMIT = 10000; // 10 seconds
+
+ public static final String RED = "\033[0;31m";
+ public static final String GREEN = "\033[0;32m";
+ public static final String YELLOW = "\033[0;33m";
+
// Creating shared object
private static volatile BlockingQueue<byte[]> sharedQueue = new LinkedBlockingQueue();
private static TargetDataLine targetDataLine;
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes
+
+ private static int restartCounter = 0;
+ private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
+ private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
+ private static int resultEndTimeInMS = 0;
+ private static int isFinalEndTime = 0;
+ private static int finalRequestEndTime = 0;
+ private static boolean newStream = true;
+ private static double bridgingOffset = 0;
+ private static boolean lastTranscriptWasFinal = false;
+ private static StreamController referenceToStreamController;
+ private static ByteString tempByteString;

public static void main(String... args) {
try {
infiniteStreamingRecognize();
Expand All @@ -60,6 +81,7 @@ class MicBuffer implements Runnable {

@Override
public void run() {
+ System.out.println(YELLOW);
System.out.println("Start speaking...Press Ctrl-C to stop");
targetDataLine.start();
byte[] data = new byte[BYTES_PER_BUFFER];
@@ -88,24 +110,48 @@ public void run() {

ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

- public void onStart(StreamController controller) {}
+ public void onStart(StreamController controller) {
+ referenceToStreamController = controller;
+ }

public void onResponse(StreamingRecognizeResponse response) {

responses.add(response);

StreamingRecognitionResult result = response.getResultsList().get(0);
- // There can be several alternative transcripts for a given chunk of speech. Just
- // use the first (most likely) one here.
+
+ Duration resultEndTime = result.getResultEndTime();
+
+ resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
+ + (resultEndTime.getNanos() / 1000000));
+
+ double correctedTime = resultEndTimeInMS - bridgingOffset
+ + (STREAMING_LIMIT * restartCounter);
+ DecimalFormat format = new DecimalFormat("0.#");
+
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
- System.out.printf("Transcript : %s\n", alternative.getTranscript());
- }
-
- public void onComplete() {
- System.out.println("Done");
- }
-
- public void onError(Throwable t) {
- System.out.println(t);
- }
+ if (result.getIsFinal()) {
+ System.out.print(GREEN);
+ System.out.print("\033[2K\r");
+ System.out.printf("%s: %s\n", format.format(correctedTime),
+ alternative.getTranscript());
+
+ isFinalEndTime = resultEndTimeInMS;
+ lastTranscriptWasFinal = true;
+ } else {
+ System.out.print(RED);
+ System.out.print("\033[2K\r");
+ System.out.printf("%s: %s", format.format(correctedTime),
+ alternative.getTranscript());
+
+ lastTranscriptWasFinal = false;
+ }
+ }
+
+ public void onComplete() {}
+
+ public void onError(Throwable t) {}

};

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
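
The corrected timestamp printed above stitches the per-stream result times into one continuous timeline: each restart shifts times forward by STREAMING_LIMIT, and bridgingOffset subtracts the audio that was re-sent from the previous stream. A small self-contained sketch with invented numbers (only the formula comes from the diff):

public class CorrectedTimeSketch {
  public static void main(String[] args) {
    final int streamingLimit = 10000; // STREAMING_LIMIT: 10 s per stream
    int restartCounter = 2;           // hypothetical: two restarts so far
    double bridgingOffset = 1200;     // hypothetical: 1.2 s of re-sent audio
    int resultEndTimeInMS = 3500;     // hypothetical: 3.5 s into the current stream

    // Same arithmetic as onResponse() in the sample.
    double correctedTime = resultEndTimeInMS - bridgingOffset
        + (streamingLimit * restartCounter);

    System.out.println(correctedTime); // 3500 - 1200 + 20000 = 22300.0 ms
  }
}
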
@@ -116,8 +162,12 @@ public void onError(Throwable t) {
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();

StreamingRecognitionConfig streamingRecognitionConfig =
- StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
+ StreamingRecognitionConfig.newBuilder()
+ .setConfig(recognitionConfig)
+ .setInterimResults(true)
+ .build();

StreamingRecognizeRequest request =
StreamingRecognizeRequest.newBuilder()
@@ -151,23 +201,84 @@

long estimatedTime = System.currentTimeMillis() - startTime;

- if (estimatedTime >= 55000) {
+ if (estimatedTime >= STREAMING_LIMIT) {

clientStream.closeSend();
+ referenceToStreamController.cancel(); // remove Observer
+
+ if (resultEndTimeInMS > 0) {
+ finalRequestEndTime = isFinalEndTime;
+ }
+ resultEndTimeInMS = 0;
+
+ lastAudioInput = null;
+ lastAudioInput = audioInput;
+ audioInput = new ArrayList<ByteString>();
+
+ restartCounter++;
+
+ if (!lastTranscriptWasFinal) {
+ System.out.print('\n');
+ }
+
+ newStream = true;

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build();

+ System.out.println(YELLOW);
+ System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);

startTime = System.currentTimeMillis();

} else {

+ if ((newStream) && (lastAudioInput.size() > 0)) {
+ // if this is the first audio from a new request
+ // calculate amount of unfinalized audio from last request
+ // resend the audio to the speech client before incoming audio
+ double chunkTime = STREAMING_LIMIT / lastAudioInput.size();
+ // ms length of each chunk in previous request audio arrayList
+ if (chunkTime != 0) {
+ if (bridgingOffset < 0) {
+ // bridging Offset accounts for time of resent audio
+ // calculated from last request
+ bridgingOffset = 0;
+ }
+ if (bridgingOffset > finalRequestEndTime) {
+ bridgingOffset = finalRequestEndTime;
+ }
+ int chunksFromMS = (int) Math.floor((finalRequestEndTime
+ - bridgingOffset) / chunkTime);
+ // chunks from MS is number of chunks to resend
+ bridgingOffset = (int) Math.floor((lastAudioInput.size()
+ - chunksFromMS) * chunkTime);
+ // set bridging offset for next request
+ for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {
+
+ request =
+ StreamingRecognizeRequest.newBuilder()
+ .setAudioContent(lastAudioInput.get(i))
+ .build();
+ clientStream.send(request);
+ }
+ }
+ newStream = false;
+ }
+
+ tempByteString = ByteString.copyFrom(sharedQueue.take());

request =
StreamingRecognizeRequest.newBuilder()
- .setAudioContent(ByteString.copyFrom(sharedQueue.take()))
+ .setAudioContent(tempByteString)
.build();

+ audioInput.add(tempByteString);

}

clientStream.send(request);
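
The added else branch above is what makes the stream effectively infinite: when a new stream starts, every chunk recorded after the last final result is replayed into the new stream, and the duration of that replayed audio becomes the bridging offset used to correct timestamps. A standalone sketch of the same arithmetic with invented values (the formulas mirror the diff; the numbers are hypothetical):

public class BridgingOffsetSketch {
  public static void main(String[] args) {
    final int streamingLimit = 10000; // STREAMING_LIMIT in ms
    int lastAudioInputSize = 50;      // hypothetical: chunks buffered in the previous stream
    int finalRequestEndTime = 8800;   // hypothetical: ms mark of the last final result
    double bridgingOffset = 0;        // nothing carried over yet

    // Approximate length of each buffered chunk; integer division, as in the sample.
    double chunkTime = streamingLimit / lastAudioInputSize; // 10000 / 50 = 200 ms

    // Clamp the carried-over offset into [0, finalRequestEndTime], as the sample does.
    if (bridgingOffset < 0) {
      bridgingOffset = 0;
    }
    if (bridgingOffset > finalRequestEndTime) {
      bridgingOffset = finalRequestEndTime;
    }

    // Chunks up to the last final result are already transcribed and are skipped.
    int chunksFromMS = (int) Math.floor((finalRequestEndTime - bridgingOffset) / chunkTime); // 44

    // The remaining chunks (44..49 here) get re-sent; their duration becomes the
    // bridging offset applied to timestamps in the next stream.
    bridgingOffset = (int) Math.floor((lastAudioInputSize - chunksFromMS) * chunkTime); // 1200 ms

    System.out.printf("resend chunks %d..%d, next bridgingOffset = %.0f ms%n",
        chunksFromMS, lastAudioInputSize - 1, bridgingOffset);
  }
}
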
@@ -699,7 +699,6 @@ public void onError(Throwable t) {
System.out.println("Stop speaking.");
targetDataLine.stop();
targetDataLine.close();
- break;
}
request =
StreamingRecognizeRequest.newBuilder()
@@ -113,22 +113,22 @@ public void testStreamRecognize() throws Exception {
@Test
public void testAutoPunctuation() throws Exception {
Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
- String got = bout.toString();
- assertThat(got).contains("How old is the Brooklyn Bridge?");
+ String got = bout.toString().toLowerCase();
+ assertThat(got).contains("how old is the brooklyn bridge");

Contributor: @blechdom Hey, sorry, just seeing this. Can you revert these test changes? These are testing the Auto Punctuation API and should be verifying capitalization and symbols. Otherwise, we need to notify the product team of an issue on their backend.

Contributor: @nnegrey Sorry, that was my fault. I pushed the changes to unblock the tests. Is it just the Auto Punctuation API you want reverted?

Contributor: Yeah, are they failing? I can go check with the product team to see if they made a change to something.

Contributor: Filed a bug. If they respond and fix it, I can revert the samples to the old state and leave them as is for now. Thanks!

}

@Test
public void testGcsAutoPunctuation() throws Exception {
Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
- String got = bout.toString();
- assertThat(got).contains("How old is the Brooklyn Bridge?");
+ String got = bout.toString().toLowerCase();
+ assertThat(got).contains("how old is the brooklyn bridge");
}

@Test
public void testStreamAutoPunctuation() throws Exception {
Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
- String got = bout.toString();
- assertThat(got).contains("How old is the Brooklyn Bridge?");
+ String got = bout.toString().toLowerCase();
+ assertThat(got).contains("how old is the brooklyn bridge");
}

@Test
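
For context on the review thread above, here is a small sketch contrasting the original, case-sensitive assertion with the relaxed lowercase version introduced in this PR (it reuses the Truth assertThat these tests already use; the transcript string is the one from the original assertion, and the class is made up for illustration):

import static com.google.common.truth.Truth.assertThat;

public class PunctuationAssertionSketch {
  public static void main(String[] args) {
    // Pretend this is the transcript the Speech API returned for the test clip.
    String got = "How old is the Brooklyn Bridge?";

    // Original, stricter check: verifies the capitalization and punctuation
    // that the Auto Punctuation feature is supposed to produce.
    assertThat(got).contains("How old is the Brooklyn Bridge?");

    // Relaxed check from this PR: still passes if the backend stops
    // capitalizing or punctuating, which is why the reviewer asked to revert it.
    assertThat(got.toLowerCase()).contains("how old is the brooklyn bridge");
  }
}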