From b4c64887f5a348ab1da1e115cc42272c4e679fb1 Mon Sep 17 00:00:00 2001 From: Nirupa Anantha Kumar Date: Fri, 14 Dec 2018 07:56:50 -0800 Subject: [PATCH] Infinite Stream recognition (#1297) * Infinite Stream recognition * Infinite Stream recognition - Updates after review * Infinite Stream recognition - Updates after review * Infinite Stream recognition - Updates after review --- speech/cloud-client/pom.xml | 30 ++- .../speech/InfiniteStreamRecognize.java | 181 ++++++++++++++++++ 2 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml index 671921c11e9..3ab052ac99c 100644 --- a/speech/cloud-client/pom.xml +++ b/speech/cloud-client/pom.xml @@ -26,7 +26,7 @@ com.google.cloud.samples shared-configuration - 1.0.9 + 1.0.10 @@ -135,5 +135,33 @@ + + InfiniteStreamRecognize + + + InfiniteStreamRecognize + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + java + + + + + com.example.speech.InfiniteStreamRecognize + false + + + + + diff --git a/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java b/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java new file mode 100644 index 00000000000..7844e23af5e --- /dev/null +++ b/speech/cloud-client/src/main/java/com/example/speech/InfiniteStreamRecognize.java @@ -0,0 +1,181 @@ +/* + * Copyright 2018 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.speech; + +// [START speech_transcribe_infinite_streaming] +import com.google.api.gax.rpc.ClientStream; +import com.google.api.gax.rpc.ResponseObserver; +import com.google.api.gax.rpc.StreamController; +import com.google.cloud.speech.v1.RecognitionConfig; +import com.google.cloud.speech.v1.SpeechClient; +import com.google.cloud.speech.v1.SpeechRecognitionAlternative; +import com.google.cloud.speech.v1.StreamingRecognitionConfig; +import com.google.cloud.speech.v1.StreamingRecognitionResult; +import com.google.cloud.speech.v1.StreamingRecognizeRequest; +import com.google.cloud.speech.v1.StreamingRecognizeResponse; +import com.google.protobuf.ByteString; +import java.util.ArrayList; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import javax.sound.sampled.AudioFormat; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.DataLine; +import javax.sound.sampled.DataLine.Info; +import javax.sound.sampled.TargetDataLine; + +public class InfiniteStreamRecognize { + + // Creating shared object + private static volatile BlockingQueue sharedQueue = new LinkedBlockingQueue(); + private static TargetDataLine targetDataLine; + private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes + + public static void main(String... args) { + try { + infiniteStreamingRecognize(); + } catch (Exception e) { + System.out.println("Exception caught: " + e); + } + } + + /** Performs infinite streaming speech recognition */ + public static void infiniteStreamingRecognize() throws Exception { + + // Microphone Input buffering + class MicBuffer implements Runnable { + + @Override + public void run() { + System.out.println("Start speaking...Press Ctrl-C to stop"); + targetDataLine.start(); + byte[] data = new byte[BYTES_PER_BUFFER]; + while (targetDataLine.isOpen()) { + try { + int numBytesRead = targetDataLine.read(data, 0, data.length); + if ((numBytesRead <= 0) && (targetDataLine.isOpen())) { + continue; + } + sharedQueue.put(data.clone()); + } catch (InterruptedException e) { + System.out.println("Microphone input buffering interrupted : " + e.getMessage()); + } + } + } + } + + // Creating microphone input buffer thread + MicBuffer micrunnable = new MicBuffer(); + Thread micThread = new Thread(micrunnable); + ResponseObserver responseObserver = null; + try (SpeechClient client = SpeechClient.create()) { + ClientStream clientStream; + responseObserver = + new ResponseObserver() { + + ArrayList responses = new ArrayList<>(); + + public void onStart(StreamController controller) {} + + public void onResponse(StreamingRecognizeResponse response) { + responses.add(response); + StreamingRecognitionResult result = response.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just + // use the first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + + public void onComplete() { + System.out.println("Done"); + } + + public void onError(Throwable t) { + System.out.println(t); + } + }; + + clientStream = client.streamingRecognizeCallable().splitCall(responseObserver); + + RecognitionConfig recognitionConfig = + RecognitionConfig.newBuilder() + .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .build(); + StreamingRecognitionConfig streamingRecognitionConfig = + StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build(); + + StreamingRecognizeRequest request = + StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(streamingRecognitionConfig) + .build(); // The first request in a streaming call has to be a config + + clientStream.send(request); + + try { + // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true, + // bigEndian: false + AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false); + DataLine.Info targetInfo = + new Info( + TargetDataLine.class, + audioFormat); // Set the system information to read from the microphone audio + // stream + + if (!AudioSystem.isLineSupported(targetInfo)) { + System.out.println("Microphone not supported"); + System.exit(0); + } + // Target data line captures the audio stream the microphone produces. + targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo); + targetDataLine.open(audioFormat); + micThread.start(); + + long startTime = System.currentTimeMillis(); + + while (true) { + + long estimatedTime = System.currentTimeMillis() - startTime; + + if (estimatedTime >= 55000) { + + clientStream.closeSend(); + clientStream = client.streamingRecognizeCallable().splitCall(responseObserver); + + request = + StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(streamingRecognitionConfig) + .build(); + + startTime = System.currentTimeMillis(); + + } else { + request = + StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(sharedQueue.take())) + .build(); + } + + clientStream.send(request); + } + } catch (Exception e) { + System.out.println(e); + } + } + } +} +// [END speech_transcribe_infinite_streaming]