diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md
index 83565a21e04..aa07100c8dd 100644
--- a/speech/cloud-client/README.md
+++ b/speech/cloud-client/README.md
@@ -45,3 +45,15 @@ Build your project with:
java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \
com.example.speech.Recognize asyncrecognize gs://cloud-samples-tests/speech/vr.flac
```
+
+ ### Synchronously transcribe a local audio file and print word offsets
+```
+ java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \
+ com.example.speech.Recognize wordoffsets ./resources/audio.raw
+```
+
+### Asynchronously transcribe a remote audio file and print word offsets
+```
+ java -cp target/speech-google-cloud-samples-1.0.0-jar-with-dependencies.jar \
+ com.example.speech.Recognize wordoffsets gs://cloud-samples-tests/speech/vr.flac
+```
diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml
index b887baeae18..9d097a566d1 100644
--- a/speech/cloud-client/pom.xml
+++ b/speech/cloud-client/pom.xml
@@ -21,9 +21,10 @@
- com.google.cloud.samples
- shared-configuration
- 1.0.5
+ doc-samples
+ com.google.cloud
+ 1.0.0
+ ../..
diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
index 619c402c49e..c571ad6714e 100644
--- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
+++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -50,7 +50,7 @@ public static void main(String... args) throws Exception {
System.out.printf(
"\tjava %s \"\" \"\"\n"
+ "Commands:\n"
- + "\tsyncrecognize | asyncrecognize | streamrecognize\n"
+ + "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
@@ -66,6 +66,12 @@ public static void main(String... args) throws Exception {
} else {
syncRecognizeFile(path);
}
+ } else if (command.equals("wordoffsets")) {
+ if (path.startsWith("gs://")) {
+ asyncRecognizeWords(path);
+ } else {
+ syncRecognizeWords(path);
+ }
} else if (command.equals("asyncrecognize")) {
if (path.startsWith("gs://")) {
asyncRecognizeGcs(path);
@@ -113,6 +119,51 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
speech.close();
}
+ /**
+  * Performs synchronous speech recognition on a local file and prints word time offsets.
+  *
+  * @param fileName the path to a raw PCM (LINEAR16) audio file to transcribe and get offsets for.
+  * @throws Exception if the audio file cannot be read or the recognition request fails.
+  */
+ public static void syncRecognizeWords(String fileName) throws Exception, IOException {
+   // try-with-resources closes the client even if reading the file or the request throws.
+   try (SpeechClient speech = SpeechClient.create()) {
+     Path path = Paths.get(fileName);
+     byte[] data = Files.readAllBytes(path);
+     ByteString audioBytes = ByteString.copyFrom(data);
+
+     // Configure request with local raw PCM audio and ask for per-word time offsets
+     RecognitionConfig config = RecognitionConfig.newBuilder()
+         .setEncoding(AudioEncoding.LINEAR16)
+         .setLanguageCode("en-US")
+         .setSampleRateHertz(16000)
+         .setEnableWordTimeOffsets(true)
+         .build();
+     RecognitionAudio audio = RecognitionAudio.newBuilder().setContent(audioBytes).build();
+
+     // Use blocking call to get audio transcript
+     RecognizeResponse response = speech.recognize(config, audio);
+     List<SpeechRecognitionResult> results = response.getResultsList();
+
+     for (SpeechRecognitionResult result : results) {
+       List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
+       for (SpeechRecognitionAlternative alternative : alternatives) {
+         System.out.printf("Transcription: %s%n", alternative.getTranscript());
+         for (WordInfo wordInfo : alternative.getWordsList()) {
+           System.out.println(wordInfo.getWord());
+           // Each offset is printed as whole seconds, a dot, then tenths of a second.
+           System.out.printf("\t%s.%s sec - %s.%s sec\n",
+               wordInfo.getStartTime().getSeconds(),
+               wordInfo.getStartTime().getNanos() / 100000000,
+               wordInfo.getEndTime().getSeconds(),
+               wordInfo.getEndTime().getNanos() / 100000000);
+         }
+       }
+     }
+   }
+ }
+
+
/**
* Performs speech recognition on remote FLAC file and prints the transcription.
*
@@ -193,11 +244,11 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
/**
* Performs non-blocking speech recognition on remote FLAC file and prints
- * the transcription.
+ * the transcription as well as word time offsets.
*
* @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
*/
- public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
+ public static void asyncRecognizeWords(String gcsUri) throws Exception, IOException {
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
SpeechClient speech = SpeechClient.create();
@@ -240,6 +291,47 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
speech.close();
}
+ /**
+  * Performs non-blocking speech recognition on remote FLAC file and prints
+  * the transcription.
+  *
+  * @param gcsUri the path to the remote FLAC audio file to transcribe.
+  */
+ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
+   // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS; try-with-resources
+   // closes it even if the request, the polling loop, or response.get() throws.
+   try (SpeechClient speech = SpeechClient.create()) {
+     // Configure remote file request for FLAC
+     RecognitionConfig config = RecognitionConfig.newBuilder()
+         .setEncoding(AudioEncoding.FLAC)
+         .setLanguageCode("en-US")
+         .setSampleRateHertz(16000)
+         .build();
+     RecognitionAudio audio = RecognitionAudio.newBuilder()
+         .setUri(gcsUri)
+         .build();
+
+     // Use non-blocking call for getting file transcription
+     OperationFuture response =
+         speech.longRunningRecognizeAsync(config, audio);
+     while (!response.isDone()) {
+       System.out.println("Waiting for response...");
+       Thread.sleep(10000);
+     }
+
+     List<SpeechRecognitionResult> results = response.get().getResultsList();
+
+     for (SpeechRecognitionResult result : results) {
+       List<SpeechRecognitionAlternative> alternatives = result.getAlternativesList();
+       for (SpeechRecognitionAlternative alternative : alternatives) {
+         System.out.printf("Transcription: %s\n", alternative.getTranscript());
+       }
+     }
+   }
+ }
+
+
/**
* Performs streaming speech recognition on raw PCM audio data.
*
diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
index c2417fe7cfc..7e2c4862fda 100644
--- a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
+++ b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
@@ -62,6 +62,14 @@ public void testRecognizeFile() throws Exception {
assertThat(got).contains("how old is the Brooklyn Bridge");
}
+ @Test
+ public void testRecognizeWordoffset() throws Exception {
+   Recognize.syncRecognizeWords(fileName);
+   final String captured = bout.toString();
+   assertThat(captured).contains("how old is the Brooklyn Bridge");
+   assertThat(captured).contains("\t0.0 sec -");
+ }
+
@Test
public void testRecognizeGcs() throws Exception {
Recognize.syncRecognizeGcs(gcsPath);
@@ -85,8 +93,9 @@ public void testAsyncRecognizeGcs() throws Exception {
@Test
public void testAsyncWordoffset() throws Exception {
- Recognize.asyncRecognizeGcs(gcsPath);
+ Recognize.asyncRecognizeWords(gcsPath);
String got = bout.toString();
+ assertThat(got).contains("how old is the Brooklyn Bridge");
assertThat(got).contains("\t0.0 sec -");
}