From dabc77edb2dbcc5ca8e362c4e5a6fb366888f597 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Thu, 6 Apr 2017 14:52:15 -0700 Subject: [PATCH 1/4] Update samples to match new Speech API --- package.json | 2 +- speech/package.json | 2 +- speech/quickstart.js | 5 +- speech/recognize.js | 95 ++++++++++++++++++--------- speech/system-test/quickstart.test.js | 3 +- 5 files changed, 70 insertions(+), 37 deletions(-) diff --git a/package.json b/package.json index 3ff76bd581..1e345cdc57 100644 --- a/package.json +++ b/package.json @@ -83,7 +83,7 @@ "@google-cloud/pubsub": "0.8.0", "@google-cloud/resource": "0.6.0", "@google-cloud/spanner": "0.1.0", - "@google-cloud/speech": "0.6.0", + "@google-cloud/speech": "0.8.0", "@google-cloud/storage": "0.7.0", "@google-cloud/translate": "0.8.0", "@google-cloud/videointelligence": "https://storage.googleapis.com/videointelligence-alpha/videointelligence-nodejs.tar.gz", diff --git a/speech/package.json b/speech/package.json index d1c6f0ba65..2284446be3 100644 --- a/speech/package.json +++ b/speech/package.json @@ -8,7 +8,7 @@ "test": "cd ..; npm run st -- --verbose speech/system-test/*.test.js" }, "dependencies": { - "@google-cloud/speech": "0.6.0", + "@google-cloud/speech": "0.8.0", "@google-cloud/storage": "0.7.0", "node-record-lpcm16": "0.2.0", "yargs": "6.6.0" diff --git a/speech/quickstart.js b/speech/quickstart.js index bc2cde2bd4..48c58b2c1d 100644 --- a/speech/quickstart.js +++ b/speech/quickstart.js @@ -30,10 +30,11 @@ const speechClient = Speech({ // The name of the audio file to transcribe const fileName = './resources/audio.raw'; -// The audio file's encoding and sample rate +// The audio file's encoding, sample rate in hertz, and BCP-47 language code const options = { encoding: 'LINEAR16', - sampleRate: 16000 + sampleRateHertz: 16000, + languageCode: 'en-US' }; // Detects speech in the audio file diff --git a/speech/recognize.js b/speech/recognize.js index ce17701b2b..dcf56d3083 100644 --- a/speech/recognize.js +++ 
b/speech/recognize.js @@ -23,7 +23,7 @@ 'use strict'; -function syncRecognize (filename, encoding, sampleRate) { +function syncRecognize (filename, encoding, sampleRateHertz, languageCode) { // [START speech_sync_recognize] // Imports the Google Cloud client library const Speech = require('@google-cloud/speech'); @@ -37,12 +37,16 @@ function syncRecognize (filename, encoding, sampleRate) { // The encoding of the audio file, e.g. 'LINEAR16' // const encoding = 'LINEAR16'; - // The sample rate of the audio file, e.g. 16000 - // const sampleRate = 16000; + // The sample rate of the audio file in hertz, e.g. 16000 + // const sampleRateHertz = 16000; + + // The BCP-47 language code to use, e.g. 'en-US' + // const languageCode = 'en-US'; const request = { encoding: encoding, - sampleRate: sampleRate + sampleRateHertz: sampleRateHertz, + languageCode: languageCode }; // Detects speech in the audio file @@ -55,7 +59,7 @@ function syncRecognize (filename, encoding, sampleRate) { // [END speech_sync_recognize] } -function syncRecognizeGCS (gcsUri, encoding, sampleRate) { +function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // [START speech_sync_recognize_gcs] // Imports the Google Cloud client library const Speech = require('@google-cloud/speech'); @@ -69,12 +73,16 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRate) { // The encoding of the audio file, e.g. 'LINEAR16' // const encoding = 'LINEAR16'; - // The sample rate of the audio file, e.g. 16000 - // const sampleRate = 16000; + // The sample rate of the audio file in hertz, e.g. 16000 + // const sampleRateHertz = 16000; + + // The BCP-47 language code to use, e.g. 
'en-US' + // const languageCode = 'en-US'; const request = { encoding: encoding, - sampleRate: sampleRate + sampleRateHertz: sampleRateHertz, + languageCode: languageCode }; // Detects speech in the audio file @@ -87,7 +95,7 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRate) { // [END speech_sync_recognize_gcs] } -function asyncRecognize (filename, encoding, sampleRate) { +function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) { // [START speech_async_recognize] // Imports the Google Cloud client library const Speech = require('@google-cloud/speech'); @@ -101,12 +109,16 @@ function asyncRecognize (filename, encoding, sampleRate) { // The encoding of the audio file, e.g. 'LINEAR16' // const encoding = 'LINEAR16'; - // The sample rate of the audio file, e.g. 16000 - // const sampleRate = 16000; + // The sample rate of the audio file in hertz, e.g. 16000 + // const sampleRateHertz = 16000; + + // The BCP-47 language code to use, e.g. 'en-US' + // const languageCode = 'en-US'; const request = { encoding: encoding, - sampleRate: sampleRate + sampleRateHertz: sampleRateHertz, + languageCode: languageCode }; // Detects speech in the audio file. This creates a recognition job that you @@ -123,7 +135,7 @@ function asyncRecognize (filename, encoding, sampleRate) { // [END speech_async_recognize] } -function asyncRecognizeGCS (gcsUri, encoding, sampleRate) { +function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // [START speech_async_recognize_gcs] // Imports the Google Cloud client library const Speech = require('@google-cloud/speech'); @@ -137,12 +149,16 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRate) { // The encoding of the audio file, e.g. 'LINEAR16' // const encoding = 'LINEAR16'; - // The sample rate of the audio file, e.g. 16000 - // const sampleRate = 16000; + // The sample rate of the audio file in hertz, e.g. 16000 + // const sampleRateHertz = 16000; + + // The BCP-47 language code to use, e.g. 
'en-US' + // const languageCode = 'en-US'; const request = { encoding: encoding, - sampleRate: sampleRate + sampleRateHertz: sampleRateHertz, + languageCode: languageCode }; // Detects speech in the audio file. This creates a recognition job that you @@ -159,7 +175,7 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRate) { // [END speech_async_recognize_gcs] } -function streamingRecognize (filename, encoding, sampleRate) { +function streamingRecognize (filename, encoding, sampleRateHertz, languageCode) { // [START speech_streaming_recognize] const fs = require('fs'); @@ -175,13 +191,17 @@ function streamingRecognize (filename, encoding, sampleRate) { // The encoding of the audio file, e.g. 'LINEAR16' // const encoding = 'LINEAR16'; - // The sample rate of the audio file, e.g. 16000 - // const sampleRate = 16000; + // The sample rate of the audio file in hertz, e.g. 16000 + // const sampleRateHertz = 16000; + + // The BCP-47 language code to use, e.g. 'en-US' + // const languageCode = 'en-US'; const request = { config: { encoding: encoding, - sampleRate: sampleRate + sampleRateHertz: sampleRateHertz, + languageCode: languageCode } }; @@ -197,7 +217,7 @@ function streamingRecognize (filename, encoding, sampleRate) { // [END speech_streaming_recognize] } -function streamingMicRecognize (encoding, sampleRate) { +function streamingMicRecognize (encoding, sampleRateHertz, languageCode) { // [START speech_streaming_mic_recognize] const record = require('node-record-lpcm16'); @@ -210,13 +230,17 @@ function streamingMicRecognize (encoding, sampleRate) { // The encoding of the audio file, e.g. 'LINEAR16' // const encoding = 'LINEAR16'; - // The sample rate of the audio file, e.g. 16000 - // const sampleRate = 16000; + // The sample rate of the audio file in hertz, e.g. 16000 + // const sampleRateHertz = 16000; + + // The BCP-47 language code to use, e.g. 
'en-US' + // const languageCode = 'en-US'; const request = { config: { encoding: encoding, - sampleRate: sampleRate + sampleRateHertz: sampleRateHertz, + languageCode: languageCode } }; @@ -227,7 +251,7 @@ function streamingMicRecognize (encoding, sampleRate) { // Start recording and send the microphone input to the Speech API record.start({ - sampleRate: sampleRate, + sampleRateHertz: sampleRateHertz, threshold: 0 }).pipe(recognizeStream); @@ -241,37 +265,37 @@ require(`yargs`) `sync `, `Detects speech in a local audio file.`, {}, - (opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRate) + (opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode) ) .command( `sync-gcs `, `Detects speech in an audio file located in a Google Cloud Storage bucket.`, {}, - (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate) + (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode) ) .command( `async `, `Creates a job to detect speech in a local audio file, and waits for the job to complete.`, {}, - (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRate) + (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode) ) .command( `async-gcs `, `Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`, {}, - (opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate) + (opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode) ) .command( `stream `, `Detects speech in a local audio file by streaming it to the Speech API.`, {}, - (opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRate) + (opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode) ) .command( `listen`, `Detects speech in a microphone input stream.`, {}, - (opts) => 
streamingMicRecognize(opts.encoding, opts.sampleRate) + (opts) => streamingMicRecognize(opts.encoding, opts.sampleRateHertz, opts.languageCode) ) .options({ encoding: { @@ -281,12 +305,19 @@ require(`yargs`) requiresArg: true, type: 'string' }, - sampleRate: { + sampleRateHertz: { alias: 'r', default: 16000, global: true, requiresArg: true, type: 'number' + }, + languageCode: { + alias: 'l', + default: 'en-US', + global: true, + requiresArg: true, + type: 'string' } }) .example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`) diff --git a/speech/system-test/quickstart.test.js b/speech/system-test/quickstart.test.js index 1b0407e6cf..c3f5c607eb 100644 --- a/speech/system-test/quickstart.test.js +++ b/speech/system-test/quickstart.test.js @@ -24,7 +24,8 @@ const speech = proxyquire(`@google-cloud/speech`, {})(); const fileName = path.join(__dirname, `../resources/audio.raw`); const config = { encoding: `LINEAR16`, - sampleRate: 16000 + sampleRateHertz: 16000, + languageCode: `en-US` }; test.before(stubConsole); From 2d5dd9d112db0267a549f7147a22ef7672de708b Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Sun, 9 Apr 2017 23:59:35 -0700 Subject: [PATCH 2/4] Remove asyncRecognizeLocal --- speech/README.md | 1 - speech/recognize.js | 46 ---------------------------- speech/system-test/recognize.test.js | 5 --- 3 files changed, 52 deletions(-) diff --git a/speech/README.md b/speech/README.md index e568e3f053..88caa26d65 100644 --- a/speech/README.md +++ b/speech/README.md @@ -35,7 +35,6 @@ __Usage:__ `node recognize.js --help` Commands: sync Detects speech in a local audio file. sync-gcs Detects speech in an audio file located in a Google Cloud Storage bucket. - async Creates a job to detect speech in a local audio file, and waits for the job to complete. async-gcs Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete. 
stream Detects speech in a local audio file by streaming it to the Speech API. diff --git a/speech/recognize.js b/speech/recognize.js index dcf56d3083..1e6a0df3c9 100644 --- a/speech/recognize.js +++ b/speech/recognize.js @@ -95,46 +95,6 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // [END speech_sync_recognize_gcs] } -function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) { - // [START speech_async_recognize] - // Imports the Google Cloud client library - const Speech = require('@google-cloud/speech'); - - // Instantiates a client - const speech = Speech(); - - // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw - // const filename = '/path/to/audio.raw'; - - // The encoding of the audio file, e.g. 'LINEAR16' - // const encoding = 'LINEAR16'; - - // The sample rate of the audio file in hertz, e.g. 16000 - // const sampleRateHertz = 16000; - - // The BCP-47 language code to use, e.g. 'en-US' - // const languageCode = 'en-US'; - - const request = { - encoding: encoding, - sampleRateHertz: sampleRateHertz, - languageCode: languageCode - }; - - // Detects speech in the audio file. This creates a recognition job that you - // can wait for now, or get its result later. 
- speech.startRecognition(filename, request) - .then((results) => { - const operation = results[0]; - // Get a Promise represention of the final result of the job - return operation.promise(); - }) - .then((transcription) => { - console.log(`Transcription: ${transcription}`); - }); - // [END speech_async_recognize] -} - function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // [START speech_async_recognize_gcs] // Imports the Google Cloud client library @@ -273,12 +233,6 @@ require(`yargs`) {}, (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode) ) - .command( - `async `, - `Creates a job to detect speech in a local audio file, and waits for the job to complete.`, - {}, - (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode) - ) .command( `async-gcs `, `Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`, diff --git a/speech/system-test/recognize.test.js b/speech/system-test/recognize.test.js index 9ba06fd864..50f0e221f2 100644 --- a/speech/system-test/recognize.test.js +++ b/speech/system-test/recognize.test.js @@ -50,11 +50,6 @@ test(`should run sync recognize on a GCS file`, async (t) => { t.true(output.includes(`Transcription: ${text}`)); }); -test(`should run async recognize on a local file`, async (t) => { - const output = await runAsync(`${cmd} async ${filepath}`, cwd); - t.true(output.includes(`Transcription: ${text}`)); -}); - test(`should run async recognize on a GCS file`, async (t) => { const output = await runAsync(`${cmd} async-gcs gs://${bucketName}/${filename}`, cwd); t.true(output.includes(`Transcription: ${text}`)); From e5e5ecdb2e4860867a9bfb171758a717441d94b5 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Tue, 11 Apr 2017 10:08:22 -0700 Subject: [PATCH 3/4] Revert "Remove asyncRecognizeLocal", since the prod/eng team added it back This reverts commit 
2d5dd9d112db0267a549f7147a22ef7672de708b. --- speech/README.md | 1 + speech/recognize.js | 46 ++++++++++++++++++++++++++++ speech/system-test/recognize.test.js | 5 +++ 3 files changed, 52 insertions(+) diff --git a/speech/README.md b/speech/README.md index 88caa26d65..e568e3f053 100644 --- a/speech/README.md +++ b/speech/README.md @@ -35,6 +35,7 @@ __Usage:__ `node recognize.js --help` Commands: sync Detects speech in a local audio file. sync-gcs Detects speech in an audio file located in a Google Cloud Storage bucket. + async Creates a job to detect speech in a local audio file, and waits for the job to complete. async-gcs Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete. stream Detects speech in a local audio file by streaming it to the Speech API. diff --git a/speech/recognize.js b/speech/recognize.js index 1e6a0df3c9..dcf56d3083 100644 --- a/speech/recognize.js +++ b/speech/recognize.js @@ -95,6 +95,46 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // [END speech_sync_recognize_gcs] } +function asyncRecognize (filename, encoding, sampleRateHertz, languageCode) { + // [START speech_async_recognize] + // Imports the Google Cloud client library + const Speech = require('@google-cloud/speech'); + + // Instantiates a client + const speech = Speech(); + + // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw + // const filename = '/path/to/audio.raw'; + + // The encoding of the audio file, e.g. 'LINEAR16' + // const encoding = 'LINEAR16'; + + // The sample rate of the audio file in hertz, e.g. 16000 + // const sampleRateHertz = 16000; + + // The BCP-47 language code to use, e.g. 'en-US' + // const languageCode = 'en-US'; + + const request = { + encoding: encoding, + sampleRateHertz: sampleRateHertz, + languageCode: languageCode + }; + + // Detects speech in the audio file. 
This creates a recognition job that you + // can wait for now, or get its result later. + speech.startRecognition(filename, request) + .then((results) => { + const operation = results[0]; + // Get a Promise representation of the final result of the job + return operation.promise(); + }) + .then((transcription) => { + console.log(`Transcription: ${transcription}`); + }); + // [END speech_async_recognize] +} + function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) { // [START speech_async_recognize_gcs] // Imports the Google Cloud client library @@ -233,6 +273,12 @@ require(`yargs`) {}, (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode) ) + .command( + `async `, + `Creates a job to detect speech in a local audio file, and waits for the job to complete.`, + {}, + (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode) + ) .command( `async-gcs `, `Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`, diff --git a/speech/system-test/recognize.test.js b/speech/system-test/recognize.test.js index 50f0e221f2..9ba06fd864 100644 --- a/speech/system-test/recognize.test.js +++ b/speech/system-test/recognize.test.js @@ -50,6 +50,11 @@ test(`should run sync recognize on a GCS file`, async (t) => { t.true(output.includes(`Transcription: ${text}`)); }); +test(`should run async recognize on a local file`, async (t) => { + const output = await runAsync(`${cmd} async ${filepath}`, cwd); + t.true(output.includes(`Transcription: ${text}`)); +}); + test(`should run async recognize on a GCS file`, async (t) => { const output = await runAsync(`${cmd} async-gcs gs://${bucketName}/${filename}`, cwd); t.true(output.includes(`Transcription: ${text}`)); From 88c00cc5f7d5483f02e5dab9cfef7854cb4aecb8 Mon Sep 17 00:00:00 2001 From: Ace Nassri Date: Tue, 11 Apr 2017 10:14:57 -0700 Subject: [PATCH 4/4] Update dependencies
--- package.json | 2 +- speech/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 1e345cdc57..0945f08e18 100644 --- a/package.json +++ b/package.json @@ -83,7 +83,7 @@ "@google-cloud/pubsub": "0.8.0", "@google-cloud/resource": "0.6.0", "@google-cloud/spanner": "0.1.0", - "@google-cloud/speech": "0.8.0", + "@google-cloud/speech": "0.9.0", "@google-cloud/storage": "0.7.0", "@google-cloud/translate": "0.8.0", "@google-cloud/videointelligence": "https://storage.googleapis.com/videointelligence-alpha/videointelligence-nodejs.tar.gz", diff --git a/speech/package.json b/speech/package.json index 2284446be3..e090223433 100644 --- a/speech/package.json +++ b/speech/package.json @@ -8,7 +8,7 @@ "test": "cd ..; npm run st -- --verbose speech/system-test/*.test.js" }, "dependencies": { - "@google-cloud/speech": "0.8.0", + "@google-cloud/speech": "0.9.0", "@google-cloud/storage": "0.7.0", "node-record-lpcm16": "0.2.0", "yargs": "6.6.0"