diff --git a/speech/README.md b/speech/README.md index dcc90673ae..19ec303cda 100644 --- a/speech/README.md +++ b/speech/README.md @@ -14,7 +14,7 @@ recognition technologies into developer applications. * [Setup](#setup) * [Samples](#samples) - * [Recognition](#recognition) + * [Recognize](#recognize) ## Setup @@ -28,32 +28,32 @@ recognition technologies into developer applications. ## Samples -### Recognition +### Recognize -View the [documentation][recognition_docs] or the [source code][recognition_code]. +View the [documentation][recognize_docs] or the [source code][recognize_code]. -__Run the sample:__ +__Usage:__ `node recognize --help` -Usage: `node recognize ` +``` +Commands: + sync Detects speech in an audio file. + async Creates a job to detect speech in an audio file, and waits for the job to complete. + stream Detects speech in an audio file by streaming it to the Speech API. + listen Detects speech in a microphone input stream. -Example: +Options: + --help Show help [boolean] - node recognize resources/audio.raw +Examples: + node recognize sync ./resources/audio.raw Detects speech in "./resources/audio.raw". + node recognize async ./resources/audio.raw Creates a job to detect speech in "./resources/audio.raw", and waits for + the job to complete. + node recognize stream ./resources/audio.raw Detects speech in "./resources/audio.raw" by streaming it to the Speech + API. + node recognize listen Detects speech in a microphone input stream. -[recognition_docs]: https://cloud.google.com/speech/ -[recognition_code]: recognize.js +For more information, see https://cloud.google.com/speech/docs +``` -### Recognition (Streaming) - -View the [documentation][recognition_streaming_docs] or the [source code][recognition_streaming_code]. 
- -__Run the sample:__ - -Usage: `node recognize_streaming ` - -Example: - - node recognize_streaming resources/audio.raw - -[recognition_streaming_docs]: https://cloud.google.com/speech/ -[recognition_streaming_code]: recognize_streaming.js +[recognize_docs]: https://cloud.google.com/speech/docs +[recognize_code]: recognize.js diff --git a/speech/package.json b/speech/package.json index c0df797e2d..37cafd9fdd 100644 --- a/speech/package.json +++ b/speech/package.json @@ -9,13 +9,14 @@ "system-test": "mocha -R spec -t 120000 --require intelli-espower-loader ../system-test/_setup.js system-test/*.test.js" }, "dependencies": { - "async": "^1.5.2", - "google-auto-auth": "^0.2.4", - "google-proto-files": "^0.3.0", - "googleapis": "^12.0.0", - "grpc": "^0.15.0" + "@google-cloud/speech": "^0.1.1", + "node-record-lpcm16": "^0.1.4", + "yargs": "^5.0.0" }, "devDependencies": { - "mocha": "^2.5.3" + "mocha": "^3.0.2" + }, + "engines": { + "node": ">=4.3.2" } } diff --git a/speech/recognize.js b/speech/recognize.js index 4d33f0a5a1..2dcccdc5f1 100644 --- a/speech/recognize.js +++ b/speech/recognize.js @@ -11,106 +11,140 @@ // See the License for the specific language governing permissions and // limitations under the License. -'use strict'; +/** + * This application demonstrates how to perform basic recognize operations with + * the Google Cloud Speech API. + * + * For more information, see the README.md under /speech and the documentation + * at https://cloud.google.com/speech/docs. 
+ */ -// [START app] -// [START import_libraries] -var google = require('googleapis'); -var async = require('async'); -var fs = require('fs'); +'use strict'; -// Get a reference to the speech service -var speech = google.speech('v1beta1').speech; -// [END import_libraries] +const fs = require('fs'); +const record = require('node-record-lpcm16'); +const speech = require('@google-cloud/speech')(); -// [START authenticating] -function getAuthClient (callback) { - // Acquire credentials - google.auth.getApplicationDefault(function (err, authClient) { +// [START speech_sync_recognize] +function syncRecognize (filename, callback) { + // Detect speech in the audio file, e.g. "./resources/audio.raw" + speech.recognize(filename, { + encoding: 'LINEAR16', + sampleRate: 16000 + }, (err, results) => { if (err) { - return callback(err); - } - - // The createScopedRequired method returns true when running on GAE or a - // local developer machine. In that case, the desired scopes must be passed - // in manually. When the code is running in GCE or a Managed VM, the scopes - // are pulled from the GCE metadata server. - // See https://cloud.google.com/compute/docs/authentication for more - // information. - if (authClient.createScopedRequired && authClient.createScopedRequired()) { - // Scopes can be specified either as an array or as a single, - // space-delimited string. - authClient = authClient.createScoped([ - 'https://www.googleapis.com/auth/cloud-platform' - ]); + callback(err); + return; } - return callback(null, authClient); + console.log('Results:', results); + callback(); }); } -// [END authenticating] +// [END speech_sync_recognize] -// [START construct_request] -function prepareRequest (inputFile, callback) { - fs.readFile(inputFile, function (err, audioFile) { +// [START speech_async_recognize] +function asyncRecognize (filename, callback) { + // Detect speech in the audio file, e.g. 
"./resources/audio.raw" + speech.startRecognition(filename, { + encoding: 'LINEAR16', + sampleRate: 16000 + }, (err, operation) => { if (err) { - return callback(err); + callback(err); + return; } - console.log('Got audio file!'); - var encoded = new Buffer(audioFile).toString('base64'); - var payload = { - config: { - encoding: 'LINEAR16', - sampleRate: 16000 - }, - audio: { - content: encoded - } - }; - return callback(null, payload); + + operation + .on('error', callback) + .on('complete', (results) => { + console.log('Results:', results); + callback(); + }); }); } -// [END construct_request] - -function main (inputFile, callback) { - var requestPayload; - - async.waterfall([ - function (cb) { - prepareRequest(inputFile, cb); - }, - function (payload, cb) { - requestPayload = payload; - getAuthClient(cb); - }, - // [START send_request] - function sendRequest (authClient, cb) { - console.log('Analyzing speech...'); - speech.syncrecognize({ - auth: authClient, - resource: requestPayload - }, function (err, result) { - if (err) { - return cb(err); - } - console.log('result:', JSON.stringify(result, null, 2)); - cb(null, result); - }); +// [END speech_async_recognize] + +// [START speech_streaming_recognize] +function streamingRecognize (filename, callback) { + const options = { + config: { + encoding: 'LINEAR16', + sampleRate: 16000 } - // [END send_request] - ], callback); + }; + + // Create a recognize stream + const recognizeStream = speech.createRecognizeStream(options) + .on('error', callback) + .on('data', (data) => { + console.log('Data received: %j', data); + callback(); + }); + + // Stream an audio file from disk to the Speech API, e.g. 
"./resources/audio.raw" + fs.createReadStream(filename).pipe(recognizeStream); } +// [END speech_streaming_recognize] -// [START run_application] -if (module === require.main) { - if (process.argv.length < 3) { - console.log('Usage: node recognize '); - process.exit(); - } - var inputFile = process.argv[2]; - main(inputFile, console.log); +// [START speech_streaming_mic_recognize] +function streamingMicRecognize (filename) { + const options = { + config: { + encoding: 'LINEAR16', + sampleRate: 16000 + } + }; + + // Create a recognize stream + const recognizeStream = speech.createRecognizeStream(options) + .on('error', console.error) + .on('data', (data) => process.stdout.write(data.results)); + + // Start recording and send the microphone input to the Speech API + record.start({ sampleRate: 16000 }).pipe(recognizeStream); + + console.log('Listening, press Ctrl+C to stop.'); } -// [END run_application] -// [END app] +// [END speech_streaming_mic_recognize] + +// The command-line program +var cli = require('yargs'); +var utils = require('../utils'); -exports.main = main; +var program = module.exports = { + syncRecognize: syncRecognize, + asyncRecognize: asyncRecognize, + streamingRecognize: streamingRecognize, + streamingMicRecognize: streamingMicRecognize, + main: function (args) { + // Run the command-line program + cli.help().strict().parse(args).argv; + } +}; + +cli + .demand(1) + .command('sync ', 'Detects speech in an audio file.', {}, function (options) { + program.syncRecognize(options.filename, utils.makeHandler(false)); + }) + .command('async ', 'Creates a job to detect speech in an audio file, and waits for the job to complete.', {}, function (options) { + program.asyncRecognize(options.filename, utils.makeHandler(false)); + }) + .command('stream ', 'Detects speech in an audio file by streaming it to the Speech API.', {}, function (options) { + program.streamingRecognize(options.filename, utils.makeHandler(false)); + }) + .command('listen', 'Detects speech 
in a microphone input stream.', {}, function () { + program.streamingMicRecognize(); + }) + .example('node $0 sync ./resources/audio.raw', 'Detects speech in "./resources/audio.raw".') + .example('node $0 async ./resources/audio.raw', 'Creates a job to detect speech in "./resources/audio.raw", and waits for the job to complete.') + .example('node $0 stream ./resources/audio.raw', 'Detects speech in "./resources/audio.raw" by streaming it to the Speech API.') + .example('node $0 listen', 'Detects speech in a microphone input stream.') + .wrap(120) + .recommendCommands() + .epilogue('For more information, see https://cloud.google.com/speech/docs'); + +if (module === require.main) { + program.main(process.argv.slice(2)); +} diff --git a/speech/recognize_streaming.js b/speech/recognize_streaming.js deleted file mode 100644 index cf4afd59ad..0000000000 --- a/speech/recognize_streaming.js +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2016, Google, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -'use strict'; - -var async = require('async'); -var fs = require('fs'); -var path = require('path'); -var grpc = require('grpc'); -var googleProtoFiles = require('google-proto-files'); -var googleAuth = require('google-auto-auth'); -var Transform = require('stream').Transform; - -// [START proto] -var PROTO_ROOT_DIR = googleProtoFiles('..'); - -var protoDescriptor = grpc.load({ - root: PROTO_ROOT_DIR, - file: path.relative(PROTO_ROOT_DIR, googleProtoFiles.speech.v1beta1) -}, 'proto', { - binaryAsBase64: true, - convertFieldsToCamelCase: true -}); -var speechProto = protoDescriptor.google.cloud.speech.v1beta1; -// [END proto] - -// [START authenticating] -function getSpeechService (host, callback) { - var googleAuthClient = googleAuth({ - scopes: [ - 'https://www.googleapis.com/auth/cloud-platform' - ] - }); - - googleAuthClient.getAuthClient(function (err, authClient) { - if (err) { - return callback(err); - } - - var credentials = grpc.credentials.combineChannelCredentials( - grpc.credentials.createSsl(), - grpc.credentials.createFromGoogleCredential(authClient) - ); - - console.log('Loading speech service...'); - var stub = new speechProto.Speech(host, credentials); - return callback(null, stub); - }); -} -// [END authenticating] - -function main (inputFile, host, callback) { - async.waterfall([ - function (cb) { - getSpeechService(host, cb); - }, - // [START send_request] - function sendRequest (speechService, cb) { - console.log('Analyzing speech...'); - var responses = []; - var call = speechService.streamingRecognize(); - - // Listen for various responses - call.on('error', cb); - call.on('data', function (recognizeResponse) { - if (recognizeResponse) { - responses.push(recognizeResponse); - if (recognizeResponse.results && recognizeResponse.results.length) { - console.log(JSON.stringify(recognizeResponse.results, null, 2)); - } - } - }); - call.on('end', function () { - cb(null, responses); - }); - - // Write the initial recognize reqeust - call.write({ - 
streamingConfig: { - config: { - encoding: 'LINEAR16', - sampleRate: 16000 - }, - interimResults: false, - singleUtterance: false - } - }); - - var toRecognizeRequest = new Transform({ objectMode: true }); - toRecognizeRequest._transform = function (chunk, encoding, done) { - done(null, { - audioContent: chunk - }); - }; - - // Stream the audio to the Speech API - fs.createReadStream(inputFile) - .pipe(toRecognizeRequest) - .pipe(call); - } - // [END send_request] - ], callback); -} - -// [START run_application] -if (module === require.main) { - if (process.argv.length < 3) { - console.log('Usage: node recognize_streaming [speech_api_host]'); - process.exit(); - } - var inputFile = process.argv[2]; - var host = process.argv[3]; - main(inputFile, host || 'speech.googleapis.com', console.log); -} -// [END run_application] - -exports.main = main; diff --git a/speech/system-test/recognize.test.js b/speech/system-test/recognize.test.js index 870caeebfa..944e61e932 100644 --- a/speech/system-test/recognize.test.js +++ b/speech/system-test/recognize.test.js @@ -13,23 +13,24 @@ 'use strict'; -var path = require('path'); -var recognizeExample = require('../recognize'); +const path = require(`path`); +const run = require(`../../utils`).run; -describe('speech:recognize', function () { - it('should recognize speech in audio', function (done) { - recognizeExample.main( - path.join(__dirname, '../resources/audio.raw'), - function (err, result) { - assert.ifError(err); - assert(result); - assert(Array.isArray(result.results)); - assert(result.results.length === 1); - assert(result.results[0].alternatives); - assert(console.log.calledWith('Got audio file!')); - assert(console.log.calledWith('Analyzing speech...')); - done(); - } - ); +const cmd = `node recognize.js`; +const cwd = path.join(__dirname, `..`); +const filename = `./resources/audio.raw`; +const text = `how old is the Brooklyn Bridge`; + +describe(`speech:recognize`, () => { + it(`should run sync recognize`, () => { + 
assert.equal(run(`${cmd} sync ${filename}`, cwd), `Results: ${text}`); + }); + + it(`should run async recognize`, () => { + assert.equal(run(`${cmd} async ${filename}`, cwd), `Results: ${text}`); + }); + + it(`should run streaming recognize`, () => { + assert.notEqual(run(`${cmd} stream ${filename}`, cwd).indexOf(text), -1); }); }); diff --git a/speech/system-test/recognize_streaming.test.js b/speech/system-test/recognize_streaming.test.js deleted file mode 100644 index b155cf426e..0000000000 --- a/speech/system-test/recognize_streaming.test.js +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2016, Google, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -'use strict'; - -var path = require('path'); -var recognizeExample = require('../recognize_streaming'); - -describe('speech:recognize_streaming', function () { - it('should recognize audio', function (done) { - recognizeExample.main( - path.join(__dirname, '../resources/audio.raw'), - process.env.SPEECH_API_HOST || 'speech.googleapis.com', - function (err, results) { - assert.ifError(err); - assert(results); - assert(results.length === 3); - assert(results[0].results); - assert(results[1].results); - assert(results[2].results); - assert(results[2].results.length === 1); - assert(console.log.calledWith('Loading speech service...')); - assert(console.log.calledWith('Analyzing speech...')); - done(); - } - ); - }); -}); diff --git a/speech/test/recognize.test.js b/speech/test/recognize.test.js index 00135c8b99..89747c0245 100644 --- a/speech/test/recognize.test.js +++ b/speech/test/recognize.test.js @@ -1,18 +1,40 @@ -// Copyright 2016, Google, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/** + * Copyright 2016, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ 'use strict'; -describe('speech:recognize', function () { - it('should be tested'); +const proxyquire = require(`proxyquire`).noCallThru(); + +describe(`speech:recognize`, () => { + it(`should handle errors`, () => { + const filename = `audio.raw`; + const error = new Error(`error`); + const callback = sinon.spy(); + const speechMock = { + recognize: sinon.stub().yields(error), + startRecognition: sinon.stub().yields(error) + }; + const SpeechMock = sinon.stub().returns(speechMock); + const program = proxyquire(`../recognize`, { + '@google-cloud/speech': SpeechMock + }); + + program.syncRecognize(filename, callback); + program.asyncRecognize(filename, callback); + + assert.equal(callback.callCount, 2); + assert.equal(callback.alwaysCalledWithExactly(error), true); + }); }); diff --git a/speech/test/recognize_streaming.test.js b/speech/test/recognize_streaming.test.js deleted file mode 100644 index 3d2836574c..0000000000 --- a/speech/test/recognize_streaming.test.js +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2016, Google, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -'use strict'; - -describe('speech:recognize_streaming', function () { - it('should be tested'); -}); diff --git a/utils/index.js b/utils/index.js index 40abf17fc2..05bd0affde 100644 --- a/utils/index.js +++ b/utils/index.js @@ -13,6 +13,12 @@ 'use strict'; +var execSync = require('child_process').execSync; + +exports.run = function (cmd, cwd) { + return execSync(cmd, { cwd: cwd }).toString().trim(); +}; + exports.pick = function (obj, field) { if (Array.isArray(field)) { var _obj = {}; @@ -34,6 +40,7 @@ exports.prettyPick = function (obj, field) { exports.makeHandler = function (print, field) { return function (err, result) { if (err) { + console.log(err); throw err; } if (print === false) {