Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"@google-cloud/pubsub": "0.8.0",
"@google-cloud/resource": "0.6.0",
"@google-cloud/spanner": "0.1.0",
"@google-cloud/speech": "0.6.0",
"@google-cloud/speech": "0.8.0",
"@google-cloud/storage": "0.7.0",
"@google-cloud/translate": "0.8.0",
"@google-cloud/videointelligence": "https://storage.googleapis.com/videointelligence-alpha/videointelligence-nodejs.tar.gz",
Expand Down
1 change: 0 additions & 1 deletion speech/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ __Usage:__ `node recognize.js --help`
Commands:
sync <filename> Detects speech in a local audio file.
sync-gcs <gcsUri> Detects speech in an audio file located in a Google Cloud Storage bucket.
async <filename> Creates a job to detect speech in a local audio file, and waits for the job to complete.
async-gcs <gcsUri> Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and
waits for the job to complete.
stream <filename> Detects speech in a local audio file by streaming it to the Speech API.
Expand Down
2 changes: 1 addition & 1 deletion speech/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"test": "cd ..; npm run st -- --verbose speech/system-test/*.test.js"
},
"dependencies": {
"@google-cloud/speech": "0.6.0",
"@google-cloud/speech": "0.8.0",
"@google-cloud/storage": "0.7.0",
"node-record-lpcm16": "0.2.0",
"yargs": "6.6.0"
Expand Down
5 changes: 3 additions & 2 deletions speech/quickstart.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ const speechClient = Speech({
// The name of the audio file to transcribe
const fileName = './resources/audio.raw';

// The audio file's encoding and sample rate
// The audio file's encoding, sample rate in hertz, and BCP-47 language code
const options = {
encoding: 'LINEAR16',
sampleRate: 16000
sampleRateHertz: 16000,
languageCode: 'en-US'
};

// Detects speech in the audio file
Expand Down
123 changes: 54 additions & 69 deletions speech/recognize.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

'use strict';

function syncRecognize (filename, encoding, sampleRate) {
function syncRecognize (filename, encoding, sampleRateHertz, languageCode) {
// [START speech_sync_recognize]
// Imports the Google Cloud client library
const Speech = require('@google-cloud/speech');
Expand All @@ -37,12 +37,16 @@ function syncRecognize (filename, encoding, sampleRate) {
// The encoding of the audio file, e.g. 'LINEAR16'
// const encoding = 'LINEAR16';

// The sample rate of the audio file, e.g. 16000
// const sampleRate = 16000;
// The sample rate of the audio file in hertz, e.g. 16000
// const sampleRateHertz = 16000;

// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
encoding: encoding,
sampleRate: sampleRate
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
};

// Detects speech in the audio file
Expand All @@ -55,7 +59,7 @@ function syncRecognize (filename, encoding, sampleRate) {
// [END speech_sync_recognize]
}

function syncRecognizeGCS (gcsUri, encoding, sampleRate) {
function syncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
// [START speech_sync_recognize_gcs]
// Imports the Google Cloud client library
const Speech = require('@google-cloud/speech');
Expand All @@ -69,12 +73,16 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRate) {
// The encoding of the audio file, e.g. 'LINEAR16'
// const encoding = 'LINEAR16';

// The sample rate of the audio file, e.g. 16000
// const sampleRate = 16000;
// The sample rate of the audio file in hertz, e.g. 16000
// const sampleRateHertz = 16000;

// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
encoding: encoding,
sampleRate: sampleRate
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
};

// Detects speech in the audio file
Expand All @@ -87,43 +95,7 @@ function syncRecognizeGCS (gcsUri, encoding, sampleRate) {
// [END speech_sync_recognize_gcs]
}

function asyncRecognize (filename, encoding, sampleRate) {
// [START speech_async_recognize]
// Imports the Google Cloud client library
const Speech = require('@google-cloud/speech');

// Instantiates a client
const speech = Speech();

// The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
// const filename = '/path/to/audio.raw';

// The encoding of the audio file, e.g. 'LINEAR16'
// const encoding = 'LINEAR16';

// The sample rate of the audio file, e.g. 16000
// const sampleRate = 16000;

const request = {
encoding: encoding,
sampleRate: sampleRate
};

// Detects speech in the audio file. This creates a recognition job that you
// can wait for now, or get its result later.
speech.startRecognition(filename, request)
.then((results) => {
const operation = results[0];
// Get a Promise representation of the final result of the job
return operation.promise();
})
.then((transcription) => {
console.log(`Transcription: ${transcription}`);
});
// [END speech_async_recognize]
}

function asyncRecognizeGCS (gcsUri, encoding, sampleRate) {
function asyncRecognizeGCS (gcsUri, encoding, sampleRateHertz, languageCode) {
// [START speech_async_recognize_gcs]
// Imports the Google Cloud client library
const Speech = require('@google-cloud/speech');
Expand All @@ -137,12 +109,16 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRate) {
// The encoding of the audio file, e.g. 'LINEAR16'
// const encoding = 'LINEAR16';

// The sample rate of the audio file, e.g. 16000
// const sampleRate = 16000;
// The sample rate of the audio file in hertz, e.g. 16000
// const sampleRateHertz = 16000;

// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
encoding: encoding,
sampleRate: sampleRate
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
};

// Detects speech in the audio file. This creates a recognition job that you
Expand All @@ -159,7 +135,7 @@ function asyncRecognizeGCS (gcsUri, encoding, sampleRate) {
// [END speech_async_recognize_gcs]
}

function streamingRecognize (filename, encoding, sampleRate) {
function streamingRecognize (filename, encoding, sampleRateHertz, languageCode) {
// [START speech_streaming_recognize]
const fs = require('fs');

Expand All @@ -175,13 +151,17 @@ function streamingRecognize (filename, encoding, sampleRate) {
// The encoding of the audio file, e.g. 'LINEAR16'
// const encoding = 'LINEAR16';

// The sample rate of the audio file, e.g. 16000
// const sampleRate = 16000;
// The sample rate of the audio file in hertz, e.g. 16000
// const sampleRateHertz = 16000;

// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
config: {
encoding: encoding,
sampleRate: sampleRate
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
}
};

Expand All @@ -197,7 +177,7 @@ function streamingRecognize (filename, encoding, sampleRate) {
// [END speech_streaming_recognize]
}

function streamingMicRecognize (encoding, sampleRate) {
function streamingMicRecognize (encoding, sampleRateHertz, languageCode) {
// [START speech_streaming_mic_recognize]
const record = require('node-record-lpcm16');

Expand All @@ -210,13 +190,17 @@ function streamingMicRecognize (encoding, sampleRate) {
// The encoding of the audio file, e.g. 'LINEAR16'
// const encoding = 'LINEAR16';

// The sample rate of the audio file, e.g. 16000
// const sampleRate = 16000;
// The sample rate of the audio file in hertz, e.g. 16000
// const sampleRateHertz = 16000;

// The BCP-47 language code to use, e.g. 'en-US'
// const languageCode = 'en-US';

const request = {
config: {
encoding: encoding,
sampleRate: sampleRate
sampleRateHertz: sampleRateHertz,
languageCode: languageCode
}
};

Expand All @@ -227,7 +211,7 @@ function streamingMicRecognize (encoding, sampleRate) {

// Start recording and send the microphone input to the Speech API
record.start({
sampleRate: sampleRate,
sampleRateHertz: sampleRateHertz,
threshold: 0
}).pipe(recognizeStream);

Expand All @@ -241,37 +225,31 @@ require(`yargs`)
`sync <filename>`,
`Detects speech in a local audio file.`,
{},
(opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRate)
(opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode)
)
.command(
`sync-gcs <gcsUri>`,
`Detects speech in an audio file located in a Google Cloud Storage bucket.`,
{},
(opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
)
.command(
`async <filename>`,
`Creates a job to detect speech in a local audio file, and waits for the job to complete.`,
{},
(opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRate)
(opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode)
)
.command(
`async-gcs <gcsUri>`,
`Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
{},
(opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
(opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRateHertz, opts.languageCode)
)
.command(
`stream <filename>`,
`Detects speech in a local audio file by streaming it to the Speech API.`,
{},
(opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRate)
(opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRateHertz, opts.languageCode)
)
.command(
`listen`,
`Detects speech in a microphone input stream.`,
{},
(opts) => streamingMicRecognize(opts.encoding, opts.sampleRate)
(opts) => streamingMicRecognize(opts.encoding, opts.sampleRateHertz, opts.languageCode)
)
.options({
encoding: {
Expand All @@ -281,12 +259,19 @@ require(`yargs`)
requiresArg: true,
type: 'string'
},
sampleRate: {
sampleRateHertz: {
alias: 'r',
default: 16000,
global: true,
requiresArg: true,
type: 'number'
},
languageCode: {
alias: 'l',
default: 'en-US',
global: true,
requiresArg: true,
type: 'string'
}
})
.example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
Expand Down
3 changes: 2 additions & 1 deletion speech/system-test/quickstart.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ const speech = proxyquire(`@google-cloud/speech`, {})();
const fileName = path.join(__dirname, `../resources/audio.raw`);
const config = {
encoding: `LINEAR16`,
sampleRate: 16000
sampleRateHertz: 16000,
languageCode: `en-US`
};

test.before(stubConsole);
Expand Down
5 changes: 0 additions & 5 deletions speech/system-test/recognize.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,6 @@ test(`should run sync recognize on a GCS file`, async (t) => {
t.true(output.includes(`Transcription: ${text}`));
});

test(`should run async recognize on a local file`, async (t) => {
const output = await runAsync(`${cmd} async ${filepath}`, cwd);
t.true(output.includes(`Transcription: ${text}`));
});

test(`should run async recognize on a GCS file`, async (t) => {
const output = await runAsync(`${cmd} async-gcs gs://${bucketName}/${filename}`, cwd);
t.true(output.includes(`Transcription: ${text}`));
Expand Down