diff --git a/.jscsrc b/.jscsrc index 0be5bbf66a1..c287b5ca557 100644 --- a/.jscsrc +++ b/.jscsrc @@ -26,5 +26,6 @@ "**/test/*/**/*", "**/node_modules/", "**/.coverage/**/*.js", + "**/smoke-test/**/*.js", ] } diff --git a/.jshintignore b/.jshintignore index deb103b74e2..dda5d62f5fc 100644 --- a/.jshintignore +++ b/.jshintignore @@ -4,3 +4,4 @@ **/node_modules/ **/coverage **/gapic*.js +**/smoke-test/**/* diff --git a/packages/speech/README.md b/packages/speech/README.md index cf0fab6e737..ad7bfc45c83 100644 --- a/packages/speech/README.md +++ b/packages/speech/README.md @@ -1,105 +1,59 @@ -# @google-cloud/speech ([Alpha][versioning]) -> Cloud Speech Client Library for Node.js +# Node.js Client for Google Cloud Speech API ([Beta](https://github.com/GoogleCloudPlatform/google-cloud-node#versioning)) -*Looking for more Google APIs than just Speech? You might want to check out [`google-cloud`][google-cloud].* +[Google Cloud Speech API][Product Documentation]: Google Cloud Speech API. +- [Client Library Documentation][] +- [Product Documentation][] -- [API Documentation][gcloud-speech-docs] -- [Official Documentation][cloud-speech-docs] +## Quick Start +In order to use this library, you first need to go through the following steps: +1. [Select or create a Cloud Platform project.](https://console.cloud.google.com/project) +2. [Enable the Google Cloud Speech API.](https://console.cloud.google.com/apis/api/speech) +3. [Setup Authentication.](https://googlecloudplatform.github.io/google-cloud-node/#/docs/google-cloud/master/guides/authentication) -```sh -$ npm install --save @google-cloud/speech +### Installation ``` -```js -var speech = require('@google-cloud/speech')({ - projectId: 'grape-spaceship-123', - keyFilename: '/path/to/keyfile.json' -}); - -// Detect the speech in an audio file. 
-speech.recognize('./audio.raw', { - encoding: 'LINEAR16', - sampleRateHertz: 16000 -}, function(err, transcript) { - // transcript = 'how old is the Brooklyn Bridge' -}); - -// Detect the speech in an audio file stream. -fs.createReadStream('./audio.raw') - .on('error', console.error) - .pipe(speech.createRecognizeStream({ - config: { - encoding: 'LINEAR16', - sampleRateHertz: 16000 - }, - singleUtterance: false, - interimResults: false - })) - .on('error', console.error) - .on('data', function(data) { - // data.results = "how old is the Brooklyn Bridge" - }); - -// Promises are also supported by omitting callbacks. -speech.recognize('./audio.raw', { - encoding: 'LINEAR16', - sampleRateHertz: 16000 -}).then(function(data) { - var transcript = data[0]; -}); - -// It's also possible to integrate with third-party Promise libraries. -var speech = require('@google-cloud/speech')({ - promise: require('bluebird') -}); -``` - - -## Authentication - -It's incredibly easy to get authenticated and start using Google's APIs. You can set your credentials on a global basis as well as on a per-API basis. See each individual API section below to see how you can auth on a per-API-basis. This is useful if you want to use different accounts for different Cloud services. - -### On Google Cloud Platform - -If you are running this client on Google Cloud Platform, we handle authentication for you with no configuration. You just need to make sure that when you [set up the GCE instance][gce-how-to], you add the correct scopes for the APIs you want to access. - -``` js -var speech = require('@google-cloud/speech')(); -// ...you're good to go! +$ npm install --save @google-cloud/speech ``` -### Elsewhere - -If you are not running this client on Google Cloud Platform, you need a Google Developers service account. To create a service account: - -1. Visit the [Google Developers Console][dev-console]. -2. Create a new project or click on an existing project. -3. 
Navigate to **APIs & auth** > **APIs section** and turn on the following APIs (you may need to enable billing in order to use these services): - * Google Cloud Speech API -4. Navigate to **APIs & auth** > **Credentials** and then: - * If you want to use a new service account key, click on **Create credentials** and select **Service account key**. After the account key is created, you will be prompted to download the JSON key file that the library uses to authenticate your requests. - * If you want to generate a new service account key for an existing service account, click on **Generate new JSON key** and download the JSON key file. - -``` js -var projectId = process.env.GCLOUD_PROJECT; // E.g. 'grape-spaceship-123' - -var speech = require('@google-cloud/speech')({ - projectId: projectId, - - // The path to your key file: - keyFilename: '/path/to/keyfile.json' - - // Or the contents of the key file: - credentials: require('./path/to/keyfile.json') -}); - -// ...you're good to go! +### Preview +#### SpeechClient +```js + var speech = require('@google-cloud/speech'); + + var client = speech({ + // optional auth parameters. + }); + + var languageCode = 'en-US'; + var sampleRateHertz = 44100; + var encoding = speech.v1.types.RecognitionConfig.AudioEncoding.FLAC; + var config = { + languageCode : languageCode, + sampleRateHertz : sampleRateHertz, + encoding : encoding + }; + var uri = 'gs://gapic-toolkit/hello.flac'; + var audio = { + uri : uri + }; + var request = { + config: config, + audio: audio + }; + client.recognize(request).then(function(responses) { + var response = responses[0]; + // doThingsWith(response) + }) + .catch(function(err) { + console.error(err); + }); ``` +### Next Steps +- Read the [Client Library Documentation][] for Google Cloud Speech API to see other available methods on the client. +- Read the [Google Cloud Speech API Product documentation][Product Documentation] to learn more about the product and see How-to Guides. 
+- View this [repository's main README](https://github.com/GoogleCloudPlatform/google-cloud-node/blob/master/README.md) to see the full list of Cloud APIs that we cover. -[versioning]: https://github.com/GoogleCloudPlatform/google-cloud-node#versioning -[google-cloud]: https://github.com/GoogleCloudPlatform/google-cloud-node/ -[gce-how-to]: https://cloud.google.com/compute/docs/authentication#using -[dev-console]: https://console.developers.google.com/project -[gcloud-speech-docs]: https://googlecloudplatform.github.io/google-cloud-node/#/docs/speech -[cloud-speech-docs]: https://cloud.google.com/speech +[Client Library Documentation]: https://googlecloudplatform.github.io/google-cloud-node/#/docs/speech +[Product Documentation]: https://cloud.google.com/speech \ No newline at end of file diff --git a/packages/speech/package.json b/packages/speech/package.json index a04fc9d710e..2588d654752 100644 --- a/packages/speech/package.json +++ b/packages/speech/package.json @@ -1,4 +1,5 @@ { + "repository": "GoogleCloudPlatform/google-cloud-node", "name": "@google-cloud/speech", "version": "0.9.4", "author": "Google Inc.", @@ -37,10 +38,8 @@ "files": [ "src", "AUTHORS", - "CONTRIBUTORS", "COPYING" ], - "repository": "googlecloudplatform/google-cloud-node", "keywords": [ "google apis client", "google api client", @@ -51,36 +50,26 @@ "google cloud", "cloud", "google speech", - "speech" + "speech", + "Google Cloud Speech API" ], "dependencies": { - "@google-cloud/common": "^0.13.0", - "@google-cloud/common-grpc": "^0.3.0", - "events-intercept": "^2.0.0", - "extend": "^3.0.0", - "google-gax": "^0.13.0", "google-proto-files": "^0.12.0", - "is": "^3.1.0", - "propprop": "^0.3.1", + "google-gax": "^0.13.2", + "extend": "^3.0.0", "pumpify": "^1.3.5", - "request": "^2.74.0", "stream-events": "^1.0.1", - "string-format-obj": "^1.1.0", - "through2": "^2.0.1" + "through2": "^2.0.3" }, "devDependencies": { - "@google-cloud/storage": "*", - "async": "^2.0.1", - "methmeth": "^1.1.0", - 
"mocha": "^3.0.2", - "proxyquire": "^1.7.10", - "tmp": "^0.0.31", - "uuid": "^3.0.1" + "mocha": "^3.2.0", + "power-assert": "^1.4.2", + "sinon": "^2.2.0" }, "scripts": { "publish-module": "node ../../scripts/publish.js speech", - "test": "mocha test/*.js", - "system-test": "mocha system-test/*.js --no-timeouts --bail" + "smoke-test": "mocha smoke-test/*.js --timeout 5000", + "test": "mocha test/*.js" }, "license": "Apache-2.0", "engines": { diff --git a/packages/speech/smoke-test/speech_smoke_test.js b/packages/speech/smoke-test/speech_smoke_test.js new file mode 100644 index 00000000000..7477fbaaa1b --- /dev/null +++ b/packages/speech/smoke-test/speech_smoke_test.js @@ -0,0 +1,50 @@ +/* + * Copyright 2017, Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +'use strict'; + +describe('SpeechSmokeTest', function() { + + it('successfully makes a call to the service', function(done) { + var speech = require('../src'); + + var client = speech.v1({ + // optional auth parameters. 
+ }); + + var languageCode = 'en-US'; + var sampleRateHertz = 44100; + var encoding = speech.v1.types.RecognitionConfig.AudioEncoding.FLAC; + var config = { + languageCode : languageCode, + sampleRateHertz : sampleRateHertz, + encoding : encoding + }; + var uri = 'gs://gapic-toolkit/hello.flac'; + var audio = { + uri : uri + }; + var request = { + config: config, + audio: audio + }; + client.recognize(request).then(function(responses) { + var response = responses[0]; + console.log(response); + }) + .then(done) + .catch(done); + }); +}); \ No newline at end of file diff --git a/packages/speech/src/helpers.js b/packages/speech/src/helpers.js new file mode 100644 index 00000000000..11a178c586f --- /dev/null +++ b/packages/speech/src/helpers.js @@ -0,0 +1,113 @@ +/*! + * Copyright 2017 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * @module speech/helpers + */ + +'use strict'; + +var pumpify = require('pumpify'); +var streamEvents = require('stream-events'); +var through = require('through2'); + +/*! + * Return a dictionary-like object with helpers to augment the Speech + * GAPIC. + * + * @return {Object} - An object with keys and functions which are placed + * onto the pure GAPIC. + */ +module.exports = () => { + var methods = {}; + + /** + * Performs bidirectional streaming speech recognition: receive results while + * sending audio. This method is only available via the gRPC API (not REST). 
+ * + * @param {Object} config + * The configuration for the stream. This is appropriately wrapped and + * sent as the first argument. It should be an object conforming to the + * [StreamingRecognitionConfig]{@link StreamingRecognitionConfig} + * structure. + * @param {Object=} options + * Optional parameters. You can override the default settings for this + * call, e.g, timeout, retries, paginations, etc. See + * [gax.CallOptions]{@link https://googleapis.github.io/gax-nodejs/global.html#CallOptions} + * for the details. + * @returns {Stream} + * An object stream which is both readable and writable. It accepts + * [StreamingRecognizeRequest]{@link StreamingRecognizeRequest}-like + * objects for the write() method, and will emit objects representing + * [StreamingRecognizeResponse]{@link StreamingRecognizeResponse} on the + * 'data' event asynchronously. + * + * @example + * + * var stream = speech.streamingRecognize({ + * config: { + * encoding: 'LINEAR16', + * languageCode: 'en-us', + * sampleRateHertz: 44100, + * }, + * }).on('data', function(response) { + * // doThingsWith(response); + * }); + * var request = {}; + * // Write request objects. + * stream.write(request); + */ + methods.streamingRecognize = function(config, options) { + if (options === undefined) { + options = {}; + } + + var requestStream = this._streamingRecognize(options); + + // Format the audio content as input request for pipeline + var recognizeStream = streamEvents(pumpify.obj()); + + recognizeStream.once('writing', function() { + requestStream.on('error', function(err) { + recognizeStream.destroy(err); + }); + + requestStream.on('response', function(response) { + recognizeStream.emit('response', response); + }); + + // Write the initial configuration to the stream, + requestStream.write({ + streamingConfig: config + }); + + this.setPipeline([ + // Format the user's input. 
+ through.obj(function(obj, _, next) { + next(null, { + audioContent: obj + }); + }), + + requestStream + ]); + }); + + return recognizeStream; + }; + + return methods; +}; diff --git a/packages/speech/src/index.js b/packages/speech/src/index.js index e590af6f7f6..d55e6a74498 100644 --- a/packages/speech/src/index.js +++ b/packages/speech/src/index.js @@ -1,11 +1,11 @@ -/*! - * Copyright 2016 Google Inc. All Rights Reserved. +/* + * Copyright 2017, Google Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -16,834 +16,57 @@ /*! * @module speech + * @name Speech */ 'use strict'; -var common = require('@google-cloud/common'); -var commonGrpc = require('@google-cloud/common-grpc'); -var eventsIntercept = require('events-intercept'); var extend = require('extend'); -var format = require('string-format-obj'); -var fs = require('fs'); -var googleProtoFiles = require('google-proto-files'); -var is = require('is'); -var path = require('path'); -var prop = require('propprop'); -var pumpify = require('pumpify'); -var request = require('request'); -var streamEvents = require('stream-events'); -var through = require('through2'); -var util = require('util'); -var v1 = require('./v1'); +var gapic = { + v1: require('./v1') +}; +var gaxGrpc = require('google-gax').grpc(); +var helpers = require('./helpers'); + +const VERSION = require('../package.json').version; /** - * The [Cloud Speech API](https://cloud.google.com/speech/docs) enables easy - * integration of Google speech recognition technologies into developer - * applications. 
Send audio and receive a text transcription from the Cloud - * Speech API service. + * Create an speechClient with additional helpers for common + * tasks. + * + * Service that implements Google Cloud Speech API. * * @constructor * @alias module:speech + * @mixes module:speech/helpers * - * @classdesc - * To learn more about the Speech API, see the - * [Getting Started guide](https://cloud.google.com/speech/docs/getting-started). - * - * @resource [Getting Started]{@link https://cloud.google.com/speech/docs/getting-started} - * @resource [Speech Best Practices]{@link https://cloud.google.com/speech/docs/best-practices} - * - * @param {object} options - [Configuration object](#/docs). + * @param {object=} options - [Configuration object](#/docs). + * @param {number=} options.port - The port on which to connect to + * the remote host. + * @param {string=} options.servicePath - The domain name of the + * API remote host. */ -function Speech(options) { - if (!(this instanceof Speech)) { - options = common.util.normalizeArguments(this, options); - return new Speech(options); - } - +function speechV1(options) { + // Define the header options. options = extend({}, options, { libName: 'gccl', - libVersion: require('../package.json').version + libVersion: VERSION }); - this.api = { - Speech: v1(options).speechClient(options) - }; - - var config = { - baseUrl: 'speech.googleapis.com', - projectIdRequired: false, - service: 'speech', - protoServices: { - Operations: { - path: googleProtoFiles('longrunning', 'operations.proto'), - service: 'longrunning' - } - }, - scopes: [ - 'https://www.googleapis.com/auth/cloud-platform' - ], - packageJson: require('../package.json') - }; - - commonGrpc.Service.call(this, config, options); + // Create the speech client with the provided options. 
+ var client = gapic.v1(options).speechClient(options); + Object.assign(client.constructor.prototype, helpers()); + return client; } -util.inherits(Speech, commonGrpc.Service); - -/** - * The event types that the Speech API will return while processing a - * {module:speech#createRecognizeStream} request. You can track the progress of - * audio recognition by comparing the `data.eventType` property with these - * values. - * - * - `Speech.eventTypes.ENDPOINTER_EVENT_UNSPECIFIED`: No event specified. - * - `Speech.eventTypes.END_OF_SINGLE_UTTERANCE`: This event is only sent when - * `config.singleUtterance` passed to {module:speech#createRecognizeStream} - * is `true`. It indicates that the server has detected the end of the - * user's speech utterance and expects no additional speech. Therefore, the - * server will not process additional audio. The client should stop sending - * additional audio data. - * - * @type {object} - */ -Speech.eventTypes = -Speech.prototype.eventTypes = { - END_OF_SINGLE_UTTERANCE: 'END_OF_SINGLE_UTTERANCE', - ENDPOINTER_EVENT_UNSPECIFIED: 'ENDPOINTER_EVENT_UNSPECIFIED' -}; - -/** - * Guess the audio encoding from the file's extension. - * - * @resource [AudioEncoding API Documentation]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.AudioEncoding} - * @private - * - * @throws {Error} If an encoding type could not be determined from the file's - * extension. - * - * @param {string} filename - The name of the file. - * @returns {string} The audio encoding. 
- */ -Speech.detectEncoding_ = function(filename) { - if (!is.string(filename)) { - return; - } - - switch (path.extname(filename).toLowerCase()) { - case '.raw': { - return 'LINEAR16'; - } - case '.amr': { - return 'AMR'; - } - case '.awb': { - return 'AMR_WB'; - } - case '.flac': { - return 'FLAC'; - } - case '.au': - case '.wav': { - return 'MULAW'; - } - default: { - throw new Error('Encoding could not be determined for file: ' + filename); - } - } -}; - -/** - * Determine the type of file the user is asking to be processed. If a - * {module:storage/file}, convert to its "gs://{bucket}/{file}" URL. If a remote - * URL, read the contents. If a file path, load the file. - * - * @private - */ -Speech.findFile_ = function(file, callback) { - if (global.GCLOUD_SANDBOX_ENV) { - callback(null, { - content: new Buffer('') - }); - return; - } - - if (common.util.isCustomType(file, 'storage/file')) { - // File is an instance of module:storage/file. - callback(null, { - uri: format('gs://{bucketName}/{fileName}', { - bucketName: file.bucket.name, - fileName: file.name - }) - }); - return; - } - - if (is.string(file) && file.indexOf('gs://') === 0) { - // File is a Cloud Storage URI. - callback(null, { - uri: file - }); - return; - } - - if (/^http/.test(file)) { - // File is a URL. - request({ - uri: file, - encoding: null - }, function(err, resp, body) { - if (err) { - callback(err); - return; - } - - callback(null, { - content: body - }); - }); - return; - } - - if (Buffer.isBuffer(file)) { - callback(null, { - content: file - }); - return; - } - - if (is.object(file)) { - // This might be a RecognitionAudio object. - if (!file.content && !file.uri) { - var errorMsg = 'RecognitionAudio requires a "content" or "uri" property.'; - callback(new Error(errorMsg)); - } else { - callback(null, file); - } - return; - } - - // File exists on disk. 
- fs.readFile(file, function(err, contents) { - if (err) { - callback(err); - return; - } - - callback(null, { - content: contents - }); - }); -}; - -/** - * Simplify the transcription results from the API. - * - * @resource [SpeechRecognitionResult API Documentation]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.SpeechRecognitionResult} - * @resource [StreamingRecognitionResult API Documentation]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.StreamingRecognitionResult} - * - * @private - * - * @param {object} resultSets - A `SpeechRecognitionResult` or - * `StreamingRecognitionResult` object. - * @param {boolean} verbose - Whether to use verbose mode. - * @return {object} - The simplified results. - * - * @example - * var resultSets = [ - * { - * alternatives: [ - * { - * transcript: 'Result 1a', - * confidence: 0.70 - * }, - * { - * transcript: 'Result 1b', - * confidence: 0.60 - * }, - * ... - * ] - * }, - * { - * alternatives: [ - * { - * transcript: 'Result 2a', - * confidence: 0.90 - * }, - * { - * transcript: 'Result 2b', - * confidence: 0.80 - * }, - * ... - * ] - * } - * ]; - * - * //- - * // Default output. - * //- - * Speech.formatResults_(resultSets); - * // 'Result 1a Result 2a' - * - * //- - * // Verbose output. - * //- - * Speech.formatResults_(resultSets, true); - * // [ - * // { - * // transcript: 'Result 1a', - * // confidence: 70, - * // alternatives: [ - * // { - * // transcript: 'Result 1b', - * // confidence: 60 - * // }, - * // ... - * // ] - * // }, - * // { - * // transcript: 'Result 2a', - * // confidence: 90, - * // alternatives: [ - * // { - * // transcript: 'Result 2b', - * // confidence: 80 - * // }, - * // ... 
- * // ] - * // } - * // ] - */ -Speech.formatResults_ = function(resultSets, verboseMode) { - function multiplyScores(result) { - if (is.defined(result.confidence)) { - result.confidence *= 100; - } - - if (is.defined(result.stability)) { - result.stability *= 100; - } - - return result; - } - - var verboseResultSets = resultSets - .map(function(resultSet) { - resultSet = extend(true, {}, resultSet); - - var mostProbableResult = multiplyScores(resultSet.alternatives.shift()); - - resultSet.transcript = mostProbableResult.transcript; - - if (is.defined(mostProbableResult.confidence)) { - resultSet.confidence = mostProbableResult.confidence; - } - - if (is.defined(mostProbableResult.stability)) { - resultSet.stability = mostProbableResult.stability; - } - - resultSet.alternatives = resultSet.alternatives.map(multiplyScores); - - return resultSet; - }); - - if (!verboseMode) { - return verboseResultSets.map(prop('transcript')).join(' '); - } - - return verboseResultSets; -}; - -/** - * Perform bidirectional streaming speech-recognition: receive results while - * sending audio. - * - * Each emitted `data` event is a - * [`StreamingRecognizeResponse`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.StreamingRecognizeResponse) - * object, containing these properties: - * - * - **`eventType`** See {module:speech#eventTypes}. - * - **`results`** By default, a combined string of transcripts. When - * `config.verbose` is enabled, this is an object including a `transcript` - * property, a `confidence` score from `0` - `100`, and an `alternatives` - * array consisting of other transcription possibilities. - * - * Cloud Speech sets the limits for the audio duration. For more - * information, see - * [Content Limits]{@link https://cloud.google.com/speech/limits#content}. 
- * - * @resource [StreamingRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.Speech.StreamingRecognize} - * @resource [StreamingRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.StreamingRecognizeRequest} - * @resource [Content Limits]{@link https://cloud.google.com/speech/limits#content} - * - * @param {object} config - A `StreamingRecognitionConfig` object. See - * [`StreamingRecognitionConfig`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.StreamingRecognitionConfig). - * @param {string} config.languageCode - The language of the supplied audio as - * [BCP-47 language tag](http://bit.ly/1ZHeENX). Example: 'en-US'. - * @param {number=} config.timeout - In seconds, the amount of time before the - * underlying API request times out. The default value, `190`, is sufficient - * for audio input of 60 seconds or less. If your input is longer, consider - * using a higher timeout value. - * @param {boolean=} config.verbose - Enable verbose mode for a more detailed - * response. See the examples below. Default: `false`. - * - * @example - * var fs = require('fs'); - * - * //- - * // See - * // `StreamingRecognizeRequest` for all of the available configuration - * // options. - * //- - * var request = { - * config: { - * encoding: 'LINEAR16', - * languageCode: 'en-US', - * sampleRateHertz: 16000 - * }, - * singleUtterance: false, - * interimResults: false - * }; - * - * fs.createReadStream('./bridge.raw') - * .on('error', console.error) - * .pipe(speech.createRecognizeStream(request)) - * .on('error', console.error) - * .on('data', function(data) { - * // data.results = "how old is the Brooklyn Bridge" - * }); - * - * //- - * // Enable verbose mode for more detailed results. 
- * //- - * var request = { - * config: { - * encoding: 'LINEAR16', - * languageCode: 'en-US', - * sampleRateHertz: 16000 - * }, - * singleUtterance: false, - * interimResults: false, - * verbose: true - * }; - * - * fs.createReadStream('./system-test/data/bridge.raw') - * .on('error', console.error) - * .pipe(speech.createRecognizeStream(request)) - * .on('error', console.error) - * .on('data', function(data) { - * // data.results = "how old is the Brooklyn Bridge" - * }); - */ -Speech.prototype.createRecognizeStream = function(config) { - var self = this; - - if (!config) { - throw new Error('A recognize request requires a configuration object.'); - } - - config = extend(true, { - config: {} - }, config); - - // As of Speech v1, a language code is required; throw an exception if we did - // not receive one. - if (config.languageCode) { - config.config.languageCode = config.languageCode; - delete config.languageCode; - } - - if (!config.config.languageCode) { - throw new Error('A `languageCode` is required in the config object.'); - } - - var verboseMode = config.verbose === true; - delete config.verbose; - - var gaxOptions = {}; - - if (is.number(config.timeout)) { - gaxOptions.timeout = config.timeout * 1000; - delete config.timeout; - } +var v1Protos = {}; - var recognizeStream = streamEvents(pumpify.obj()); - - recognizeStream.once('writing', function() { - var requestStream = self.api.Speech.streamingRecognize(gaxOptions); - - requestStream.on('error', function(err) { - recognizeStream.destroy(err); - }); - - requestStream.on('response', function(response) { - recognizeStream.emit('response', response); - }); - - requestStream.write({ - streamingConfig: config - }); - - this.setPipeline([ - // Format the user's input. - through.obj(function(obj, _, next) { - next(null, { - audioContent: obj - }); - }), - - requestStream, - - // Format the results. 
- through.obj(function(obj, _, next) { - obj.results = Speech.formatResults_(obj.results, verboseMode); - next(null, obj); - }) - ]); - }); - - return recognizeStream; -}; - -/*! Developer Documentation - * - * @returns {module:commonGrpc/Operation} - */ -/** - * Get a reference to an existing operation. - * - * @throws {Error} If a name is not provided. - * - * @param {string} name - The name of the operation. - * - * @example - * var operation = speech.operation('68850831366825'); - */ -Speech.prototype.operation = function(name) { - if (!name) { - throw new Error('A name must be specified for an operation.'); - } - - return new commonGrpc.Operation(this, name); -}; - -/** - * Perform synchronous speech recognition and receive results after all audio - * has been sent and processed. This is ideal for files 1 MB or below. For - * larger files, you will need to use {module:speech#startRecognition} or - * {module:speech#createRecognizeStream}. - * - * @resource [Recognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.Speech.Recognize} - * @resource [RecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.RecognizeRequest} - * - * @param {string|object|module:storage/file} file - The source file to run the - * detection on. It can be either a local file path, a remote file URL, a - * Cloud Storage URI, a Cloud Storage File object, or a - * [`RecognitionAudio`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.RecognitionAudio) - * object. - * @param {object} config - A `RecognitionConfig` object. See - * [`RecognitionConfig`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.RecognitionConfig). - * @param {string} config.languageCode - The language of the supplied audio as - * [BCP-47 language tag](http://bit.ly/1ZHeENX). Example: 'en-US'. 
- * @param {boolean=} config.verbose - Enable verbose mode for a more detailed - * response. See the examples below. Default: `false`. - * @param {function} callback - The callback function. - * @param {?error} callback.err - An error returned while making this request. - * @param {string|object[]} callback.results - By default, this will be a string - * comprised of all of the transcriptions recognized from the audio. If - * `config.verbose` is enabled, this is an object including a `transcript` - * property, a `confidence` score from `0` - `100`, and an `alternatives` - * array consisting of other transcription possibilities. See the examples - * below for more. - * @param {object} callback.apiResponse - Raw API response. See - * [`RecognizeResponse`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.RecognizeResponse). - * - * @example - * var config = { - * encoding: 'LINEAR16', - * languageCode: 'en-US', - * sampleRateHertz: 16000 - * }; - * - * function callback(err, transcript, apiResponse) { - * if (err) { - * // Error handling omitted. - * } - * - * // transcript = "how old is the Brooklyn Bridge" - * } - * - * //- - * // Run speech detection over a local file. - * //- - * speech.recognize('./bridge.raw', config, callback); - * - * //- - * // Run speech recognition over a file in Cloud Storage. - * //- - * speech.recognize('gs://your-bucket-name/bridge.raw', config, callback); - * - * //- - * // Run speech recognition over raw file contents. - * //- - * speech.recognize({ - * content: fs.readFileSync('./bridge.raw') - * }, config, callback); - * - * //- - * // Run speech recognition over a remote file. - * // - * // Note: This is not an officially supported feature of the Speech API. - * // This library will make a request to the URL given and send the file - * // contents to the upstream API. 
- * //- - * speech.recognize('https://example.com/files/bridge.raw', config, callback); - * - * //- - * // Enable verbose mode for more detailed results. - * //- - * var config = { - * encoding: 'LINEAR16', - * languageCode: 'en-US', - * sampleRateHertz: 16000, - * verbose: true - * }; - * - * speech.recognize('./bridge.raw', config, function(err, results) { - * if (err) { - * // Error handling omitted. - * } - * - * // results = [ - * // { - * // transcript: "how old is the Brooklyn Bridge", - * // confidence: 88.15, - * // alternatives: [ - * // { - * // transcript: "how old is the Brooklyn brim", - * // confidence: 22.39 - * // } - * // ] - * // } - * // ] - * }); - * - * //- - * // If the callback is omitted, we'll return a Promise. - * //- - * speech.recognize('./bridge.raw', config).then(function(data) { - * var results = data[0]; - * var apiResponse = data[1]; - * }); - */ -Speech.prototype.recognize = function(file, config, callback) { - var self = this; - - if (!is.object(config)) { - throw new Error('A recognize request requires a configuration object.'); - } - - config = extend(true, {}, config); - - // As of Speech v1, a language code is required; throw an exception if we - // did not receive one. - if (is.undefined(config.languageCode)) { - throw new Error('A `languageCode` is required in the config object.'); - } - - if (!config.encoding) { - config.encoding = Speech.detectEncoding_(file); - } - - var verboseMode = config.verbose === true; - delete config.verbose; - - Speech.findFile_(file, function(err, foundFile) { - if (err) { - callback(err); - return; - } - - self.api.Speech.recognize({ - config: config, - audio: foundFile - }, function(err, resp) { - if (err) { - callback(err, null, resp); - return; - } - - var results = Speech.formatResults_(resp.results, verboseMode); - - callback(null, results, resp); - }); - }); -}; - -/** - * Perform asynchronous speech recognition. 
- * - * This method sends audio to the Speech API, which immediately responds with an - * Operation object. Register event handlers for the "error" and "complete" - * events to see how the operation finishes. Follow along with the examples - * below. - * - * @resource [LongRunningRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.Speech.LongRunningRecognize} - * @resource [LongRunningRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.LongRunningRecognizeRequest} - * @resource [LongRunningRecognizeResponse API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.LongRunningRecognizeResponse} - * - * @param {string|object|module:storage/file} file - The source file to run the - * detection on. It can be either a local file path, a remote file URL, a - * Cloud Storage URI, a Cloud Storage File object, or a - * [`RecognitionAudio`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.RecognitionAudio) - * object. - * @param {object} config - A `RecognitionConfig` object. See - * [`RecognitionConfig`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.RecognitionConfig). - * @param {boolean=} config.verbose - Enable verbose mode for a more detailed - * response. See the examples below. Default: `false`. - * @param {string} config.languageCode - The language of the supplied audio as - * [BCP-47 language tag](http://bit.ly/1ZHeENX). Example: 'en-US'. - * @param {function} callback - The callback function. - * @param {?error} callback.err - An error returned while making this request. - * @param {module:speech/operation} callback.operation - An operation object - * that can be used to check the status of the request. - * @param {object} callback.apiResponse - Raw API response. 
- * - * @example - * var config = { - * encoding: 'LINEAR16', - * languageCode: 'en-US', - * sampleRateHertz: 16000 - * }; - * - * function callback(err, operation, apiResponse) { - * if (err) { - * // Error handling omitted. - * } - * - * operation - * .on('error', function(err) {}) - * .on('complete', function(transcript) { - * // transcript = "how old is the Brooklyn Bridge" - * }); - * } - * - * //- - * // Run speech detection over a local file. - * //- - * speech.startRecognition('./bridge.raw', config, callback); - * - * //- - * // Run speech detection over a file in Cloud Storage. - * //- - * var file = 'gs://your-bucket-name/bridge.raw'; - * speech.startRecognition(file, config, callback); - * - * //- - * // Run speech detection over raw file contents. - * //- - * speech.startRecognition({ - * content: fs.readFileSync('./bridge.raw') - * }, config, callback); - * - * //- - * // Run speech detection over a remote file. - * // - * // Note: This is not an officially supported feature of the Speech API. - * // This library will make a request to the URL given and send the file - * // contents to the upstream API. - * //- - * var file = 'https://example.com/files/bridge.raw'; - * - * speech.startRecognition(file, config, callback); - * - * //- - * // Enable verbose mode for more detailed results. - * //- - * var config = { - * encoding: 'LINEAR16', - * languageCode: 'en-US', - * sampleRateHertz: 16000, - * verbose: true - * }; - * - * speech.startRecognition('./bridge.raw', config, function(err, operation) { - * if (err) { - * // Error handling omitted. - * } - * - * operation - * .on('error', function(err) {}) - * .on('complete', function(results) { - * // results = [ - * // { - * // transcript: "how old is the Brooklyn Bridge", - * // confidence: 88.15 - * // } - * // ] - * }); - * }); - * - * //- - * // If the callback is omitted, we'll return a Promise. 
- * //- - * speech.startRecognition('./bridge.raw', config).then(function(data) { - * var operation = data[0]; - * var apiResponse = data[1]; - * }); - */ -Speech.prototype.startRecognition = function(file, config, callback) { - var self = this; - - config = extend(true, {}, config); - - // As of Speech v1, a language code is required; throw an exception if we - // did not receive one. - if (is.undefined(config.languageCode)) { - throw new Error('A `languageCode` is required in the config object.'); - } - - if (!config.encoding) { - config.encoding = Speech.detectEncoding_(file); - } - - var verboseMode = config.verbose === true; - delete config.verbose; - - Speech.findFile_(file, function(err, foundFile) { - if (err) { - callback(err); - return; - } - - self.api.Speech.longRunningRecognize({ - config: config, - audio: foundFile - }, function(err, operation, resp) { - if (err) { - callback(err, null, resp); - return; - } - - eventsIntercept.patch(operation); - operation.intercept('complete', function(result, meta, resp, callback) { - callback(null, Speech.formatResults_(result.results, verboseMode)); - }); - - callback(null, operation, resp); - }); - }); -}; - -/*! Developer Documentation - * - * All async methods (except for streams) will return a Promise in the event - * that a callback is omitted. 
- */ -common.util.promisifyAll(Speech, { - exclude: ['operation'] -}); +extend(v1Protos, gaxGrpc.load([{ + root: require('google-proto-files')('..'), + file: 'google/cloud/speech/v1/cloud_speech.proto' +}]).google.cloud.speech.v1); -module.exports = Speech; -module.exports.v1 = v1; +module.exports = speechV1; +module.exports.types = v1Protos; +module.exports.v1 = speechV1; +module.exports.v1.types = v1Protos; diff --git a/packages/speech/src/v1/doc/doc_cloud_speech.js b/packages/speech/src/v1/doc/doc_cloud_speech.js new file mode 100644 index 00000000000..0438652ccb2 --- /dev/null +++ b/packages/speech/src/v1/doc/doc_cloud_speech.js @@ -0,0 +1,534 @@ +/* + * Copyright 2017, Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Note: this file is purely for documentation. Any contents are not expected + * to be loaded as the JS file. + */ + +/** + * The top-level message sent by the client for the `Recognize` method. + * + * @property {Object} config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link RecognitionConfig} + * + * @property {Object} audio + * *Required* The audio data to be recognized. 
+ * + * This object should have the same structure as [RecognitionAudio]{@link RecognitionAudio} + * + * @class + * @see [google.cloud.speech.v1.RecognizeRequest definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var RecognizeRequest = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * The top-level message sent by the client for the `LongRunningRecognize` + * method. + * + * @property {Object} config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link RecognitionConfig} + * + * @property {Object} audio + * *Required* The audio data to be recognized. + * + * This object should have the same structure as [RecognitionAudio]{@link RecognitionAudio} + * + * @class + * @see [google.cloud.speech.v1.LongRunningRecognizeRequest definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var LongRunningRecognizeRequest = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * The top-level message sent by the client for the `StreamingRecognize` method. + * Multiple `StreamingRecognizeRequest` messages are sent. The first message + * must contain a `streaming_config` message and must not contain `audio` data. + * All subsequent messages must contain `audio` data and must not contain a + * `streaming_config` message. + * + * @property {Object} streamingConfig + * Provides information to the recognizer that specifies how to process the + * request. The first `StreamingRecognizeRequest` message must contain a + * `streaming_config` message. 
+ * + * This object should have the same structure as [StreamingRecognitionConfig]{@link StreamingRecognitionConfig} + * + * @property {string} audioContent + * The audio data to be recognized. Sequential chunks of audio data are sent + * in sequential `StreamingRecognizeRequest` messages. The first + * `StreamingRecognizeRequest` message must not contain `audio_content` data + * and all subsequent `StreamingRecognizeRequest` messages must contain + * `audio_content` data. The audio bytes must be encoded as specified in + * `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + * pure binary representation (not base64). See + * [audio limits](https://cloud.google.com/speech/limits#content). + * + * @class + * @see [google.cloud.speech.v1.StreamingRecognizeRequest definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var StreamingRecognizeRequest = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Provides information to the recognizer that specifies how to process the + * request. + * + * @property {Object} config + * *Required* Provides information to the recognizer that specifies how to + * process the request. + * + * This object should have the same structure as [RecognitionConfig]{@link RecognitionConfig} + * + * @property {boolean} singleUtterance + * *Optional* If `false` or omitted, the recognizer will perform continuous + * recognition (continuing to wait for and process audio even if the user + * pauses speaking) until the client closes the input stream (gRPC API) or + * until the maximum time limit has been reached. May return multiple + * `StreamingRecognitionResult`s with the `is_final` flag set to `true`. + * + * If `true`, the recognizer will detect a single spoken utterance. 
When it + * detects that the user has paused or stopped speaking, it will return an + * `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no + * more than one `StreamingRecognitionResult` with the `is_final` flag set to + * `true`. + * + * @property {boolean} interimResults + * *Optional* If `true`, interim results (tentative hypotheses) may be + * returned as they become available (these interim results are indicated with + * the `is_final=false` flag). + * If `false` or omitted, only `is_final=true` result(s) are returned. + * + * @class + * @see [google.cloud.speech.v1.StreamingRecognitionConfig definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var StreamingRecognitionConfig = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Provides information to the recognizer that specifies how to process the + * request. + * + * @property {number} encoding + * *Required* Encoding of audio data sent in all `RecognitionAudio` messages. + * + * The number should be among the values of [AudioEncoding]{@link AudioEncoding} + * + * @property {number} sampleRateHertz + * *Required* Sample rate in Hertz of the audio data sent in all + * `RecognitionAudio` messages. Valid values are: 8000-48000. + * 16000 is optimal. For best results, set the sampling rate of the audio + * source to 16000 Hz. If that's not possible, use the native sample rate of + * the audio source (instead of re-sampling). + * + * @property {string} languageCode + * *Required* The language of the supplied audio as a + * [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag. + * Example: "en-US". + * See [Language Support](https://cloud.google.com/speech/docs/languages) + * for a list of the currently supported language codes. + * + * @property {number} maxAlternatives + * *Optional* Maximum number of recognition hypotheses to be returned. 
+ * Specifically, the maximum number of `SpeechRecognitionAlternative` messages + * within each `SpeechRecognitionResult`. + * The server may return fewer than `max_alternatives`. + * Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of + * one. If omitted, will return a maximum of one. + * + * @property {boolean} profanityFilter + * *Optional* If set to `true`, the server will attempt to filter out + * profanities, replacing all but the initial character in each filtered word + * with asterisks, e.g. "f***". If set to `false` or omitted, profanities + * won't be filtered out. + * + * @property {Object[]} speechContexts + * *Optional* A means to provide context to assist the speech recognition. + * + * This object should have the same structure as [SpeechContext]{@link SpeechContext} + * + * @class + * @see [google.cloud.speech.v1.RecognitionConfig definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var RecognitionConfig = { + // This is for documentation. Actual contents will be loaded by gRPC. + + /** + * Audio encoding of the data sent in the audio message. All encodings support + * only 1 channel (mono) audio. Only `FLAC` includes a header that describes + * the bytes of audio that follow the header. The other encodings are raw + * audio bytes with no header. + * + * For best results, the audio source should be captured and transmitted using + * a lossless encoding (`FLAC` or `LINEAR16`). Recognition accuracy may be + * reduced if lossy codecs, which include the other codecs listed in + * this section, are used to capture or transmit the audio, particularly if + * background noise is present. + * + * @enum {number} + */ + AudioEncoding: { + + /** + * Not specified. Will return result {@link google.rpc.Code.INVALID_ARGUMENT}. + */ + ENCODING_UNSPECIFIED: 0, + + /** + * Uncompressed 16-bit signed little-endian samples (Linear PCM). 
+ */ + LINEAR16: 1, + + /** + * [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio + * Codec) is the recommended encoding because it is + * lossless--therefore recognition is not compromised--and + * requires only about half the bandwidth of `LINEAR16`. `FLAC` stream + * encoding supports 16-bit and 24-bit samples, however, not all fields in + * `STREAMINFO` are supported. + */ + FLAC: 2, + + /** + * 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. + */ + MULAW: 3, + + /** + * Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000. + */ + AMR: 4, + + /** + * Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000. + */ + AMR_WB: 5, + + /** + * Opus encoded audio frames in Ogg container + * ([OggOpus](https://wiki.xiph.org/OggOpus)). + * `sample_rate_hertz` must be 16000. + */ + OGG_OPUS: 6, + + /** + * Although the use of lossy encodings is not recommended, if a very low + * bitrate encoding is required, `OGG_OPUS` is highly preferred over + * Speex encoding. The [Speex](https://speex.org/) encoding supported by + * Cloud Speech API has a header byte in each block, as in MIME type + * `audio/x-speex-with-header-byte`. + * It is a variant of the RTP Speex encoding defined in + * [RFC 5574](https://tools.ietf.org/html/rfc5574). + * The stream is a sequence of blocks, one block per RTP packet. Each block + * starts with a byte containing the length of the block, in bytes, followed + * by one or more frames of Speex data, padded to an integral number of + * bytes (octets) as specified in RFC 5574. In other words, each RTP header + * is replaced with a single byte containing the block length. Only Speex + * wideband is supported. `sample_rate_hertz` must be 16000. + */ + SPEEX_WITH_HEADER_BYTE: 7 + } +}; + +/** + * Provides "hints" to the speech recognizer to favor specific words and phrases + * in the results. 
+ * + * @property {string[]} phrases + * *Optional* A list of strings containing words and phrases "hints" so that + * the speech recognition is more likely to recognize them. This can be used + * to improve the accuracy for specific words and phrases, for example, if + * specific commands are typically spoken by the user. This can also be used + * to add additional words to the vocabulary of the recognizer. See + * [usage limits](https://cloud.google.com/speech/limits#content). + * + * @class + * @see [google.cloud.speech.v1.SpeechContext definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var SpeechContext = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Contains audio data in the encoding specified in the `RecognitionConfig`. + * Either `content` or `uri` must be supplied. Supplying both or neither + * returns {@link google.rpc.Code.INVALID_ARGUMENT}. See + * [audio limits](https://cloud.google.com/speech/limits#content). + * + * @property {string} content + * The audio data bytes encoded as specified in + * `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + * pure binary representation, whereas JSON representations use base64. + * + * @property {string} uri + * URI that points to a file that contains audio data bytes as specified in + * `RecognitionConfig`. Currently, only Google Cloud Storage URIs are + * supported, which must be specified in the following format: + * `gs://bucket_name/object_name` (other URI formats return + * {@link google.rpc.Code.INVALID_ARGUMENT}). For more information, see + * [Request URIs](https://cloud.google.com/storage/docs/reference-uris). 
+ * + * @class + * @see [google.cloud.speech.v1.RecognitionAudio definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var RecognitionAudio = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * The only message returned to the client by the `Recognize` method. It + * contains the result as zero or more sequential `SpeechRecognitionResult` + * messages. + * + * @property {Object[]} results + * *Output-only* Sequential list of transcription results corresponding to + * sequential portions of audio. + * + * This object should have the same structure as [SpeechRecognitionResult]{@link SpeechRecognitionResult} + * + * @class + * @see [google.cloud.speech.v1.RecognizeResponse definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var RecognizeResponse = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * The only message returned to the client by the `LongRunningRecognize` method. + * It contains the result as zero or more sequential `SpeechRecognitionResult` + * messages. It is included in the `result.response` field of the `Operation` + * returned by the `GetOperation` call of the `google::longrunning::Operations` + * service. + * + * @property {Object[]} results + * *Output-only* Sequential list of transcription results corresponding to + * sequential portions of audio. + * + * This object should have the same structure as [SpeechRecognitionResult]{@link SpeechRecognitionResult} + * + * @class + * @see [google.cloud.speech.v1.LongRunningRecognizeResponse definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var LongRunningRecognizeResponse = { + // This is for documentation. Actual contents will be loaded by gRPC. 
+}; + +/** + * Describes the progress of a long-running `LongRunningRecognize` call. It is + * included in the `metadata` field of the `Operation` returned by the + * `GetOperation` call of the `google::longrunning::Operations` service. + * + * @property {number} progressPercent + * Approximate percentage of audio processed thus far. Guaranteed to be 100 + * when the audio is fully processed and the results are available. + * + * @property {Object} startTime + * Time when the request was received. + * + * This object should have the same structure as [google.protobuf.Timestamp]{@link external:"google.protobuf.Timestamp"} + * + * @property {Object} lastUpdateTime + * Time of the most recent processing update. + * + * This object should have the same structure as [google.protobuf.Timestamp]{@link external:"google.protobuf.Timestamp"} + * + * @class + * @see [google.cloud.speech.v1.LongRunningRecognizeMetadata definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var LongRunningRecognizeMetadata = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * `StreamingRecognizeResponse` is the only message returned to the client by + * `StreamingRecognize`. A series of one or more `StreamingRecognizeResponse` + * messages are streamed back to the client. + * + * Here's an example of a series of ten `StreamingRecognizeResponse`s that might + * be returned while processing audio: + * + * 1. results { alternatives { transcript: "tube" } stability: 0.01 } + * + * 2. results { alternatives { transcript: "to be a" } stability: 0.01 } + * + * 3. results { alternatives { transcript: "to be" } stability: 0.9 } + * results { alternatives { transcript: " or not to be" } stability: 0.01 } + * + * 4. results { alternatives { transcript: "to be or not to be" + * confidence: 0.92 } + * alternatives { transcript: "to bee or not to bee" } + * is_final: true } + * + * 5. 
results { alternatives { transcript: " that's" } stability: 0.01 } + * + * 6. results { alternatives { transcript: " that is" } stability: 0.9 } + * results { alternatives { transcript: " the question" } stability: 0.01 } + * + * 7. speech_event_type: END_OF_SINGLE_UTTERANCE + * + * 8. results { alternatives { transcript: " that is the question" + * confidence: 0.98 } + * alternatives { transcript: " that was the question" } + * is_final: true } + * + * Notes: + * + * - Only two of the above responses #4 and #8 contain final results; they are + * indicated by `is_final: true`. Concatenating these together generates the + * full transcript: "to be or not to be that is the question". + * + * - The others contain interim `results`. #3 and #6 contain two interim + * `results`: the first portion has a high stability and is less likely to + * change; the second portion has a low stability and is very likely to + * change. A UI designer might choose to show only high stability `results`. + * + * - The specific `stability` and `confidence` values shown above are only for + * illustrative purposes. Actual values may vary. + * + * - In each response, only one of these fields will be set: + * `error`, + * `speech_event_type`, or + * one or more (repeated) `results`. + * + * @property {Object} error + * *Output-only* If set, returns a {@link google.rpc.Status} message that + * specifies the error for the operation. + * + * This object should have the same structure as [google.rpc.Status]{@link external:"google.rpc.Status"} + * + * @property {Object[]} results + * *Output-only* This repeated list contains zero or more results that + * correspond to consecutive portions of the audio currently being processed. + * It contains zero or one `is_final=true` result (the newly settled portion), + * followed by zero or more `is_final=false` results. 
+ * + * This object should have the same structure as [StreamingRecognitionResult]{@link StreamingRecognitionResult} + * + * @property {number} speechEventType + * *Output-only* Indicates the type of speech event. + * + * The number should be among the values of [SpeechEventType]{@link SpeechEventType} + * + * @class + * @see [google.cloud.speech.v1.StreamingRecognizeResponse definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var StreamingRecognizeResponse = { + // This is for documentation. Actual contents will be loaded by gRPC. + + /** + * Indicates the type of speech event. + * + * @enum {number} + */ + SpeechEventType: { + + /** + * No speech event specified. + */ + SPEECH_EVENT_UNSPECIFIED: 0, + + /** + * This event indicates that the server has detected the end of the user's + * speech utterance and expects no additional speech. Therefore, the server + * will not process additional audio (although it may subsequently return + * additional results). The client should stop sending additional audio + * data, half-close the gRPC connection, and wait for any additional results + * until the server closes the gRPC connection. This event is only sent if + * `single_utterance` was set to `true`, and is not used otherwise. + */ + END_OF_SINGLE_UTTERANCE: 1 + } +}; + +/** + * A streaming speech recognition result corresponding to a portion of the audio + * that is currently being processed. + * + * @property {Object[]} alternatives + * *Output-only* May contain one or more recognition hypotheses (up to the + * maximum specified in `max_alternatives`). + * + * This object should have the same structure as [SpeechRecognitionAlternative]{@link SpeechRecognitionAlternative} + * + * @property {boolean} isFinal + * *Output-only* If `false`, this `StreamingRecognitionResult` represents an + * interim result that may change. 
If `true`, this is the final time the + * speech service will return this particular `StreamingRecognitionResult`, + * the recognizer will not return any further hypotheses for this portion of + * the transcript and corresponding audio. + * + * @property {number} stability + * *Output-only* An estimate of the likelihood that the recognizer will not + * change its guess about this interim result. Values range from 0.0 + * (completely unstable) to 1.0 (completely stable). + * This field is only provided for interim results (`is_final=false`). + * The default of 0.0 is a sentinel value indicating `stability` was not set. + * + * @class + * @see [google.cloud.speech.v1.StreamingRecognitionResult definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var StreamingRecognitionResult = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * A speech recognition result corresponding to a portion of the audio. + * + * @property {Object[]} alternatives + * *Output-only* May contain one or more recognition hypotheses (up to the + * maximum specified in `max_alternatives`). + * + * This object should have the same structure as [SpeechRecognitionAlternative]{@link SpeechRecognitionAlternative} + * + * @class + * @see [google.cloud.speech.v1.SpeechRecognitionResult definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var SpeechRecognitionResult = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; + +/** + * Alternative hypotheses (a.k.a. n-best list). + * + * @property {string} transcript + * *Output-only* Transcript text representing the words that the user spoke. + * + * @property {number} confidence + * *Output-only* The confidence estimate between 0.0 and 1.0. 
A higher number + * indicates an estimated greater likelihood that the recognized words are + * correct. This field is typically provided only for the top hypothesis, and + * only for `is_final=true` results. Clients should not rely on the + * `confidence` field as it is not guaranteed to be accurate, or even set, in + * any of the results. + * The default of 0.0 is a sentinel value indicating `confidence` was not set. + * + * @class + * @see [google.cloud.speech.v1.SpeechRecognitionAlternative definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto} + */ +var SpeechRecognitionAlternative = { + // This is for documentation. Actual contents will be loaded by gRPC. +}; \ No newline at end of file diff --git a/packages/speech/src/v1/doc/doc_google_protobuf_any.js b/packages/speech/src/v1/doc/doc_google_protobuf_any.js new file mode 100644 index 00000000000..0697ec15814 --- /dev/null +++ b/packages/speech/src/v1/doc/doc_google_protobuf_any.js @@ -0,0 +1,121 @@ +/* + * Copyright 2017, Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Note: this file is purely for documentation. Any contents are not expected + * to be loaded as the JS file. + */ + +/** + * `Any` contains an arbitrary serialized protocol buffer message along with a + * URL that describes the type of the serialized message. 
+ *
+ * Protobuf library provides support to pack/unpack Any values in the form
+ * of utility functions or additional generated methods of the Any type.
+ *
+ * Example 1: Pack and unpack a message in C++.
+ *
+ *     Foo foo = ...;
+ *     Any any;
+ *     any.PackFrom(foo);
+ *     ...
+ *     if (any.UnpackTo(&foo)) {
+ *       ...
+ *     }
+ *
+ * Example 2: Pack and unpack a message in Java.
+ *
+ *     Foo foo = ...;
+ *     Any any = Any.pack(foo);
+ *     ...
+ *     if (any.is(Foo.class)) {
+ *       foo = any.unpack(Foo.class);
+ *     }
+ *
+ * Example 3: Pack and unpack a message in Python.
+ *
+ *     foo = Foo(...)
+ *     any = Any()
+ *     any.Pack(foo)
+ *     ...
+ *     if any.Is(Foo.DESCRIPTOR):
+ *       any.Unpack(foo)
+ *       ...
+ *
+ * The pack methods provided by protobuf library will by default use
+ * 'type.googleapis.com/full.type.name' as the type URL and the unpack
+ * methods only use the fully qualified type name after the last '/'
+ * in the type URL, for example "foo.bar.com/x/y.z" will yield type
+ * name "y.z".
+ *
+ *
+ * # JSON
+ *
+ * The JSON representation of an `Any` value uses the regular
+ * representation of the deserialized, embedded message, with an
+ * additional field `@type` which contains the type URL. Example:
+ *
+ *     package google.profile;
+ *     message Person {
+ *       string first_name = 1;
+ *       string last_name = 2;
+ *     }
+ *
+ *     {
+ *       "@type": "type.googleapis.com/google.profile.Person",
+ *       "firstName": <string>,
+ *       "lastName": <string>
+ *     }
+ *
+ * If the embedded message type is well-known and has a custom JSON
+ * representation, that representation will be embedded adding a field
+ * `value` which holds the custom JSON in addition to the `@type`
+ * field. Example (for message {@link google.protobuf.Duration}):
+ *
+ *     {
+ *       "@type": "type.googleapis.com/google.protobuf.Duration",
+ *       "value": "1.212s"
+ *     }
+ *
+ * @external "google.protobuf.Any"
+ * @property {string} typeUrl
+ *   A URL/resource name whose content describes the type of the
+ *   serialized protocol buffer message.
+ * + * For URLs which use the scheme `http`, `https`, or no scheme, the + * following restrictions and interpretations apply: + * + * * If no scheme is provided, `https` is assumed. + * * The last segment of the URL's path must represent the fully + * qualified name of the type (as in `path/google.protobuf.Duration`). + * The name should be in a canonical form (e.g., leading "." is + * not accepted). + * * An HTTP GET on the URL must yield a {@link google.protobuf.Type} + * value in binary format, or produce an error. + * * Applications are allowed to cache lookup results based on the + * URL, or have them precompiled into a binary to avoid any + * lookup. Therefore, binary compatibility needs to be preserved + * on changes to types. (Use versioned type names to manage + * breaking changes.) + * + * Schemes other than `http`, `https` (or the empty scheme) might be + * used with implementation specific semantics. + * + * @property {string} value + * Must be a valid serialized protocol buffer of the above specified type. + * + * @see [google.protobuf.Any definition in proto format]{@link https://github.com/google/protobuf/blob/master/src/google/protobuf/any.proto} + */ \ No newline at end of file diff --git a/packages/speech/src/v1/doc/doc_google_rpc_status.js b/packages/speech/src/v1/doc/doc_google_rpc_status.js new file mode 100644 index 00000000000..c85f1befe90 --- /dev/null +++ b/packages/speech/src/v1/doc/doc_google_rpc_status.js @@ -0,0 +1,92 @@ +/* + * Copyright 2017, Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Note: this file is purely for documentation. Any contents are not expected + * to be loaded as the JS file. + */ + +/** + * The `Status` type defines a logical error model that is suitable for different + * programming environments, including REST APIs and RPC APIs. It is used by + * [gRPC](https://github.com/grpc). The error model is designed to be: + * + * - Simple to use and understand for most users + * - Flexible enough to meet unexpected needs + * + * # Overview + * + * The `Status` message contains three pieces of data: error code, error message, + * and error details. The error code should be an enum value of + * {@link google.rpc.Code}, but it may accept additional error codes if needed. The + * error message should be a developer-facing English message that helps + * developers *understand* and *resolve* the error. If a localized user-facing + * error message is needed, put the localized message in the error details or + * localize it in the client. The optional error details may contain arbitrary + * information about the error. There is a predefined set of error detail types + * in the package `google.rpc` which can be used for common error conditions. + * + * # Language mapping + * + * The `Status` message is the logical representation of the error model, but it + * is not necessarily the actual wire format. When the `Status` message is + * exposed in different client libraries and different wire protocols, it can be + * mapped differently. For example, it will likely be mapped to some exceptions + * in Java, but more likely mapped to some error codes in C. + * + * # Other uses + * + * The error model and the `Status` message can be used in a variety of + * environments, either with or without APIs, to provide a + * consistent developer experience across different environments. 
+ * + * Example uses of this error model include: + * + * - Partial errors. If a service needs to return partial errors to the client, + * it may embed the `Status` in the normal response to indicate the partial + * errors. + * + * - Workflow errors. A typical workflow has multiple steps. Each step may + * have a `Status` message for error reporting purpose. + * + * - Batch operations. If a client uses batch request and batch response, the + * `Status` message should be used directly inside batch response, one for + * each error sub-response. + * + * - Asynchronous operations. If an API call embeds asynchronous operation + * results in its response, the status of those operations should be + * represented directly using the `Status` message. + * + * - Logging. If some API errors are stored in logs, the message `Status` could + * be used directly after any stripping needed for security/privacy reasons. + * + * @external "google.rpc.Status" + * @property {number} code + * The status code, which should be an enum value of {@link google.rpc.Code}. + * + * @property {string} message + * A developer-facing error message, which should be in English. Any + * user-facing error message should be localized and sent in the + * {@link google.rpc.Status.details} field, or localized by the client. + * + * @property {Object[]} details + * A list of messages that carry the error details. There will be a + * common set of message types for APIs to use. + * + * This object should have the same structure as [google.protobuf.Any]{@link external:"google.protobuf.Any"} + * + * @see [google.rpc.Status definition in proto format]{@link https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto} + */ \ No newline at end of file diff --git a/packages/speech/src/v1/index.js b/packages/speech/src/v1/index.js index 65030f74032..0abd7e3dada 100644 --- a/packages/speech/src/v1/index.js +++ b/packages/speech/src/v1/index.js @@ -1,11 +1,11 @@ /* - * Copyright 2016 Google Inc. 
All rights reserved. + * Copyright 2017, Google Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -27,7 +27,8 @@ function v1(options) { return speechClient(gaxGrpc); } +v1.GAPIC_VERSION = '0.7.1'; v1.SERVICE_ADDRESS = speechClient.SERVICE_ADDRESS; v1.ALL_SCOPES = speechClient.ALL_SCOPES; -module.exports = v1; +module.exports = v1; \ No newline at end of file diff --git a/packages/speech/src/v1/speech_client.js b/packages/speech/src/v1/speech_client.js index f6975908973..bb3096b41ef 100644 --- a/packages/speech/src/v1/speech_client.js +++ b/packages/speech/src/v1/speech_client.js @@ -52,15 +52,6 @@ var ALL_SCOPES = [ /** * Service that implements Google Cloud Speech API. * - * This will be created through a builder function which can be obtained by the module. - * See the following example of how to initialize the module and how to access to the builder. - * @see {@link speechClient} - * - * @example - * var speechV1 = require('@google-cloud/speech').v1({ - * // optional auth parameters. - * }); - * var client = speechV1.speechClient(); * * @class */ @@ -165,8 +156,13 @@ SpeechClient.prototype.getProjectId = function(callback) { * * @example * - * var client = speechV1.speechClient(); - * var encoding = speechV1.RecognitionConfig.AudioEncoding.FLAC; + * var speech = require('@google-cloud/speech'); + * + * var client = speech.v1({ + * // optional auth parameters. 
+ * }); + * + * var encoding = speech.v1.types.RecognitionConfig.AudioEncoding.FLAC; * var sampleRateHertz = 44100; * var languageCode = 'en-US'; * var config = { @@ -185,7 +181,8 @@ SpeechClient.prototype.getProjectId = function(callback) { * client.recognize(request).then(function(responses) { * var response = responses[0]; * // doThingsWith(response) - * }).catch(function(err) { + * }) + * .catch(function(err) { * console.error(err); * }); */ @@ -231,8 +228,13 @@ SpeechClient.prototype.recognize = function(request, options, callback) { * * @example * - * var client = speechV1.speechClient(); - * var encoding = speechV1.RecognitionConfig.AudioEncoding.FLAC; + * var speech = require('@google-cloud/speech'); + * + * var client = speech.v1({ + * // optional auth parameters. + * }); + * + * var encoding = speech.v1.types.RecognitionConfig.AudioEncoding.FLAC; * var sampleRateHertz = 44100; * var languageCode = 'en-US'; * var config = { @@ -265,7 +267,8 @@ SpeechClient.prototype.recognize = function(request, options, callback) { * * // The response of the api call returning the complete operation. * var finalApiResponse = responses[2]; - * }).catch(function(err) { + * }) + * .catch(function(err) { * console.error(err); * }); * @@ -290,7 +293,8 @@ SpeechClient.prototype.recognize = function(request, options, callback) { * operation.on('error', function(err) { * // throw(err); * }) - * }).catch(function(err) { + * }) + * .catch(function(err) { * console.error(err); * }); */ @@ -320,9 +324,14 @@ SpeechClient.prototype.longRunningRecognize = function(request, options, callbac * * @example * - * var client = speechV1.speechClient(); + * var speech = require('@google-cloud/speech'); + * + * var client = speech.v1({ + * // optional auth parameters. + * }); + * * var stream = client.streamingRecognize().on('data', function(response) { - * // doThingsWith(response); + * // doThingsWith(response) * }); * var request = {}; * // Write request objects. 
@@ -369,4 +378,4 @@ function SpeechClientBuilder(gaxGrpc) { } module.exports = SpeechClientBuilder; module.exports.SERVICE_ADDRESS = SERVICE_ADDRESS; -module.exports.ALL_SCOPES = ALL_SCOPES; \ No newline at end of file +module.exports.ALL_SCOPES = ALL_SCOPES; diff --git a/packages/speech/system-test/data/bridge.raw b/packages/speech/system-test/data/bridge.raw deleted file mode 100644 index 5ebf79d3c9c..00000000000 Binary files a/packages/speech/system-test/data/bridge.raw and /dev/null differ diff --git a/packages/speech/system-test/data/quit.raw b/packages/speech/system-test/data/quit.raw deleted file mode 100644 index a01dfc45a59..00000000000 Binary files a/packages/speech/system-test/data/quit.raw and /dev/null differ diff --git a/packages/speech/system-test/data/spain.raw b/packages/speech/system-test/data/spain.raw deleted file mode 100644 index 35413b78817..00000000000 Binary files a/packages/speech/system-test/data/spain.raw and /dev/null differ diff --git a/packages/speech/system-test/speech.js b/packages/speech/system-test/speech.js deleted file mode 100644 index f5856aa602a..00000000000 --- a/packages/speech/system-test/speech.js +++ /dev/null @@ -1,328 +0,0 @@ -/*! - * Copyright 2016 Google Inc. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -'use strict'; - -var assert = require('assert'); -var async = require('async'); -var exec = require('methmeth'); -var extend = require('extend'); -var fs = require('fs'); -var path = require('path'); -var uuid = require('uuid'); - -var env = require('../../../system-test/env.js'); -var Speech = require('../'); -var storage = require('@google-cloud/storage')(env); - -var FILENAMES = ['bridge', 'spain', 'quit']; -var AUDIO_FILES = {}; -var BUCKET_NAME = 'gcloud-test-bucket-temp-' + uuid.v1(); - -FILENAMES.forEach(function(filename) { - var name = filename + '.raw'; - - AUDIO_FILES[filename] = { - name: name, - path: path.join(__dirname, 'data/' + name), - gcsUri: 'gs://' + BUCKET_NAME + '/' + name, - httpUri: 'https://storage.googleapis.com/' + BUCKET_NAME + '/' + name - }; -}); - -describe('Speech', function() { - var speech = new Speech(env); - var bucket = storage.bucket(BUCKET_NAME); - - var OPTIONS = { - encoding: 'LINEAR16', - languageCode: 'en-US', - sampleRateHertz: 16000 - }; - - var OPTIONS_VERBOSE = extend({}, OPTIONS, { - verbose: true - }); - - var TRANSCRIPTION = 'how old is the Brooklyn Bridge'; - - before(function(done) { - async.waterfall([ - function(next) { - bucket.create(next); - }, - - function(_, apiResponse, next) { - async.map(FILENAMES, function(filename, onComplete) { - fs.readFile(AUDIO_FILES[filename].path, onComplete); - }, next); - }, - - function(files, next) { - FILENAMES.forEach(function(filename, i) { - AUDIO_FILES[filename].content = files[i]; - }); - - async.map(FILENAMES, function(filename, onComplete) { - var file = bucket.file(AUDIO_FILES[filename].name); - - file.save(AUDIO_FILES[filename].content, function(err) { - onComplete(err, file); - }); - }, next); - }, - - function(files, next) { - async.map(files, exec('makePublic'), next); - } - ], done); - }); - - after(function(done) { - bucket.deleteFiles({ - force: true - }, function(err) { - if (err) { - done(err); - return; - } - - bucket.delete(done); - }); - }); - - 
describe('recognize', function() { - it('recognizes speech from raw audio', function(done) { - fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { - assert.ifError(err); - - speech.recognize({ - content: audioFile, - }, OPTIONS, assertSimplifiedResponse(done)); - }); - }); - - it('recognizes speech in verbose mode', function(done) { - fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { - assert.ifError(err); - - speech.recognize({ - content: audioFile, - }, OPTIONS_VERBOSE, assertVerboseResponse(done)); - }); - }); - - it('recognizes speech from local file', function(done) { - speech.recognize(AUDIO_FILES.bridge.path, { - // encoding should be automatically detected - languageCode: 'en-US', - sampleRateHertz: 16000 - }, assertSimplifiedResponse(done)); - }); - - it('recognizes speech from remote GCS audio file', function(done) { - var uri = AUDIO_FILES.bridge.gcsUri; - - speech.recognize(uri, OPTIONS, assertSimplifiedResponse(done)); - }); - - it('recognizes speech from remote audio file', function(done) { - var uri = AUDIO_FILES.bridge.httpUri; - - speech.recognize(uri, OPTIONS, assertSimplifiedResponse(done)); - }); - }); - - describe('startRecognition', function() { - it('recognizes speech from raw audio', function(done) { - fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { - assert.ifError(err); - - speech.startRecognition({ - content: audioFile - }, OPTIONS, function(err, operation) { - assert.ifError(err); - - operation - .on('error', done) - .on('complete', assertSimplifiedResponseOperation(done)); - }); - }); - }); - - it('recognizes speech from raw audio in verbose mode', function(done) { - fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { - assert.ifError(err); - - speech.startRecognition({ - content: audioFile - }, OPTIONS_VERBOSE, function(err, operation) { - assert.ifError(err); - - operation - .on('error', done) - .on('complete', assertVerboseResponseOperation(done)); - }); - }); - }); - - 
it('recognizes speech from local file', function(done) { - var options = { - // encoding should be automatically detected - languageCode: 'en-US', - sampleRateHertz: 16000 - }; - - var path = AUDIO_FILES.bridge.path; - - speech.startRecognition(path, options, function(err, operation) { - assert.ifError(err); - - operation - .on('error', done) - .on('complete', assertSimplifiedResponseOperation(done)); - }); - }); - - it('recognizes speech from remote GCS audio file', function(done) { - var uri = AUDIO_FILES.bridge.gcsUri; - - speech.startRecognition(uri, OPTIONS, function(err, operation) { - assert.ifError(err); - - operation - .on('error', done) - .on('complete', assertSimplifiedResponseOperation(done)); - }); - }); - - it('recognizes speech from remote audio file', function(done) { - var uri = AUDIO_FILES.bridge.httpUri; - - speech.startRecognition(uri, OPTIONS, function(err, operation) { - assert.ifError(err); - - operation - .on('error', done) - .on('complete', assertSimplifiedResponseOperation(done)); - }); - }); - - it('runs operation as a promise', function() { - var uri = AUDIO_FILES.bridge.httpUri; - - return speech.startRecognition(uri, OPTIONS) - .then(function(response) { - var operation = response[0]; - return operation.promise(); - }); - }); - }); - - describe('createRecognizeStream', function() { - it('recognizes speech from raw audio', function(done) { - var transcribed = false; - var responseEmitted = false; - - fs.createReadStream(AUDIO_FILES.bridge.path) - .on('error', done) - .pipe(speech.createRecognizeStream({ - config: OPTIONS, - interimResults: false, - singleUtterance: false - })) - .on('error', done) - .on('response', function() { - responseEmitted = true; - }) - .on('data', function(data) { - if (data.speechEventType === 'SPEECH_EVENT_UNSPECIFIED') { - if (data.results === TRANSCRIPTION) { - transcribed = true; - } - } - }) - .on('end', function() { - setTimeout(function() { - assert.strictEqual(responseEmitted, true); - 
assert.strictEqual(transcribed, true); - done(); - }, 1500); - }); - }); - - it('recognizes speech from raw audio in verbose mode', function(done) { - var transcribed = false; - var responseEmitted = false; - - fs.createReadStream(AUDIO_FILES.bridge.path) - .on('error', done) - .pipe(speech.createRecognizeStream({ - config: OPTIONS, - interimResults: false, - singleUtterance: false, - verbose: true - })) - .on('error', done) - .on('response', function() { - responseEmitted = true; - }) - .on('data', function(data) { - if (data.speechEventType === 'SPEECH_EVENT_UNSPECIFIED') { - if (data.results[0].transcript === TRANSCRIPTION) { - transcribed = true; - } - } - }) - .on('end', function() { - setTimeout(function() { - assert.strictEqual(responseEmitted, true); - assert.strictEqual(transcribed, true); - done(); - }, 1500); - }); - }); - }); - - function assertSimplifiedResponse(done) { - return function(err, transcript) { - assert.ifError(err); - assert.strictEqual(transcript, TRANSCRIPTION); - done(); - }; - } - - function assertVerboseResponse(done) { - return function(err, results) { - assert.ifError(err); - - assert(results.length > 0); - - var transcript = results[0].transcript; - assert.strictEqual(transcript, TRANSCRIPTION); - - done(); - }; - } - - function assertSimplifiedResponseOperation(done) { - return assertSimplifiedResponse(done).bind(null, null); - } - - function assertVerboseResponseOperation(done) { - return assertVerboseResponse(done).bind(null, null); - } -}); diff --git a/packages/speech/test/v1/v1.js b/packages/speech/test/gapic-v1.test.js similarity index 77% rename from packages/speech/test/v1/v1.js rename to packages/speech/test/gapic-v1.test.js index 3da63317a9e..2e37b7ff538 100644 --- a/packages/speech/test/v1/v1.js +++ b/packages/speech/test/gapic-v1.test.js @@ -1,11 +1,11 @@ /* - * Copyright 2016 Google Inc. All rights reserved. + * Copyright 2017, Google Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +'use strict'; var assert = require('assert'); var speechV1 = require('../src/v1')(); @@ -27,8 +28,18 @@ describe('SpeechClient', function() { it('invokes recognize without error', function(done) { var client = speechV1.speechClient(); // Mock request - var config = {}; - var audio = {}; + var encoding = speechV1.RecognitionConfig.AudioEncoding.FLAC; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding : encoding, + sampleRateHertz : sampleRateHertz, + languageCode : languageCode + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri : uri + }; var request = { config : config, audio : audio @@ -50,8 +61,18 @@ describe('SpeechClient', function() { it('invokes recognize with error', function(done) { var client = speechV1.speechClient(); // Mock request - var config = {}; - var audio = {}; + var encoding = speechV1.RecognitionConfig.AudioEncoding.FLAC; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding : encoding, + sampleRateHertz : sampleRateHertz, + languageCode : languageCode + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri : uri + }; var request = { config : config, audio : audio @@ -72,8 +93,18 @@ describe('SpeechClient', function() { it('invokes longRunningRecognize without error', function(done) { var client = speechV1.speechClient(); // Mock request - var config = {}; - var audio = {}; + var encoding = 
speechV1.RecognitionConfig.AudioEncoding.FLAC; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding : encoding, + sampleRateHertz : sampleRateHertz, + languageCode : languageCode + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri : uri + }; var request = { config : config, audio : audio @@ -99,8 +130,18 @@ describe('SpeechClient', function() { it('invokes longRunningRecognize with error', function(done) { var client = speechV1.speechClient(); // Mock request - var config = {}; - var audio = {}; + var encoding = speechV1.RecognitionConfig.AudioEncoding.FLAC; + var sampleRateHertz = 44100; + var languageCode = 'en-US'; + var config = { + encoding : encoding, + sampleRateHertz : sampleRateHertz, + languageCode : languageCode + }; + var uri = 'gs://bucket_name/file_name.flac'; + var audio = { + uri : uri + }; var request = { config : config, audio : audio @@ -200,7 +241,7 @@ function mockLongRunningGrpcMethod(expectedRequest, response, error) { promise: function() { return new Promise(function(resolve, reject) { if (error) { - reject(error) + reject(error); } else { resolve([response]); } diff --git a/packages/speech/test/helpers.test.js b/packages/speech/test/helpers.test.js new file mode 100644 index 00000000000..7064630d557 --- /dev/null +++ b/packages/speech/test/helpers.test.js @@ -0,0 +1,54 @@ +/*! + * Copyright 2017 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +'use strict'; + +var assert = require('assert'); +var sinon = require('sinon'); +var stream = require('stream'); + +var Speech = require('../'); + + +describe('Speech helper methods', () => { + var sandbox = sinon.sandbox.create(); + + afterEach(() => { + sandbox.restore(); + }); + + describe('streamingRecognize', () => { + it('writes the config to the resulting stream', () => { + var speech = Speech.v1(); + + // Stub the underlying _streamingRecognize method to just return + // a bogus stream. + var writable = stream.Writable(); + var sr = sandbox.stub(speech, '_streamingRecognize').returns(writable); + + // Call the new helper method and establish that the config was + // forwarded as expected. + var config = {config: {languageCode: 'en-us'}}; + var options = {timeout: Infinity}; + speech.streamingRecognize(config, options); + + // Establish that the underlying streamingRecognize was called with + // the options. + assert(sr.calledOnce); + assert(sr.calledWithExactly(options)); + }); + }); +}); diff --git a/packages/speech/test/index.js b/packages/speech/test/index.js deleted file mode 100644 index f89c5cd4421..00000000000 --- a/packages/speech/test/index.js +++ /dev/null @@ -1,1218 +0,0 @@ -/** - * Copyright 2016 Google Inc. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -'use strict'; - -var assert = require('assert'); -var async = require('async'); -var extend = require('extend'); -var fs = require('fs'); -var googleProtoFiles = require('google-proto-files'); -var proxyquire = require('proxyquire'); -var through = require('through2'); -var tmp = require('tmp'); - -var util = require('@google-cloud/common').util; - -var promisified = false; -var fakeUtil = extend({}, util, { - promisifyAll: function(Class, options) { - if (Class.name !== 'Speech') { - return; - } - - promisified = true; - assert.deepEqual(options.exclude, ['operation']); - } -}); - -function FakeGrpcOperation() { - this.calledWith_ = arguments; -} - -function FakeGrpcService() { - this.calledWith_ = arguments; -} - -var fakeV1Override; -function fakeV1() { - if (fakeV1Override) { - return fakeV1Override.apply(null, arguments); - } - - return { - speechClient: util.noop - }; -} - -var requestOverride = null; -var fakeRequest = function() { - return (requestOverride || util.noop).apply(this, arguments); -}; - -describe('Speech', function() { - var OPTIONS = { - projectId: 'project-id' - }; - - var Speech; - var speech; - - var originalStaticMembers; - - before(function() { - Speech = proxyquire('../', { - '@google-cloud/common': { - util: fakeUtil - }, - '@google-cloud/common-grpc': { - Operation: FakeGrpcOperation, - Service: FakeGrpcService - }, - request: fakeRequest, - './v1': fakeV1 - }); - - originalStaticMembers = Object.keys(Speech).reduce(function(statics, key) { - statics[key] = Speech[key]; - return statics; - }, {}); - }); - - beforeEach(function() { - fakeV1Override = null; - requestOverride = null; - - speech = new Speech(OPTIONS); - - extend(Speech, originalStaticMembers); - }); - - describe('instantiation', function() { - it('should promisify all the things', function() { - assert(promisified); - }); - - it('should normalize the arguments', function() { - var normalizeArguments = fakeUtil.normalizeArguments; - var normalizeArgumentsCalled = 
false; - var fakeOptions = { projectId: OPTIONS.projectId }; - var fakeContext = {}; - - fakeUtil.normalizeArguments = function(context, options) { - normalizeArgumentsCalled = true; - assert.strictEqual(context, fakeContext); - assert.strictEqual(options, fakeOptions); - return options; - }; - - Speech.call(fakeContext, fakeOptions); - assert(normalizeArgumentsCalled); - - fakeUtil.normalizeArguments = normalizeArguments; - }); - - it('should create a gax api client', function() { - var expectedSpeechService = {}; - - fakeV1Override = function(options) { - var expected = extend({}, OPTIONS, { - libName: 'gccl', - libVersion: require('../package.json').version - }); - assert.deepStrictEqual(options, expected); - - return { - speechClient: function(options) { - assert.deepStrictEqual(options, expected); - return expectedSpeechService; - } - }; - }; - - var speech = new Speech(OPTIONS); - - assert.deepEqual(speech.api, { - Speech: expectedSpeechService - }); - }); - - it('should inherit from GrpcService', function() { - assert(speech instanceof FakeGrpcService); - - var calledWith = speech.calledWith_[0]; - - assert.deepEqual(calledWith, { - baseUrl: 'speech.googleapis.com', - projectIdRequired: false, - service: 'speech', - protoServices: { - Operations: { - path: googleProtoFiles('longrunning', 'operations.proto'), - service: 'longrunning' - } - }, - scopes: [ - 'https://www.googleapis.com/auth/cloud-platform' - ], - packageJson: require('../package.json') - }); - }); - }); - - describe('eventTypes', function() { - var EVENT_TYPES = { - END_OF_SINGLE_UTTERANCE: 'END_OF_SINGLE_UTTERANCE', - ENDPOINTER_EVENT_UNSPECIFIED: 'ENDPOINTER_EVENT_UNSPECIFIED' - }; - - it('should export static eventTypes', function() { - assert.deepEqual(Speech.eventTypes, EVENT_TYPES); - }); - - it('should export instance eventTypes', function() { - assert.deepEqual(speech.eventTypes, EVENT_TYPES); - }); - }); - - describe('detectEncoding_', function() { - it('should detect encoding', 
function() { - assert.equal(Speech.detectEncoding_('foo.raw'), 'LINEAR16'); - assert.equal(Speech.detectEncoding_('foo.amr'), 'AMR'); - assert.equal(Speech.detectEncoding_('foo.awb'), 'AMR_WB'); - assert.equal(Speech.detectEncoding_('foo.flac'), 'FLAC'); - assert.equal(Speech.detectEncoding_('foo.fLAc'), 'FLAC'); - assert.equal(Speech.detectEncoding_('foo.wav'), 'MULAW'); - assert.equal(Speech.detectEncoding_('foo.au'), 'MULAW'); - }); - - it('should throw if a supported encoding is not detected', function() { - assert.throws(function() { - Speech.detectEncoding_('blah.mp3'); - }, /Encoding could not be determined for file: blah\.mp3/); - }); - - it('should return nothing if the argument is not a string', function() { - assert.strictEqual(Speech.detectEncoding_({}), undefined); - }); - }); - - describe('findFile_', function() { - it('should return buffer for snippet sandbox', function(done) { - global.GCLOUD_SANDBOX_ENV = true; - - Speech.findFile_({}, function(err, foundFile) { - delete global.GCLOUD_SANDBOX_ENV; - assert.ifError(err); - - assert.deepEqual(foundFile, { - content: new Buffer('') - }); - - done(); - }); - }); - - it('should convert a File object', function(done) { - var file = { - bucket: { - name: 'bucket-name' - }, - name: 'file-name' - }; - - var isCustomTypeCalled = false; - var isCustomType = fakeUtil.isCustomType; - - fakeUtil.isCustomType = function(obj, module) { - isCustomTypeCalled = true; - fakeUtil.isCustomType = isCustomType; - assert.strictEqual(obj, file); - assert.strictEqual(module, 'storage/file'); - return true; - }; - - Speech.findFile_(file, function(err, foundFile) { - assert.ifError(err); - - assert.deepEqual(foundFile, { - uri: 'gs://' + file.bucket.name + '/' + file.name - }); - - assert.strictEqual(isCustomTypeCalled, true); - - done(); - }); - }); - - it('should detect a gs:// path', function(done) { - var file = 'gs://your-bucket-name/audio.raw'; - - Speech.findFile_(file, function(err, foundFile) { - assert.ifError(err); 
- - assert.deepEqual(foundFile, { - uri: file - }); - - done(); - }); - }); - - it('should get a file from a URL', function(done) { - var fileUri = 'http://www.google.com/audio.raw'; - var body = 'body'; - - requestOverride = function(reqOpts, callback) { - assert.strictEqual(reqOpts.uri, fileUri); - assert.strictEqual(reqOpts.encoding, null); - - var response = { - body: new Buffer(body) - }; - - callback(null, response, response.body); - }; - - Speech.findFile_(fileUri, function(err, foundFile) { - assert.ifError(err); - assert.deepEqual(foundFile, { - content: new Buffer(body) - }); - done(); - }); - }); - - it('should return an error from reading a URL', function(done) { - var fileUri = 'http://www.google.com/audio.raw'; - var error = new Error('Error.'); - - requestOverride = function(options, callback) { - callback(error); - }; - - Speech.findFile_(fileUri, function(err) { - assert.strictEqual(err, error); - done(); - }); - }); - - it('should accept a buffer', function(done) { - var file = new Buffer('abc'); - - Speech.findFile_(file, function(err, foundFile) { - assert.ifError(err); - - assert.deepEqual(foundFile, { - content: file - }); - - done(); - }); - }); - - it('should validate RecognitionAudio object', function(done) { - var file = {}; - - Speech.findFile_(file, function(err) { - assert.strictEqual( - err.message, - 'RecognitionAudio requires a "content" or "uri" property.' 
- ); - - done(); - }); - }); - - it('should accept RecognitionAudio object', function(done) { - var file = { - content: 'aGk=' - }; - - Speech.findFile_(file, function(err, foundFile) { - assert.ifError(err); - assert.strictEqual(foundFile, file); - done(); - }); - }); - - it('should read from a file path', function(done) { - tmp.setGracefulCleanup(); - - tmp.file(function tempFileCreated_(err, tmpFilePath) { - assert.ifError(err); - - var contents = 'abcdef'; - - function writeFile(callback) { - fs.writeFile(tmpFilePath, contents, callback); - } - - function convertFile(callback) { - Speech.findFile_(tmpFilePath, callback); - } - - async.waterfall([writeFile, convertFile], function(err, foundFile) { - assert.ifError(err); - - assert.deepEqual(foundFile, { - content: new Buffer(contents) - }); - - done(); - }); - }); - }); - - it('should return an error when file cannot be found', function(done) { - Speech.findFile_('./not-real-file.raw', function(err) { - assert.strictEqual(err.code, 'ENOENT'); - done(); - }); - }); - }); - - describe('formatResults_', function() { - describe('SpeechRecognitionResult', function() { - var SPEECH_RECOGNITION = { - original: [ - { - alternatives: [ - { - transcript: 'Result 1a', - confidence: 0.70, - stability: 0.1 - }, - { - transcript: 'Result 1b', - confidence: 0.60, - stability: 0.1 - } - ] - }, - { - alternatives: [ - { - transcript: 'Result 2a', - confidence: 0.90, - stability: 0.1 - }, - { - transcript: 'Result 2b', - confidence: 0.80, - stability: 0.1 - } - ] - } - ], - - expectedDefault: 'Result 1a Result 2a', - - expectedVerbose: [ - { - transcript: 'Result 1a', - confidence: 70, - stability: 10, - alternatives: [ - { - transcript: 'Result 1b', - confidence: 60, - stability: 10, - } - ] - }, - { - transcript: 'Result 2a', - confidence: 90, - stability: 10, - alternatives: [ - { - transcript: 'Result 2b', - confidence: 80, - stability: 10 - } - ] - } - ] - }; - - it('should simplify the results', function() { - 
assert.deepEqual( - Speech.formatResults_(SPEECH_RECOGNITION.original), - SPEECH_RECOGNITION.expectedDefault - ); - }); - - it('should simplify the results in verbose mode', function() { - assert.deepEqual( - Speech.formatResults_(SPEECH_RECOGNITION.original, true), - SPEECH_RECOGNITION.expectedVerbose - ); - }); - }); - }); - - describe('createRecognizeStream', function() { - var CONFIG = { - languageCode: 'en-US' - }; - var stream; - var requestStream; - - beforeEach(function() { - stream = speech.createRecognizeStream(CONFIG); - - stream.setPipeline = util.noop; - - speech.api.Speech = { - streamingRecognize: function() { - requestStream = through.obj(); - return requestStream; - } - }; - }); - - it('should throw if an object is not provided', function() { - assert.throws(function() { - speech.createRecognizeStream(); - }, /A recognize request requires a configuration object\./); - }); - - it('should throw if a language code is not provided', function() { - assert.throws(function() { - speech.createRecognizeStream({}); - }, /languageCode/); - }); - - it('should make the correct request once writing started', function(done) { - speech.api.Speech = { - streamingRecognize: function() { - setImmediate(done); - return through.obj(); - } - }; - - stream.emit('writing'); - }); - - it('should destroy user stream if request stream errors', function(done) { - var error = new Error('Error.'); - - stream.on('error', function(err) { - assert.strictEqual(error, err); - done(); - }); - - speech.api.Speech = { - streamingRecognize: function() { - var requestStream = through.obj(); - - setImmediate(function() { - requestStream.destroy(error); - }); - - return requestStream; - } - }; - - stream.emit('writing'); - }); - - it('should emit the response event on the user stream', function(done) { - var response = {}; - - stream.on('response', function(response_) { - assert.strictEqual(response_, response); - done(); - }); - - speech.api.Speech = { - streamingRecognize: function() { - 
var requestStream = through.obj(); - - setImmediate(function() { - requestStream.emit('response', response); - }); - - return requestStream; - } - }; - - stream.emit('writing'); - }); - - it('should send the initial write to the request stream', function(done) { - speech.api.Speech = { - streamingRecognize: function() { - var requestStream = through.obj(); - - requestStream.once('data', function(data) { - var expected = extend(true, { - config: { - languageCode: 'en-US' - } - }, CONFIG); - delete expected.languageCode; - - assert.deepEqual(data, { - streamingConfig: expected - }); - done(); - }); - - return requestStream; - } - }; - - stream.emit('writing'); - }); - - it('should format the incoming data into a duplex stream', function(done) { - stream.setPipeline = function(streams) { - var formatStream = streams[0]; - assert.strictEqual(streams[1], requestStream); - - var chunk = {}; - formatStream.once('data', function(data) { - assert.deepEqual(data, { - audioContent: chunk - }); - done(); - }); - - formatStream.end(chunk); - }; - - stream.emit('writing'); - }); - - it('should format the results from the API', function(done) { - stream.setPipeline = function(streams) { - var formatStream = streams[2]; - - var streamingRecognizeResponse = { - results: [] - }; - - var formattedResults = []; - - Speech.formatResults_ = function(results, verboseMode) { - assert.strictEqual(results, streamingRecognizeResponse.results); - assert.strictEqual(verboseMode, false); - return formattedResults; - }; - - formatStream.once('data', function(data) { - assert.strictEqual(data, streamingRecognizeResponse); - assert.deepEqual(data.results, formattedResults); - done(); - }); - - formatStream.end(streamingRecognizeResponse); - }; - - stream.emit('writing'); - }); - - it('should format results from the API in verbose mode', function(done) { - var stream = speech.createRecognizeStream({ - languageCode: 'en-US', - verbose: true - }); - - speech.requestWritableStream = function() { - 
return through.obj(); - }; - - stream.setPipeline = function(streams) { - var formatStream = streams[2]; - - Speech.formatResults_ = function(results, verboseMode) { - assert.strictEqual(verboseMode, true); - done(); - }; - - formatStream.end({}); - }; - - stream.emit('writing'); - }); - - it('should delete verbose option from request object', function(done) { - var stream = speech.createRecognizeStream({ - languageCode: 'en-US', - verbose: true - }); - - speech.api.Speech = { - streamingRecognize: function() { - var stream = through.obj(); - - stream.on('data', function(data) { - assert.strictEqual(data.streamingConfig.verbose, undefined); - done(); - }); - - return stream; - } - }; - - stream.emit('writing'); - }); - - it('should allow specifying a timeout', function(done) { - var timeout = 200; - var expectedTimeout = 200 * 1000; - - speech.api.Speech = { - streamingRecognize: function(opts) { - var requestStream = through.obj(); - requestStream._write = util.noop; - - assert.strictEqual(opts.timeout, expectedTimeout); - setImmediate(done); - - return requestStream; - } - }; - - var stream = speech.createRecognizeStream({ - languageCode: 'en-US', - timeout: timeout - }); - - stream.emit('writing'); - }); - - it('should delete timeout option from request object', function(done) { - speech.api.Speech = { - streamingRecognize: function() { - var stream = through.obj(); - - stream.on('data', function(data) { - assert.strictEqual(data.streamingConfig.timeout, undefined); - done(); - }); - - return stream; - } - }; - - var stream = speech.createRecognizeStream({ - languageCode: 'en-US', - timeout: 90 - }); - - stream.emit('writing'); - }); - - it('should allow specifying a languageCode', function(done) { - var languageCode = 'uk'; - - speech.api.Speech = { - streamingRecognize: function() { - var stream = through.obj(); - - stream.on('data', function(data) { - assert.strictEqual( - data.streamingConfig.config.languageCode, - languageCode - ); - done(); - }); - - 
return stream; - } - }; - - var stream = speech.createRecognizeStream({ - config: { - languageCode: languageCode - } - }); - - stream.emit('writing'); - }); - }); - - describe('operation', function() { - var NAME = 'op-name'; - - it('should throw if a name is not provided', function() { - assert.throws(function() { - speech.operation(); - }, /A name must be specified for an operation\./); - }); - - it('should return an Operation object', function() { - var operation = speech.operation(NAME); - assert(operation instanceof FakeGrpcOperation); - assert.strictEqual(operation.calledWith_[0], speech); - assert.strictEqual(operation.calledWith_[1], NAME); - }); - }); - - describe('recognize', function() { - var FILE = {}; - var FOUND_FILE = {}; - var CONFIG = { - a: 'b', - languageCode: 'en-US', - }; - var DETECTED_ENCODING = 'LINEAR16'; - - beforeEach(function() { - Speech.detectEncoding_ = function() { - return DETECTED_ENCODING; - }; - - Speech.findFile_ = function(files, callback) { - callback(null, FOUND_FILE); - }; - - speech.api.Speech = { - recognize: util.noop - }; - }); - - it('should throw if an object is not provided', function() { - assert.throws(function() { - speech.recognize(FILE, assert.ifError); - }, /A recognize request requires a configuration object\./); - }); - - it('should find the files', function(done) { - Speech.findFile_ = function(file) { - assert.strictEqual(file, FILE); - done(); - }; - - speech.recognize(FILE, CONFIG, assert.ifError); - }); - - it('should make the correct request', function(done) { - speech.api.Speech = { - recognize: function(reqOpts) { - var expectedConfig = extend({ - encoding: DETECTED_ENCODING, - languageCode: 'en-US' - }, CONFIG); - - assert.deepEqual(reqOpts.config, expectedConfig); - assert.strictEqual(reqOpts.audio, FOUND_FILE); - - done(); - } - }; - - speech.recognize(FILE, CONFIG, assert.ifError); - }); - - it('should fail if no language code is set', function() { - assert.throws(function() { - 
speech.recognize(FILE, {}); - }, /languageCode/); - }); - - it('should allow setting a languageCode', function(done) { - var languageCode = 'en-GB'; - - var config = { - languageCode: languageCode - }; - - speech.api.Speech = { - recognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.languageCode, languageCode); - done(); - } - }; - - speech.recognize(FILE, config, assert.ifError); - }); - - it('should respect the provided encoding', function(done) { - var config = { - encoding: 'LINEAR32', - languageCode: 'en-US' - }; - - Speech.detectEncoding_ = function() { - done(); // Will cause test to fail. - }; - - speech.api.Speech = { - recognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.encoding, config.encoding); - done(); - } - }; - - speech.recognize(FILE, config, assert.ifError); - }); - - it('should guess the encoding if it is not specified', function(done) { - var expectedEncoding = 'LINEAR16'; - - Speech.detectEncoding_ = function(file) { - assert.strictEqual(file, FILE); - return expectedEncoding; - }; - - speech.api.Speech = { - recognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.encoding, expectedEncoding); - done(); - } - }; - - speech.recognize(FILE, {languageCode: 'en-US'}, assert.ifError); - }); - - it('should return an error from findFile_', function(done) { - var error = new Error('Error.'); - - Speech.findFile_ = function(files, callback) { - callback(error); - }; - - speech.recognize(FILE, CONFIG, function(err) { - assert.strictEqual(err, error); - done(); - }); - }); - - describe('error', function() { - var error = new Error('Error.'); - var apiResponse = {}; - - beforeEach(function() { - speech.api.Speech = { - recognize: function(reqOpts, callback) { - callback(error, apiResponse); - } - }; - }); - - it('should return the error & API response', function(done) { - speech.recognize(FILE, CONFIG, function(err, results, apiResponse_) { - assert.strictEqual(err, error); - assert.strictEqual(results, null); - 
assert.strictEqual(apiResponse_, apiResponse); - done(); - }); - }); - }); - - describe('success', function() { - var apiResponse = { - results: [] - }; - var formattedResults = []; - - beforeEach(function() { - Speech.formatResults_ = function() { - return formattedResults; - }; - - speech.api.Speech = { - recognize: function(reqOpts, callback) { - callback(null, apiResponse); - } - }; - }); - - it('should return the detections & API response', function(done) { - Speech.formatResults_ = function(results, verboseMode) { - assert.strictEqual(results, apiResponse.results); - assert.strictEqual(verboseMode, false); - return formattedResults; - }; - - speech.recognize(FILE, CONFIG, function(err, results, apiResponse_) { - assert.ifError(err); - assert.strictEqual(results, formattedResults); - assert.strictEqual(apiResponse_, apiResponse); - done(); - }); - }); - - it('should return the detections in verbose mode', function(done) { - Speech.formatResults_ = function(results, verboseMode) { - assert.strictEqual(verboseMode, true); - done(); - }; - - var config = extend({}, CONFIG, { - verbose: true - }); - - speech.recognize(FILE, config, assert.ifError); - }); - - it('should return the detections in verbose mode', function(done) { - Speech.formatResults_ = function(results, verboseMode) { - assert.strictEqual(verboseMode, true); - done(); - }; - - var config = extend({}, CONFIG, { - verbose: true - }); - - speech.recognize(FILE, config, assert.ifError); - }); - - it('should delete verbose option from request object', function(done) { - speech.api.Speech = { - recognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.verbose, undefined); - done(); - } - }; - - var config = extend({}, CONFIG, { - verbose: true - }); - - speech.recognize(FILE, config, assert.ifError); - }); - }); - }); - - describe('startRecognition', function() { - var FILE = {}; - var FOUND_FILE = {}; - var CONFIG = { - a: 'b', - languageCode: 'en-US' - }; - var DETECTED_ENCODING = 'LINEAR16'; - 
- beforeEach(function() { - Speech.detectEncoding_ = function() { - return DETECTED_ENCODING; - }; - - Speech.findFile_ = function(files, callback) { - callback(null, FOUND_FILE); - }; - - speech.api.Speech = { - longRunningRecognize: util.noop - }; - }); - - it('should find the files', function(done) { - Speech.findFile_ = function(file) { - assert.strictEqual(file, FILE); - done(); - }; - - speech.startRecognition(FILE, CONFIG, assert.ifError); - }); - - it('should make the correct request', function(done) { - speech.api.Speech = { - longRunningRecognize: function(reqOpts) { - var expectedConfig = extend({}, CONFIG, { - encoding: DETECTED_ENCODING, - languageCode: 'en-US' - }); - - assert.deepEqual(reqOpts.config, expectedConfig); - assert.strictEqual(reqOpts.audio, FOUND_FILE); - - done(); - } - }; - - speech.startRecognition(FILE, CONFIG, assert.ifError); - }); - - it('should error if no language code is given', function() { - assert.throws(function() { - speech.startRecognition(FILE, {}); - }, /languageCode/); - }); - - it('should respect the provided language code', function(done) { - var languageCode = 'en-GB'; - - var config = { - languageCode: languageCode - }; - - speech.api.Speech = { - longRunningRecognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.languageCode, languageCode); - done(); - } - }; - - speech.startRecognition(FILE, config, assert.ifError); - }); - - it('should respect the provided encoding', function(done) { - var config = { - encoding: 'LINEAR32', - languageCode: 'en-US' - }; - - Speech.detectEncoding_ = function() { - done(); // Will cause test to fail. 
- }; - - speech.api.Speech = { - longRunningRecognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.encoding, config.encoding); - done(); - } - }; - - speech.startRecognition(FILE, config, assert.ifError); - }); - - it('should guess the encoding if it is not specified', function(done) { - var expectedEncoding = 'LINEAR16'; - - Speech.detectEncoding_ = function(file) { - assert.strictEqual(file, FILE); - return expectedEncoding; - }; - - speech.api.Speech = { - longRunningRecognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.encoding, expectedEncoding); - done(); - } - }; - - speech.startRecognition(FILE, {languageCode: 'en-US'}, assert.ifError); - }); - - it('should return an error from findFile_', function(done) { - var error = new Error('Error.'); - - Speech.findFile_ = function(files, callback) { - callback(error); - }; - - speech.startRecognition(FILE, CONFIG, function(err) { - assert.strictEqual(err, error); - done(); - }); - }); - - describe('error', function() { - var error = new Error('Error.'); - var apiResponse = {}; - - it('should return the error & API response', function(done) { - speech.api.Speech = { - longRunningRecognize: function(reqOpts, callback) { - callback(error, null, apiResponse); - } - }; - - speech.startRecognition(FILE, CONFIG, function(err, op, apiResponse_) { - assert.strictEqual(err, error); - assert.strictEqual(op, null); - assert.strictEqual(apiResponse_, apiResponse); - done(); - }); - }); - }); - - describe('success', function() { - var apiResponse = { - name: 'operation-name', - response: { - value: 'value string to be decoded' - } - }; - - it('should format the results', function(done) { - speech.api.Speech = { - longRunningRecognize: function(reqOpts, callback) { - var operation = through.obj(); - callback(null, operation, apiResponse); - } - }; - - var result = { - results: [] - }; - - var formattedResults = []; - Speech.formatResults_ = function(results, verboseMode) { - assert.strictEqual(results, 
result.results); - assert.strictEqual(verboseMode, false); - return formattedResults; - }; - - speech.startRecognition(FILE, CONFIG, function(err, operation) { - assert.ifError(err); - - operation.emit('complete', result, null, null, function(err, resp) { - assert.ifError(err); - assert.strictEqual(resp, formattedResults); - done(); - }); - }); - }); - - it('should format results in verbose mode', function(done) { - speech.api.Speech = { - longRunningRecognize: function(reqOpts, callback) { - var operation = through.obj(); - callback(null, operation, apiResponse); - } - }; - - Speech.formatResults_ = function(results, verboseMode) { - assert.strictEqual(verboseMode, true); - done(); - }; - - var config = extend({}, CONFIG, { - verbose: true - }); - - speech.startRecognition(FILE, config, function(err, operation) { - assert.ifError(err); - - operation.emit('complete', {}, null, null, assert.ifError); - }); - }); - - it('should delete verbose option from request object', function(done) { - speech.api.Speech = { - longRunningRecognize: function(reqOpts) { - assert.strictEqual(reqOpts.config.verbose, undefined); - done(); - } - }; - - var config = extend({}, CONFIG, { - verbose: true - }); - - speech.startRecognition(FILE, config, assert.ifError); - }); - }); - }); -}); diff --git a/scripts/docs/config.js b/scripts/docs/config.js index 53a9753c821..fbb4974401e 100644 --- a/scripts/docs/config.js +++ b/scripts/docs/config.js @@ -22,6 +22,7 @@ module.exports = { TYPES_DICT: 'types.json', TOC: 'toc.json', IGNORE: [ + '**/helpers.js', 'common', 'common-grpc', 'bigtable/src/mutation.js', diff --git a/scripts/helpers.js b/scripts/helpers.js index ba1cb63a9c2..c78a407fb0e 100644 --- a/scripts/helpers.js +++ b/scripts/helpers.js @@ -243,6 +243,7 @@ Module.prototype.runSystemTests = function() { 'error-reporting', 'google-cloud', 'monitoring', + 'speech', 'video-intelligence' ];