Skip to content

Commit 56791cc

Browse files
authored
Merge pull request #2640 from daspecster/speech-streaming-part-2
Add _make_streaming_request, formerly _make_streaming_config.
2 parents 8038698 + 4a6e1cb commit 56791cc

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed

speech/google/cloud/speech/_gax.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import SpeechContext
1919
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionConfig
2020
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio
21+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
22+
StreamingRecognitionConfig)
23+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
24+
StreamingRecognizeRequest)
25+
2126

2227
from google.cloud.speech.transcript import Transcript
2328

@@ -138,3 +143,84 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
138143
for alternative in alternatives]
139144
else:
140145
raise ValueError('More than one result or none returned from API.')
146+
147+
148+
def _make_streaming_request(sample, language_code,
                            max_alternatives, profanity_filter,
                            speech_context, single_utterance,
                            interim_results):
    """Assemble the configuration request for a streaming recognition.

    The returned message nests a ``RecognitionConfig`` inside a
    ``StreamingRecognitionConfig`` inside a ``StreamingRecognizeRequest``.
    Only ``streaming_config`` is populated; no audio content is attached.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Audio sample whose ``encoding`` and ``sample_rate``
                   attributes are copied into the recognition config.

    :type language_code: str
    :param language_code: BCP-47 language tag of the supplied audio,
                          for example ``'en-GB'``. The value is forwarded
                          as given; this helper applies no default of
                          its own.

    :type max_alternatives: int
    :param max_alternatives: Upper bound on the number of recognition
                             hypotheses to return. The server may return
                             fewer than maxAlternatives. Valid values
                             are 0-30; 0 or 1 yields at most one.

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character of each filtered word
                             with asterisks, e.g. ``'f***'``. If False,
                             profanities are not filtered.

    :type speech_context: list
    :param speech_context: Word and phrase "hints" (max 50) that make
                           the recognizer more likely to pick them up.
                           Useful for improving accuracy on specific
                           words and phrases, or for adding new words
                           to the recognizer's vocabulary. Forwarded
                           unchanged to ``RecognitionConfig``.

    :type single_utterance: bool
    :param single_utterance: If false, the recognizer performs
                             continuous recognition (it keeps processing
                             audio even if the user pauses speaking)
                             until the client closes the output stream
                             (gRPC API) or the maximum time limit is
                             reached. Multiple SpeechRecognitionResults
                             with the is_final flag set to true may be
                             returned.

                             If true, the recognizer detects a single
                             spoken utterance. When it detects that the
                             user has paused or stopped speaking, it
                             returns an END_OF_UTTERANCE event and
                             ceases recognition. It returns no more than
                             one SpeechRecognitionResult with the
                             is_final flag set to true.

    :type interim_results: bool
    :param interim_results: If true, interim results (tentative
                            hypotheses) may be returned as they become
                            available (these are indicated with the
                            is_final=false flag). If false, only
                            is_final=true result(s) are returned.

    :rtype:
    :class:`~grpc.speech.v1beta1.cloud_speech_pb2.StreamingRecognizeRequest`
    :returns: Instance of ``StreamingRecognizeRequest`` carrying only the
              streaming configuration.
    """
    # NOTE(review): ``speech_context`` is handed over untouched. The
    # ``list`` type in the docstring is inherited from the original text;
    # the protobuf field may actually expect a ``SpeechContext`` message
    # -- confirm against callers.
    recognition_fields = {
        'encoding': sample.encoding,
        'sample_rate': sample.sample_rate,
        'language_code': language_code,
        'max_alternatives': max_alternatives,
        'profanity_filter': profanity_filter,
        'speech_context': speech_context,
    }

    # Build innermost-first: recognition settings, then the streaming
    # options around them, then the request envelope.
    return StreamingRecognizeRequest(
        streaming_config=StreamingRecognitionConfig(
            config=RecognitionConfig(**recognition_fields),
            single_utterance=single_utterance,
            interim_results=interim_results))

speech/unit_tests/test__gax.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2016 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
17+
18+
class TestSpeechGAX(unittest.TestCase):
    # Fixture values shared by the test(s) in this case.
    SAMPLE_RATE = 16000
    HINTS = ['hi']
    AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'

    def _callFUT(self, sample, language_code, max_alternatives,
                 profanity_filter, speech_context, single_utterance,
                 interim_results):
        # Thin wrapper around the function under test; it is imported here
        # rather than at module level and every argument is forwarded by
        # keyword.
        from google.cloud.speech._gax import _make_streaming_request

        forwarded = {
            'sample': sample,
            'language_code': language_code,
            'max_alternatives': max_alternatives,
            'profanity_filter': profanity_filter,
            'speech_context': speech_context,
            'single_utterance': single_utterance,
            'interim_results': interim_results,
        }
        return _make_streaming_request(**forwarded)

    def test_ctor(self):
        # Builds a streaming request from known inputs and checks that each
        # value lands on the expected nested protobuf message.
        from google.cloud import speech
        from google.cloud.speech.sample import Sample
        from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2

        audio_sample = Sample(content=self.AUDIO_CONTENT,
                              encoding=speech.Encoding.FLAC,
                              sample_rate=self.SAMPLE_RATE)
        language = 'US-en'
        alternatives_limit = 2
        hints_context = cloud_speech_pb2.SpeechContext(phrases=self.HINTS)

        request = self._callFUT(sample=audio_sample,
                                language_code=language,
                                max_alternatives=alternatives_limit,
                                profanity_filter=True,
                                speech_context=hints_context,
                                single_utterance=True,
                                interim_results=False)
        self.assertIsInstance(
            request, cloud_speech_pb2.StreamingRecognizeRequest)

        # This isn't set by _make_streaming_request().
        # The first request can only have `streaming_config` set.
        # The following requests can only have `audio_content` set.
        self.assertEqual(request.audio_content, b'')

        # Outer layer: streaming-specific options.
        outer_config = request.streaming_config
        self.assertIsInstance(
            outer_config, cloud_speech_pb2.StreamingRecognitionConfig)
        self.assertTrue(outer_config.single_utterance)
        self.assertFalse(outer_config.interim_results)

        # Inner layer: the recognition settings proper.
        inner_config = outer_config.config
        self.assertIsInstance(
            inner_config, cloud_speech_pb2.RecognitionConfig)
        # speech.Encoding.FLAC maps to 2.
        self.assertEqual(inner_config.encoding, 2)
        self.assertEqual(inner_config.sample_rate, self.SAMPLE_RATE)
        self.assertEqual(inner_config.language_code, language)
        self.assertEqual(inner_config.max_alternatives, alternatives_limit)
        self.assertTrue(inner_config.profanity_filter)
        self.assertEqual(inner_config.speech_context.phrases, self.HINTS)

0 commit comments

Comments
 (0)