Skip to content

Commit 39416df

Browse files
committed
Add _stream_requests() for managing speech streaming configuration order.
1 parent 1cbc793 commit 39416df

File tree

3 files changed

+147
-2
lines changed

3 files changed

+147
-2
lines changed

speech/google/cloud/speech/_gax.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,82 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
145145
raise ValueError('More than one result or none returned from API.')
146146

147147

148+
def _stream_requests(sample, language_code=None, max_alternatives=None,
                     profanity_filter=None, speech_context=None,
                     single_utterance=None, interim_results=None):
    """Yield, in order, the requests that make up one streaming call.

    The first request yielded carries the streaming configuration only.
    Each request after it carries one chunk of audio read from
    ``sample.content``, at most ``sample.chunk_size`` per read.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.
                   Its ``content`` must expose a ``read(size)`` method.

    :type language_code: str
    :param language_code: (Optional) BCP-47 language tag of the supplied
                          audio, e.g. ``'en-GB'``. If omitted, defaults to
                          ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: (Optional) Maximum number of recognition
                             hypotheses to be returned; the server may
                             return fewer. Valid values are 0-30. A value
                             of 0 or 1 will return a maximum of 1.
                             Defaults to 1.

    :type profanity_filter: bool
    :param profanity_filter: (Optional) If True, the server will attempt to
                             filter out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: (Optional) A list of strings (max 50) containing
                           words and phrases "hints" so that the speech
                           recognition is more likely to recognize them.
                           This can improve the accuracy for specific words
                           and phrases, and can also add new words to the
                           vocabulary of the recognizer.

    :type single_utterance: bool
    :param single_utterance: (Optional) If false or omitted, the recognizer
                             performs continuous recognition (it keeps
                             processing audio even if the user pauses
                             speaking) until the client closes the output
                             stream (gRPC API) or the maximum time limit
                             has been reached. Multiple
                             SpeechRecognitionResults with the is_final
                             flag set to true may be returned.

                             If true, the recognizer detects a single
                             spoken utterance. When it detects that the
                             user has paused or stopped speaking, it
                             returns an END_OF_UTTERANCE event and ceases
                             recognition. It returns no more than one
                             SpeechRecognitionResult with the is_final
                             flag set to true.

    :type interim_results: bool
    :param interim_results: (Optional) If true, interim results (tentative
                            hypotheses) may be returned as they become
                            available (these interim results are indicated
                            with the is_final=false flag). If false or
                            omitted, only is_final=true result(s) are
                            returned.

    :rtype: generator
    :returns: Generator yielding the configuration request followed by
              one ``StreamingRecognizeRequest`` per chunk of audio.
    """
    # The configuration MUST be sent first and carries no audio data.
    yield _make_streaming_request(
        sample,
        language_code=language_code,
        max_alternatives=max_alternatives,
        profanity_filter=profanity_filter,
        speech_context=speech_context,
        single_utterance=single_utterance,
        interim_results=interim_results)

    # Everything after the configuration is audio only; stop as soon as
    # a read comes back empty.
    chunk = sample.content.read(sample.chunk_size)
    while chunk:
        yield StreamingRecognizeRequest(audio_content=chunk)
        chunk = sample.content.read(sample.chunk_size)
222+
223+
148224
def _make_streaming_request(sample, language_code,
149225
max_alternatives, profanity_filter,
150226
speech_context, single_utterance,

speech/google/cloud/speech/sample.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,15 @@ def __init__(self, content=None, source_uri=None,
6868
else:
6969
raise ValueError('Invalid encoding: %s' % (encoding,))
7070

71+
@property
def chunk_size(self):
    """Size of each audio chunk to send over gRPC (roughly 100ms).

    :rtype: int
    :returns: One tenth of ``sample_rate``, truncated to an integer.
    """
    # True division first, then truncate, so a non-multiple-of-ten rate
    # still produces a whole number.
    tenth_of_rate = self.sample_rate / 10.0
    return int(tenth_of_rate)
79+
7180
@property
7281
def source_uri(self):
7382
"""Google Cloud Storage URI of audio source.

speech/unit_tests/test__gax.py

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@
1515
import unittest
1616

1717

18-
class TestSpeechGAX(unittest.TestCase):
18+
class TestSpeechGAXMakeRequests(unittest.TestCase):
1919
SAMPLE_RATE = 16000
2020
HINTS = ['hi']
21-
AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'
21+
AUDIO_CONTENT = b'/9j/4QNURXhpZgAASUkq'
2222

2323
def _callFUT(self, sample, language_code, max_alternatives,
2424
profanity_filter, speech_context, single_utterance,
@@ -78,3 +78,63 @@ def test_ctor(self):
7878
self.assertEqual(config.max_alternatives, max_alternatives)
7979
self.assertTrue(config.profanity_filter)
8080
self.assertEqual(config.speech_context.phrases, self.HINTS)
81+
82+
83+
class TestSpeechGAXMakeRequestsStream(unittest.TestCase):
    """Unit tests for ``_stream_requests``."""

    SAMPLE_RATE = 16000
    HINTS = ['hi']
    AUDIO_CONTENT = b'/9j/4QNURXhpZgAASUkq'

    def _callFUT(self, sample, language_code, max_alternatives,
                 profanity_filter, speech_context, single_utterance,
                 interim_results):
        """Call ``_stream_requests``, passing every argument by keyword."""
        from google.cloud.speech._gax import _stream_requests

        options = {
            'language_code': language_code,
            'max_alternatives': max_alternatives,
            'profanity_filter': profanity_filter,
            'speech_context': speech_context,
            'single_utterance': single_utterance,
            'interim_results': interim_results,
        }
        return _stream_requests(sample=sample, **options)

    def test_stream_requests(self):
        """The config request comes first, then the audio request."""
        from io import BytesIO
        from google.cloud import speech
        from google.cloud.speech.sample import Sample
        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
            SpeechContext, StreamingRecognitionConfig,
            StreamingRecognizeRequest)

        # The audio is far smaller than one chunk at this sample rate, so
        # exactly one audio request is expected after the config request.
        audio_stream = BytesIO(self.AUDIO_CONTENT)
        sample = Sample(content=audio_stream,
                        encoding=speech.Encoding.FLAC,
                        sample_rate=self.SAMPLE_RATE)
        hints = SpeechContext(phrases=self.HINTS)

        request_stream = self._callFUT(
            sample, 'US-en', 2, True, hints, True, False)

        all_requests = []
        for request in request_stream:
            self.assertIsInstance(request, StreamingRecognizeRequest)
            all_requests.append(request)

        self.assertEqual(len(all_requests), 2)

        config_request, audio_request = all_requests
        self.assertIsInstance(config_request, StreamingRecognizeRequest)
        # This isn't set by _make_streaming_request().
        # The first request can only have `streaming_config` set.
        # The following requests can only have `audio_content` set.
        self.assertEqual(config_request.audio_content, b'')
        self.assertEqual(audio_request.audio_content, self.AUDIO_CONTENT)
        self.assertIsInstance(config_request.streaming_config,
                              StreamingRecognitionConfig)

0 commit comments

Comments
 (0)