Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions dashscope/aigc/multimodal_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def call(
api_key: str = None,
workspace: str = None,
text: str = None,
voice: str = None,
language_type: str = None,
**kwargs
) -> Union[MultiModalConversationResponse, Generator[
MultiModalConversationResponse, None, None]]:
Expand Down Expand Up @@ -57,6 +59,9 @@ def call(
[1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
workspace (str): The dashscope workspace id.
text (str): The text to generate.
voice (str): The voice name for qwen tts, such as 'Cherry', 'Ethan', 'Sunny', or 'Dylan';
the full voice list is available at: https://help.aliyun.com/zh/model-studio/qwen-tts.
language_type (str): The language type of the synthesized speech; defaults to 'auto'. Applies to [qwen3-tts].
**kwargs:
stream(bool, `optional`): Enable server-sent events
(ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
Expand All @@ -70,8 +75,6 @@ def call(
tokens with top_p probability mass. So 0.1 means only
the tokens comprising the top 10% probability mass are
considered[qwen-turbo,bailian-v1].
voice(string, `optional`): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
top_k(float, `optional`):


Expand Down Expand Up @@ -99,6 +102,10 @@ def call(

if text is not None and text:
input.update({'text': text})
if voice is not None and voice:
input.update({'voice': voice})
if language_type is not None and language_type:
input.update({'language_type': language_type})
Comment thread
songguocola marked this conversation as resolved.
if msg_copy is not None:
input.update({'messages': msg_copy})
response = super().call(model=model,
Expand Down Expand Up @@ -160,6 +167,8 @@ async def call(
api_key: str = None,
workspace: str = None,
text: str = None,
voice: str = None,
language_type: str = None,
**kwargs
) -> Union[MultiModalConversationResponse, Generator[
MultiModalConversationResponse, None, None]]:
Expand Down Expand Up @@ -189,6 +198,9 @@ async def call(
[1]: https://help.aliyun.com/zh/dashscope/developer-reference/api-key-settings. # noqa E501
workspace (str): The dashscope workspace id.
text (str): The text to generate.
voice (str): The voice name for qwen tts, such as 'Cherry', 'Ethan', 'Sunny', or 'Dylan';
the full voice list is available at: https://help.aliyun.com/zh/model-studio/qwen-tts.
language_type (str): The language type of the synthesized speech; defaults to 'auto'. Applies to [qwen3-tts].
**kwargs:
stream(bool, `optional`): Enable server-sent events
(ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
Expand All @@ -202,8 +214,6 @@ async def call(
tokens with top_p probability mass. So 0.1 means only
the tokens comprising the top 10% probability mass are
considered[qwen-turbo,bailian-v1].
voice(string, `optional`): The voice name of qwen tts, include 'Cherry'/'Ethan'/'Sunny'/'Dylan' and so on,
you can get the total voice list : https://help.aliyun.com/zh/model-studio/qwen-tts.
top_k(float, `optional`):

Raises:
Expand All @@ -230,6 +240,10 @@ async def call(

if text is not None and text:
input.update({'text': text})
if voice is not None and voice:
input.update({'voice': voice})
if language_type is not None and language_type:
input.update({'language_type': language_type})
Comment on lines 241 to +246
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The conditional checks to add text, voice, and language_type to the input dictionary are repetitive. This can be refactored into a loop to improve code readability and make it easier to add more optional parameters in the future.

Suggested change
if text is not None and text:
input.update({'text': text})
if voice is not None and voice:
input.update({'voice': voice})
if language_type is not None and language_type:
input.update({'language_type': language_type})
for key, value in (('text', text), ('voice', voice), ('language_type', language_type)):
if value:
input[key] = value

if msg_copy is not None:
input.update({'messages': msg_copy})
response = await super().call(model=model,
Expand Down
5 changes: 5 additions & 0 deletions dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def update_session(self,
response_format: AudioFormat = AudioFormat.
PCM_24000HZ_MONO_16BIT,
mode: str = 'server_commit',
language_type: str = None,
**kwargs) -> None:
'''
update session configuration, should be used before create response
Expand All @@ -170,13 +171,17 @@ def update_session(self,
output audio format
mode: str
response mode, server_commit or commit
language_type: str
language type of the synthesized audio; defaults to 'auto'
'''
self.config = {
'voice': voice,
'mode': mode,
'response_format': response_format.format,
'sample_rate': response_format.sample_rate,
}
if language_type is not None:
self.config['language_type'] = language_type
self.config.update(kwargs)
self.__send_str(
json.dumps({
Expand Down
21 changes: 14 additions & 7 deletions samples/test_qwen_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,26 @@

response = dashscope.MultiModalConversation.call(
api_key=os.getenv('DASHSCOPE_API_KEY'),
model="qwen-tts",
model="qwen3-tts-flash",
text="Today is a wonderful day to build something people love!",
voice="Cherry",
stream=use_stream
stream=use_stream,
language_type="zh"
Comment thread
songguocola marked this conversation as resolved.
)
if use_stream:
# print the audio data in stream mode
for chunk in response:
if chunk.output is None:
print(f"error: {chunk}")
break
audio = chunk.output.audio
print("base64 audio data is: {}", chunk.output.audio.data)
print(f"base64 audio data is: {chunk.output.audio.data}")
if chunk.output.finish_reason == "stop":
print("finish at: {} ", chunk.output.audio.expires_at)
print(f"finish at: {chunk.output.audio.expires_at}")
else:
# print the audio url in non-stream mode
print("synthesized audio url is: {}", response.output.audio.url)
print("finish at: {} ", response.output.audio.expires_at)
if response.output is None:
print(f"error: {response}")
else:
# print the audio url in non-stream mode
print(f"synthesized audio url is: {response.output.audio.url}")
print(f"finish at: {response.output.audio.expires_at}")